hugetlb: introduce generic version of hugetlb_free_pgd_range
[linux/fpc-iii.git] / arch / x86 / kernel / process_64.c
blob31b4755369f084575f6b3a0ec30b340392106f70
1 /*
2 * Copyright (C) 1995 Linus Torvalds
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * X86-64 port
8 * Andi Kleen.
10 * CPU hotplug support - ashok.raj@intel.com
14 * This file handles the architecture-dependent parts of process handling.
17 #include <linux/cpu.h>
18 #include <linux/errno.h>
19 #include <linux/sched.h>
20 #include <linux/sched/task.h>
21 #include <linux/sched/task_stack.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/delay.h>
31 #include <linux/export.h>
32 #include <linux/ptrace.h>
33 #include <linux/notifier.h>
34 #include <linux/kprobes.h>
35 #include <linux/kdebug.h>
36 #include <linux/prctl.h>
37 #include <linux/uaccess.h>
38 #include <linux/io.h>
39 #include <linux/ftrace.h>
40 #include <linux/syscalls.h>
42 #include <asm/pgtable.h>
43 #include <asm/processor.h>
44 #include <asm/fpu/internal.h>
45 #include <asm/mmu_context.h>
46 #include <asm/prctl.h>
47 #include <asm/desc.h>
48 #include <asm/proto.h>
49 #include <asm/ia32.h>
50 #include <asm/syscalls.h>
51 #include <asm/debugreg.h>
52 #include <asm/switch_to.h>
53 #include <asm/xen/hypervisor.h>
54 #include <asm/vdso.h>
55 #include <asm/intel_rdt_sched.h>
56 #include <asm/unistd.h>
57 #include <asm/fsgsbase.h>
58 #ifdef CONFIG_IA32_EMULATION
59 /* Not included via unistd.h */
60 #include <asm/unistd_32_ia32.h>
61 #endif
63 /* Prints also some state that isn't saved in the pt_regs */
64 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
66 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
67 unsigned long d0, d1, d2, d3, d6, d7;
68 unsigned int fsindex, gsindex;
69 unsigned int ds, cs, es;
71 show_iret_regs(regs);
73 if (regs->orig_ax != -1)
74 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
75 else
76 pr_cont("\n");
78 printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
79 regs->ax, regs->bx, regs->cx);
80 printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
81 regs->dx, regs->si, regs->di);
82 printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
83 regs->bp, regs->r8, regs->r9);
84 printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
85 regs->r10, regs->r11, regs->r12);
86 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
87 regs->r13, regs->r14, regs->r15);
89 if (mode == SHOW_REGS_SHORT)
90 return;
92 if (mode == SHOW_REGS_USER) {
93 rdmsrl(MSR_FS_BASE, fs);
94 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
95 printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
96 fs, shadowgs);
97 return;
100 asm("movl %%ds,%0" : "=r" (ds));
101 asm("movl %%cs,%0" : "=r" (cs));
102 asm("movl %%es,%0" : "=r" (es));
103 asm("movl %%fs,%0" : "=r" (fsindex));
104 asm("movl %%gs,%0" : "=r" (gsindex));
106 rdmsrl(MSR_FS_BASE, fs);
107 rdmsrl(MSR_GS_BASE, gs);
108 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
110 cr0 = read_cr0();
111 cr2 = read_cr2();
112 cr3 = __read_cr3();
113 cr4 = __read_cr4();
115 printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
116 fs, fsindex, gs, gsindex, shadowgs);
117 printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
118 es, cr0);
119 printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
120 cr4);
122 get_debugreg(d0, 0);
123 get_debugreg(d1, 1);
124 get_debugreg(d2, 2);
125 get_debugreg(d3, 3);
126 get_debugreg(d6, 6);
127 get_debugreg(d7, 7);
129 /* Only print out debug registers if they are in their non-default state. */
130 if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
131 (d6 == DR6_RESERVED) && (d7 == 0x400))) {
132 printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
133 d0, d1, d2);
134 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
135 d3, d6, d7);
138 if (boot_cpu_has(X86_FEATURE_OSPKE))
139 printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
/*
 * Sanity check run for a dead task: if it still has an mm whose context
 * carries an LDT, warn about it and BUG(). Does nothing when
 * CONFIG_MODIFY_LDT_SYSCALL is not set.
 */
void release_thread(struct task_struct *dead_task)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	struct ldt_struct *ldt;

	if (!dead_task->mm)
		return;

	ldt = dead_task->mm->context.ldt;
	if (!ldt)
		return;

	pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
		dead_task->comm,
		ldt->entries,
		ldt->nr_entries);
	BUG();
#endif
}
/* Selects which of the two segment registers a helper operates on. */
enum which_selector {
	FS,
	GS
};
163 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
164 * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
165 * It's forcibly inlined because it'll generate better code and this function
166 * is hot.
168 static __always_inline void save_base_legacy(struct task_struct *prev_p,
169 unsigned short selector,
170 enum which_selector which)
172 if (likely(selector == 0)) {
174 * On Intel (without X86_BUG_NULL_SEG), the segment base could
175 * be the pre-existing saved base or it could be zero. On AMD
176 * (with X86_BUG_NULL_SEG), the segment base could be almost
177 * anything.
179 * This branch is very hot (it's hit twice on almost every
180 * context switch between 64-bit programs), and avoiding
181 * the RDMSR helps a lot, so we just assume that whatever
182 * value is already saved is correct. This matches historical
183 * Linux behavior, so it won't break existing applications.
185 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
186 * report that the base is zero, it needs to actually be zero:
187 * see the corresponding logic in load_seg_legacy.
189 } else {
191 * If the selector is 1, 2, or 3, then the base is zero on
192 * !X86_BUG_NULL_SEG CPUs and could be anything on
193 * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
194 * has never attempted to preserve the base across context
195 * switches.
197 * If selector > 3, then it refers to a real segment, and
198 * saving the base isn't necessary.
200 if (which == FS)
201 prev_p->thread.fsbase = 0;
202 else
203 prev_p->thread.gsbase = 0;
207 static __always_inline void save_fsgs(struct task_struct *task)
209 savesegment(fs, task->thread.fsindex);
210 savesegment(gs, task->thread.gsindex);
211 save_base_legacy(task, task->thread.fsindex, FS);
212 save_base_legacy(task, task->thread.gsindex, GS);
215 #if IS_ENABLED(CONFIG_KVM)
217 * While a process is running,current->thread.fsbase and current->thread.gsbase
218 * may not match the corresponding CPU registers (see save_base_legacy()). KVM
219 * wants an efficient way to save and restore FSBASE and GSBASE.
220 * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
222 void save_fsgs_for_kvm(void)
224 save_fsgs(current);
226 EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
227 #endif
229 static __always_inline void loadseg(enum which_selector which,
230 unsigned short sel)
232 if (which == FS)
233 loadsegment(fs, sel);
234 else
235 load_gs_index(sel);
238 static __always_inline void load_seg_legacy(unsigned short prev_index,
239 unsigned long prev_base,
240 unsigned short next_index,
241 unsigned long next_base,
242 enum which_selector which)
244 if (likely(next_index <= 3)) {
246 * The next task is using 64-bit TLS, is not using this
247 * segment at all, or is having fun with arcane CPU features.
249 if (next_base == 0) {
251 * Nasty case: on AMD CPUs, we need to forcibly zero
252 * the base.
254 if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
255 loadseg(which, __USER_DS);
256 loadseg(which, next_index);
257 } else {
259 * We could try to exhaustively detect cases
260 * under which we can skip the segment load,
261 * but there's really only one case that matters
262 * for performance: if both the previous and
263 * next states are fully zeroed, we can skip
264 * the load.
266 * (This assumes that prev_base == 0 has no
267 * false positives. This is the case on
268 * Intel-style CPUs.)
270 if (likely(prev_index | next_index | prev_base))
271 loadseg(which, next_index);
273 } else {
274 if (prev_index != next_index)
275 loadseg(which, next_index);
276 wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
277 next_base);
279 } else {
281 * The next task is using a real segment. Loading the selector
282 * is sufficient.
284 loadseg(which, next_index);
288 static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
289 struct thread_struct *next)
291 load_seg_legacy(prev->fsindex, prev->fsbase,
292 next->fsindex, next->fsbase, FS);
293 load_seg_legacy(prev->gsindex, prev->gsbase,
294 next->gsindex, next->gsbase, GS);
297 static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
298 unsigned short selector)
300 unsigned short idx = selector >> 3;
301 unsigned long base;
303 if (likely((selector & SEGMENT_TI_MASK) == 0)) {
304 if (unlikely(idx >= GDT_ENTRIES))
305 return 0;
308 * There are no user segments in the GDT with nonzero bases
309 * other than the TLS segments.
311 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
312 return 0;
314 idx -= GDT_ENTRY_TLS_MIN;
315 base = get_desc_base(&task->thread.tls_array[idx]);
316 } else {
317 #ifdef CONFIG_MODIFY_LDT_SYSCALL
318 struct ldt_struct *ldt;
321 * If performance here mattered, we could protect the LDT
322 * with RCU. This is a slow path, though, so we can just
323 * take the mutex.
325 mutex_lock(&task->mm->context.lock);
326 ldt = task->mm->context.ldt;
327 if (unlikely(idx >= ldt->nr_entries))
328 base = 0;
329 else
330 base = get_desc_base(ldt->entries + idx);
331 mutex_unlock(&task->mm->context.lock);
332 #else
333 base = 0;
334 #endif
337 return base;
340 void x86_fsbase_write_cpu(unsigned long fsbase)
343 * Set the selector to 0 as a notion, that the segment base is
344 * overwritten, which will be checked for skipping the segment load
345 * during context switch.
347 loadseg(FS, 0);
348 wrmsrl(MSR_FS_BASE, fsbase);
351 void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
353 /* Set the selector to 0 for the same reason as %fs above. */
354 loadseg(GS, 0);
355 wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
358 unsigned long x86_fsbase_read_task(struct task_struct *task)
360 unsigned long fsbase;
362 if (task == current)
363 fsbase = x86_fsbase_read_cpu();
364 else if (task->thread.fsindex == 0)
365 fsbase = task->thread.fsbase;
366 else
367 fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
369 return fsbase;
372 unsigned long x86_gsbase_read_task(struct task_struct *task)
374 unsigned long gsbase;
376 if (task == current)
377 gsbase = x86_gsbase_read_cpu_inactive();
378 else if (task->thread.gsindex == 0)
379 gsbase = task->thread.gsbase;
380 else
381 gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
383 return gsbase;
386 int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
389 * Not strictly needed for %fs, but do it for symmetry
390 * with %gs
392 if (unlikely(fsbase >= TASK_SIZE_MAX))
393 return -EPERM;
395 preempt_disable();
396 task->thread.fsbase = fsbase;
397 if (task == current)
398 x86_fsbase_write_cpu(fsbase);
399 task->thread.fsindex = 0;
400 preempt_enable();
402 return 0;
405 int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
407 if (unlikely(gsbase >= TASK_SIZE_MAX))
408 return -EPERM;
410 preempt_disable();
411 task->thread.gsbase = gsbase;
412 if (task == current)
413 x86_gsbase_write_cpu_inactive(gsbase);
414 task->thread.gsindex = 0;
415 preempt_enable();
417 return 0;
420 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
421 unsigned long arg, struct task_struct *p, unsigned long tls)
423 int err;
424 struct pt_regs *childregs;
425 struct fork_frame *fork_frame;
426 struct inactive_task_frame *frame;
427 struct task_struct *me = current;
429 childregs = task_pt_regs(p);
430 fork_frame = container_of(childregs, struct fork_frame, regs);
431 frame = &fork_frame->frame;
432 frame->bp = 0;
433 frame->ret_addr = (unsigned long) ret_from_fork;
434 p->thread.sp = (unsigned long) fork_frame;
435 p->thread.io_bitmap_ptr = NULL;
437 savesegment(gs, p->thread.gsindex);
438 p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
439 savesegment(fs, p->thread.fsindex);
440 p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
441 savesegment(es, p->thread.es);
442 savesegment(ds, p->thread.ds);
443 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
445 if (unlikely(p->flags & PF_KTHREAD)) {
446 /* kernel thread */
447 memset(childregs, 0, sizeof(struct pt_regs));
448 frame->bx = sp; /* function */
449 frame->r12 = arg;
450 return 0;
452 frame->bx = 0;
453 *childregs = *current_pt_regs();
455 childregs->ax = 0;
456 if (sp)
457 childregs->sp = sp;
459 err = -ENOMEM;
460 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
461 p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
462 IO_BITMAP_BYTES, GFP_KERNEL);
463 if (!p->thread.io_bitmap_ptr) {
464 p->thread.io_bitmap_max = 0;
465 return -ENOMEM;
467 set_tsk_thread_flag(p, TIF_IO_BITMAP);
471 * Set a new TLS for the child thread?
473 if (clone_flags & CLONE_SETTLS) {
474 #ifdef CONFIG_IA32_EMULATION
475 if (in_ia32_syscall())
476 err = do_set_thread_area(p, -1,
477 (struct user_desc __user *)tls, 0);
478 else
479 #endif
480 err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
481 if (err)
482 goto out;
484 err = 0;
485 out:
486 if (err && p->thread.io_bitmap_ptr) {
487 kfree(p->thread.io_bitmap_ptr);
488 p->thread.io_bitmap_max = 0;
491 return err;
494 static void
495 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
496 unsigned long new_sp,
497 unsigned int _cs, unsigned int _ss, unsigned int _ds)
499 WARN_ON_ONCE(regs != current_pt_regs());
501 if (static_cpu_has(X86_BUG_NULL_SEG)) {
502 /* Loading zero below won't clear the base. */
503 loadsegment(fs, __USER_DS);
504 load_gs_index(__USER_DS);
507 loadsegment(fs, 0);
508 loadsegment(es, _ds);
509 loadsegment(ds, _ds);
510 load_gs_index(0);
512 regs->ip = new_ip;
513 regs->sp = new_sp;
514 regs->cs = _cs;
515 regs->ss = _ss;
516 regs->flags = X86_EFLAGS_IF;
517 force_iret();
520 void
521 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
523 start_thread_common(regs, new_ip, new_sp,
524 __USER_CS, __USER_DS, 0);
526 EXPORT_SYMBOL_GPL(start_thread);
#ifdef CONFIG_COMPAT
/*
 * Compat variant of start_thread(): the code segment is __USER_CS when
 * TIF_X32 is set and __USER32_CS otherwise; the stack segment and %ds/%es
 * all use __USER_DS.
 */
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	unsigned int cs;

	if (test_thread_flag(TIF_X32))
		cs = __USER_CS;
	else
		cs = __USER32_CS;

	start_thread_common(regs, new_ip, new_sp, cs, __USER_DS, __USER_DS);
}
#endif
539 * switch_to(x,y) should switch tasks from x to y.
541 * This could still be optimized:
542 * - fold all the options into a flag word and test it with a single test.
543 * - could test fs/gs bitsliced
545 * Kprobes not supported here. Set the probe on schedule instead.
546 * Function graph tracer not supported too.
548 __visible __notrace_funcgraph struct task_struct *
549 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
551 struct thread_struct *prev = &prev_p->thread;
552 struct thread_struct *next = &next_p->thread;
553 struct fpu *prev_fpu = &prev->fpu;
554 struct fpu *next_fpu = &next->fpu;
555 int cpu = smp_processor_id();
556 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
558 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
559 this_cpu_read(irq_count) != -1);
561 switch_fpu_prepare(prev_fpu, cpu);
563 /* We must save %fs and %gs before load_TLS() because
564 * %fs and %gs may be cleared by load_TLS().
566 * (e.g. xen_load_tls())
568 save_fsgs(prev_p);
571 * Load TLS before restoring any segments so that segment loads
572 * reference the correct GDT entries.
574 load_TLS(next, cpu);
577 * Leave lazy mode, flushing any hypercalls made here. This
578 * must be done after loading TLS entries in the GDT but before
579 * loading segments that might reference them, and and it must
580 * be done before fpu__restore(), so the TS bit is up to
581 * date.
583 arch_end_context_switch(next_p);
585 /* Switch DS and ES.
587 * Reading them only returns the selectors, but writing them (if
588 * nonzero) loads the full descriptor from the GDT or LDT. The
589 * LDT for next is loaded in switch_mm, and the GDT is loaded
590 * above.
592 * We therefore need to write new values to the segment
593 * registers on every context switch unless both the new and old
594 * values are zero.
596 * Note that we don't need to do anything for CS and SS, as
597 * those are saved and restored as part of pt_regs.
599 savesegment(es, prev->es);
600 if (unlikely(next->es | prev->es))
601 loadsegment(es, next->es);
603 savesegment(ds, prev->ds);
604 if (unlikely(next->ds | prev->ds))
605 loadsegment(ds, next->ds);
607 x86_fsgsbase_load(prev, next);
609 switch_fpu_finish(next_fpu, cpu);
612 * Switch the PDA and FPU contexts.
614 this_cpu_write(current_task, next_p);
615 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
617 /* Reload sp0. */
618 update_task_stack(next_p);
621 * Now maybe reload the debug registers and handle I/O bitmaps
623 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
624 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
625 __switch_to_xtra(prev_p, next_p, tss);
627 #ifdef CONFIG_XEN_PV
629 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
630 * current_pt_regs()->flags may not match the current task's
631 * intended IOPL. We need to switch it manually.
633 if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
634 prev->iopl != next->iopl))
635 xen_set_iopl_mask(next->iopl);
636 #endif
638 if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
640 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
641 * does not update the cached descriptor. As a result, if we
642 * do SYSRET while SS is NULL, we'll end up in user mode with
643 * SS apparently equal to __USER_DS but actually unusable.
645 * The straightforward workaround would be to fix it up just
646 * before SYSRET, but that would slow down the system call
647 * fast paths. Instead, we ensure that SS is never NULL in
648 * system call context. We do this by replacing NULL SS
649 * selectors at every context switch. SYSCALL sets up a valid
650 * SS, so the only way to get NULL is to re-enter the kernel
651 * from CPL 3 through an interrupt. Since that can't happen
652 * in the same task as a running syscall, we are guaranteed to
653 * context switch between every interrupt vector entry and a
654 * subsequent SYSRET.
656 * We read SS first because SS reads are much faster than
657 * writes. Out of caution, we force SS to __KERNEL_DS even if
658 * it previously had a different non-NULL value.
660 unsigned short ss_sel;
661 savesegment(ss, ss_sel);
662 if (ss_sel != __KERNEL_DS)
663 loadsegment(ss, __KERNEL_DS);
666 /* Load the Intel cache allocation PQR MSR. */
667 intel_rdt_sched_in();
669 return prev_p;
672 void set_personality_64bit(void)
674 /* inherit personality from parent */
676 /* Make sure to be in 64bit mode */
677 clear_thread_flag(TIF_IA32);
678 clear_thread_flag(TIF_ADDR32);
679 clear_thread_flag(TIF_X32);
680 /* Pretend that this comes from a 64bit execve */
681 task_pt_regs(current)->orig_ax = __NR_execve;
682 current_thread_info()->status &= ~TS_COMPAT;
684 /* Ensure the corresponding mm is not marked. */
685 if (current->mm)
686 current->mm->context.ia32_compat = 0;
688 /* TBD: overwrites user setup. Should have two bits.
689 But 64bit processes have always behaved this way,
690 so it's not too bad. The main problem is just that
691 32bit childs are affected again. */
692 current->personality &= ~READ_IMPLIES_EXEC;
/*
 * Switch the current task to the x32 personality. Compiles to an empty
 * function unless CONFIG_X86_X32 is set.
 */
static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	struct mm_struct *mm = current->mm;

	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (mm)
		mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;

	/*
	 * in_compat_syscall() decides compat status from the x32 syscall
	 * bit, and the x86 mmap() code relies on the syscall bitness. Set
	 * the x32 syscall bit right here so that in_compat_syscall() works
	 * during exec().
	 *
	 * Pretend to come from a x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current_thread_info()->status &= ~TS_COMPAT;
#endif
}
/*
 * Switch the current task to the ia32 personality. Compiles to an empty
 * function unless CONFIG_IA32_EMULATION is set.
 */
static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	struct mm_struct *mm = current->mm;

	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (mm)
		mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;

	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current_thread_info()->status |= TS_COMPAT;
#endif
}
730 void set_personality_ia32(bool x32)
732 /* Make sure to be in 32bit mode */
733 set_thread_flag(TIF_ADDR32);
735 if (x32)
736 __set_personality_x32();
737 else
738 __set_personality_ia32();
740 EXPORT_SYMBOL_GPL(set_personality_ia32);
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * Map the vDSO @image at @addr via map_vdso_once().
 *
 * Returns the image size on success, or the non-zero value returned by
 * map_vdso_once() on failure.
 */
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int err = map_vdso_once(image, addr);

	return err ? err : (long)image->size;
}
#endif
755 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
757 int ret = 0;
759 switch (option) {
760 case ARCH_SET_GS: {
761 ret = x86_gsbase_write_task(task, arg2);
762 break;
764 case ARCH_SET_FS: {
765 ret = x86_fsbase_write_task(task, arg2);
766 break;
768 case ARCH_GET_FS: {
769 unsigned long base = x86_fsbase_read_task(task);
771 ret = put_user(base, (unsigned long __user *)arg2);
772 break;
774 case ARCH_GET_GS: {
775 unsigned long base = x86_gsbase_read_task(task);
777 ret = put_user(base, (unsigned long __user *)arg2);
778 break;
781 #ifdef CONFIG_CHECKPOINT_RESTORE
782 # ifdef CONFIG_X86_X32_ABI
783 case ARCH_MAP_VDSO_X32:
784 return prctl_map_vdso(&vdso_image_x32, arg2);
785 # endif
786 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
787 case ARCH_MAP_VDSO_32:
788 return prctl_map_vdso(&vdso_image_32, arg2);
789 # endif
790 case ARCH_MAP_VDSO_64:
791 return prctl_map_vdso(&vdso_image_64, arg2);
792 #endif
794 default:
795 ret = -EINVAL;
796 break;
799 return ret;
802 SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
804 long ret;
806 ret = do_arch_prctl_64(current, option, arg2);
807 if (ret == -EINVAL)
808 ret = do_arch_prctl_common(current, option, arg2);
810 return ret;
#ifdef CONFIG_IA32_EMULATION
/*
 * Compat arch_prctl() system call: only the options handled by
 * do_arch_prctl_common() are available.
 */
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}
#endif
820 unsigned long KSTK_ESP(struct task_struct *task)
822 return task_pt_regs(task)->sp;