// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>

#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/cpufeature.h>
#include <asm/vdso/vsyscall.h>
#include <clocksource/hyperv_timer.h>
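/*
 * Hand the generic vDSO/time-namespace code the vdso_data that lives at
 * the start of a vvar page.
 */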
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)vvar_page;
}

static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;

unsigned int vclocks_used __read_mostly;
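/* Toggled by the "vdso=" boot parameter, parsed in vdso_setup() below. */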
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif
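/*
 * Sanity-check a vDSO image at boot and patch its alternative instruction
 * entries so the text matches the features of the running CPU.
 */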
int __init init_vdso_image(const struct vdso_image *image)
{
	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));

	return 0;
}
static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;
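/*
 * Fault handler for the [vdso] mapping: return the page of vDSO text at
 * the faulting offset within the image blob.
 */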
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}
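/*
 * If a task is inside a 32-bit fast syscall while the vDSO is moved by
 * mremap(), its saved return IP still points into the old mapping; make
 * it land in the relocated int80 landing pad instead.
 */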
static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing - look at do_fast_syscall_32 */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	const struct vdso_image *image = current->mm->context.vdso_image;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}
#ifdef CONFIG_TIME_NS
/*
 * The vvar page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared, and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vvar_mapping))
			zap_vma_pages(vma);
	}
	mmap_read_unlock(mm);

	return 0;
}
#endif
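/*
 * Fault handler for the [vvar] mapping: insert the vdso_data page and, for
 * tasks inside a time namespace, the namespace-specific data page.
 */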
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	unsigned long pfn;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping. This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;

		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the sym_vvar_page offset and
		 * the real VVAR page is mapped with the sym_timens_page
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (timens_page) {
			unsigned long addr;
			vm_fault_t err;

			/*
			 * Optimization: inside a time namespace, pre-fault
			 * the real VVAR page too. As the timens page only
			 * holds offsets for clocks relative to VVAR, it
			 * will be faulted shortly by vDSO code anyway.
			 */
			addr = vmf->address + (image->sym_timens_page - sym_offset);
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;

			pfn = page_to_pfn(timens_page);
		}

		return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_timens_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		if (!timens_page)
			return VM_FAULT_SIGBUS;

		pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}

	return VM_FAULT_SIGBUS;
}
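/*
 * Fault handler for the [vvar_vclock] mapping: expose the paravirt clock or
 * Hyper-V reference TSC page when the corresponding clocksource is in use.
 */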
static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
				    struct vm_area_struct *vma, struct vm_fault *vmf)
{
	switch (vmf->pgoff) {
#ifdef CONFIG_PARAVIRT_CLOCK
	case VDSO_PAGE_PVCLOCK_OFFSET:
	{
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();

		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		break;
	}
#endif /* CONFIG_PARAVIRT_CLOCK */
#ifdef CONFIG_HYPERV_TIMER
	case VDSO_PAGE_HVCLOCK_OFFSET:
	{
		unsigned long pfn = hv_get_tsc_pfn();

		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address, pfn);
		break;
	}
#endif /* CONFIG_HYPERV_TIMER */
	}

	return VM_FAULT_SIGBUS;
}
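/*
 * Special-mapping descriptors for the three vDSO-related VMAs; the names
 * are what shows up in /proc/<pid>/maps.
 */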
static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};
static const struct vm_special_mapping vvar_vclock_mapping = {
	.name = "[vvar_vclock]",
	.fault = vvar_vclock_fault,
};
/*
 * Add vdso and vvar mappings to current process.
 * @image          - blob to map
 * @addr           - request a specific address (zero to map at free addr)
 */
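/*
 * Note on layout: sym_vvar_start is a negative offset, so the vvar pages
 * are mapped at @addr and the vDSO text follows at
 * text_start = addr - image->sym_vvar_start.
 */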
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
				       VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_vclock_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
		do_munmap(mm, addr, image->size, NULL);
		goto up_fail;
	}

	current->mm->context.vdso = (void __user *)text_start;
	current->mm->context.vdso_image = image;

up_fail:
	mmap_write_unlock(mm);
	return ret;
}
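/*
 * Map the vDSO at a caller-chosen address; used for the ARCH_MAP_VDSO_*
 * arch_prctl() requests. Refuses if any vDSO/vvar mapping already exists.
 */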
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	/*
	 * Check if we have already mapped the vdso blob - fail to prevent
	 * abuse from userspace of install_special_mapping, which may not
	 * do accounting and rlimits right.
	 * We could search a vma near context.vdso, but it's a slowpath,
	 * so let's explicitly check all VMAs to be completely sure.
	 */
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
		    vma_is_special_mapping(vma, &vvar_mapping) ||
		    vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
			mmap_write_unlock(mm);
			return -EEXIST;
		}
	}
	mmap_write_unlock(mm);

	return map_vdso(image, addr);
}
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif
#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, 0);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
	if (x32) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso(&vdso_image_x32, 0);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif
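/*
 * Report whether regs->ip sits on one of the 32-bit vDSO's sigreturn
 * landing pads, e.g. so that syscall user dispatch can let sigreturn
 * through.
 */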
bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	const struct vdso_image *image = current->mm->context.vdso_image;
	unsigned long vdso = (unsigned long) current->mm->context.vdso;

	if (in_ia32_syscall() && image == &vdso_image_32) {
		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
			return true;
	}
#endif
	return false;
}
#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);
#endif /* CONFIG_X86_64 */