// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDSO implementations.
 *
 * Copyright (C) 2012 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>

extern char vdso_start[], vdso_end[];
extern char vdso32_start[], vdso32_end[];

enum vdso_abi {
	VDSO_ABI_AA64,
	VDSO_ABI_AA32,
};

enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

struct vdso_abi_info {
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};

static struct vdso_abi_info vdso_info[] __ro_after_init = {
	[VDSO_ABI_AA64] = {
		.vdso_code_start	= vdso_start,
		.vdso_code_end		= vdso_end,
	},
#ifdef CONFIG_COMPAT_VDSO
	[VDSO_ABI_AA32] = {
		.vdso_code_start	= vdso32_start,
		.vdso_code_end		= vdso32_end,
	},
#endif /* CONFIG_COMPAT_VDSO */
};
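
/*
 * The vDSO data page.
 */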
static union {
	struct vdso_data	data[CS_BASES];
	u8			page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
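
/*
 * Keep mm->context.vdso in sync when userspace moves the vDSO text with
 * mremap(), so the kernel can still locate the vDSO (e.g. the signal
 * trampoline) at its new base address.
 */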
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	current->mm->context.vdso = (void *)new_vma->vm_start;

	return 0;
}
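
/*
 * Validate the vDSO image and build the array of struct pages backing its
 * text; the array is handed to the code mapping (cm) for this ABI.
 */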
static int __vdso_init(enum vdso_abi abi)
{
	int i;
	struct page **vdso_pagelist;
	unsigned long pfn;

	if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
		pr_err("vDSO is not a valid ELF object!\n");
		return -EINVAL;
	}

	vdso_info[abi].vdso_pages = (
		vdso_info[abi].vdso_code_end -
		vdso_info[abi].vdso_code_start) >>
		PAGE_SHIFT;

	vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
				sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	/* Grab the vDSO code pages. */
	pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);

	for (i = 0; i < vdso_info[abi].vdso_pages; i++)
		vdso_pagelist[i] = pfn_to_page(pfn + i);

	vdso_info[abi].cm->pages = vdso_pagelist;

	return 0;
}
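
/*
 * Time namespace support: with CONFIG_TIME_NS the vvar region carries an
 * extra page so that tasks inside a time namespace observe their
 * namespace-specific clock offsets.
 */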
#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;

	mmap_read_lock(mm);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
			zap_page_range(vma, vma->vm_start, size);
#ifdef CONFIG_COMPAT_VDSO
		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
			zap_page_range(vma, vma->vm_start, size);
#endif
	}

	mmap_read_unlock(mm);
	return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops.
	 * For more details check_vma_flags() and __access_remote_vm()
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif
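
/*
 * Fault handler for the vvar special mapping: insert either the real vDSO
 * data page or the time namespace page, depending on the faulting offset
 * and on whether the task is in a time namespace.
 */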
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = sym_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = sym_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}
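
/*
 * Map the vvar data pages followed by the vDSO text for the given ABI.
 * The data pages are populated lazily through the .fault handler, while
 * the text is mapped executable (with BTI guarded-page semantics when
 * supported).
 */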
static int __setup_additional_pages(enum vdso_abi abi,
				    struct mm_struct *mm,
				    struct linux_binprm *bprm,
				    int uses_interp)
{
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	unsigned long gp_flags = 0;
	void *ret;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

	vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data page */
	vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;

	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}

	ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_PFNMAP,
				       vdso_info[abi].dm);
	if (IS_ERR(ret))
		goto up_fail;

	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
		gp_flags = VM_ARM64_BTI;

	vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
				       VM_READ|VM_EXEC|gp_flags|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       vdso_info[abi].cm);
	if (IS_ERR(ret))
		goto up_fail;

	return 0;

up_fail:
	mm->context.vdso = NULL;
	return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
	AA32_MAP_VECTORS,	/* kuser helpers */
	AA32_MAP_SIGPAGE,
	AA32_MAP_VVAR,
	AA32_MAP_VDSO,
};

static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;

static struct vm_special_mapping aarch32_vdso_maps[] = {
	[AA32_MAP_VECTORS] = {
		.name	= "[vectors]", /* ABI */
		.pages	= &aarch32_vectors_page,
	},
	[AA32_MAP_SIGPAGE] = {
		.name	= "[sigpage]", /* ABI */
		.pages	= &aarch32_sig_page,
	},
	[AA32_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA32_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};
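
/*
 * Copy the kuser helpers into the top of a zeroed page; the page is later
 * mapped at the AArch32 vectors base for compat tasks.
 */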
static int aarch32_alloc_kuser_vdso_page(void)
{
	extern char __kuser_helper_start[], __kuser_helper_end[];
	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
	unsigned long vdso_page;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	vdso_page = get_zeroed_page(GFP_ATOMIC);
	if (!vdso_page)
		return -ENOMEM;

	memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
	       kuser_sz);
	aarch32_vectors_page = virt_to_page(vdso_page);
	flush_dcache_page(aarch32_vectors_page);
	return 0;
}
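
/*
 * Copy the AArch32 sigreturn trampoline into the signal page exposed to
 * compat tasks.
 */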
static int aarch32_alloc_sigpage(void)
{
	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
	unsigned long sigpage;

	sigpage = get_zeroed_page(GFP_ATOMIC);
	if (!sigpage)
		return -ENOMEM;

	memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
	aarch32_sig_page = virt_to_page(sigpage);
	flush_dcache_page(aarch32_sig_page);
	return 0;
}
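
/*
 * Hook up the compat vDSO data/code special mappings and initialise the
 * compat vDSO image.
 */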
static int __aarch32_alloc_vdso_pages(void)
{
	if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
		return 0;

	vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
	vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA32);
}

static int __init aarch32_alloc_vdso_pages(void)
{
	int ret;

	ret = __aarch32_alloc_vdso_pages();
	if (ret)
		return ret;

	ret = aarch32_alloc_sigpage();
	if (ret)
		return ret;

	return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);
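
/*
 * Map the kuser helpers page at the fixed AArch32 vectors address.
 */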
static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
	void *ret;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	/*
	 * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
	 * not safe to CoW the page containing the CPU exception vectors.
	 */
	ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
				       VM_READ | VM_EXEC |
				       VM_MAYREAD | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_VECTORS]);

	return PTR_ERR_OR_ZERO(ret);
}
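
/*
 * Map the signal page at an unallocated address chosen by
 * get_unmapped_area() and record it in mm->context.sigpage.
 */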
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
	unsigned long addr;
	void *ret;

	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = ERR_PTR(addr);
		goto out;
	}

	/*
	 * VM_MAYWRITE is required to allow gdb to Copy-on-Write and
	 * set breakpoints.
	 */
	ret = _install_special_mapping(mm, addr, PAGE_SIZE,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
	if (IS_ERR(ret))
		goto out;

	mm->context.sigpage = (void *)addr;

out:
	return PTR_ERR_OR_ZERO(ret);
}
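
/*
 * Called at exec time for compat tasks: install the kuser helpers, the
 * compat vDSO (if enabled) and the signal page.
 */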
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = aarch32_kuser_helpers_setup(mm);
	if (ret)
		goto out;

	if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
		ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
					       uses_interp);
		if (ret)
			goto out;
	}

	ret = aarch32_sigreturn_setup(mm);
out:
	mmap_write_unlock(mm);
	return ret;
}
#endif /* CONFIG_COMPAT */
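
/*
 * Special mappings for the native (AArch64) vDSO: the vvar data region and
 * the vDSO code itself.
 */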
enum aarch64_map {
	AA64_MAP_VVAR,
	AA64_MAP_VDSO,
};

static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
	[AA64_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA64_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};

static int __init vdso_init(void)
{
	vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
	vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);
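
/*
 * Called at exec time to map the native vDSO into the new mm.
 */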
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
	mmap_write_unlock(mm);

	return ret;
}