powerpc/perf: Fix book3s kernel to userspace backtraces
[linux/fpc-iii.git] / fs / binfmt_elf.c
blob8081aba116a728efe37b97904b5c71c66e360f6f
1 /*
2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
41 #ifndef user_long_t
42 #define user_long_t long
43 #endif
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
46 #endif
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
50 int, int, unsigned long);
52 #ifdef CONFIG_USELIB
53 static int load_elf_library(struct file *);
54 #else
55 #define load_elf_library NULL
56 #endif
59 * If we don't support core dumping, then supply a NULL so we
60 * don't even try.
62 #ifdef CONFIG_ELF_CORE
63 static int elf_core_dump(struct coredump_params *cprm);
64 #else
65 #define elf_core_dump NULL
66 #endif
68 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
69 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
70 #else
71 #define ELF_MIN_ALIGN PAGE_SIZE
72 #endif
74 #ifndef ELF_CORE_EFLAGS
75 #define ELF_CORE_EFLAGS 0
76 #endif
/*
 * Page-granular address helpers based on ELF_MIN_ALIGN.
 *
 * ELF_PAGESTART rounds an address down, ELF_PAGEOFFSET yields the offset
 * within the page and ELF_PAGEALIGN rounds an address up.  For the two
 * rounding helpers the mask is widened to unsigned long *before* it is
 * inverted, so that address bits above the width of ELF_MIN_ALIGN's own
 * type are never cleared by a zero-extended mask.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
82 static struct linux_binfmt elf_format = {
83 .module = THIS_MODULE,
84 .load_binary = load_elf_binary,
85 .load_shlib = load_elf_library,
86 .core_dump = elf_core_dump,
87 .min_coredump = ELF_EXEC_PAGESIZE,
90 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
92 static int set_brk(unsigned long start, unsigned long end)
94 start = ELF_PAGEALIGN(start);
95 end = ELF_PAGEALIGN(end);
96 if (end > start) {
97 unsigned long addr;
98 addr = vm_brk(start, end - start);
99 if (BAD_ADDR(addr))
100 return addr;
102 current->mm->start_brk = current->mm->brk = end;
103 return 0;
106 /* We need to explicitly zero any fractional pages
107 after the data section (i.e. bss). This would
108 contain the junk from the file that should not
109 be in memory
111 static int padzero(unsigned long elf_bss)
113 unsigned long nbyte;
115 nbyte = ELF_PAGEOFFSET(elf_bss);
116 if (nbyte) {
117 nbyte = ELF_MIN_ALIGN - nbyte;
118 if (clear_user((void __user *) elf_bss, nbyte))
119 return -EFAULT;
121 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD   - address of the slot @items entries further along the
 *               stack's growth direction from @sp.
 * STACK_ROUND - like STACK_ADD, but yields an unsigned long rounded to a
 *               16-byte boundary.
 * STACK_ALLOC - moves @sp by @len and evaluates to the start of the
 *               space just reserved.
 *
 * Every macro argument is parenthesised so that compound expressions
 * (e.g. "a + b") can be passed safely.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
139 #ifndef ELF_BASE_PLATFORM
141 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
142 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
143 * will be copied to the user stack in the same manner as AT_PLATFORM.
145 #define ELF_BASE_PLATFORM NULL
146 #endif
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 *
 * Starting from bprm->p, this copies the platform string(s) and 16 random
 * bytes onto the user stack, builds the auxiliary vector in
 * current->mm->saved_auxv, computes the final 16-byte-rounded stack
 * pointer (stored back into bprm->p), and then writes argc, the argv[]
 * pointers, the envp[] pointers and finally the auxiliary vector to user
 * memory.  mm->arg_end, mm->env_start and mm->env_end are updated while
 * walking the argument and environment strings from mm->arg_start.
 *
 * Returns 0 on success, -EFAULT when a user-space copy or the stack
 * extension via find_extend_vma() fails, and -EINVAL when an argument or
 * environment string has length 0 or exceeds MAX_ARG_STRLEN.
 */
148 static int
149 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
150 unsigned long load_addr, unsigned long interp_load_addr)
152 unsigned long p = bprm->p;
153 int argc = bprm->argc;
154 int envc = bprm->envc;
155 elf_addr_t __user *argv;
156 elf_addr_t __user *envp;
157 elf_addr_t __user *sp;
158 elf_addr_t __user *u_platform;
159 elf_addr_t __user *u_base_platform;
160 elf_addr_t __user *u_rand_bytes;
161 const char *k_platform = ELF_PLATFORM;
162 const char *k_base_platform = ELF_BASE_PLATFORM;
163 unsigned char k_rand_bytes[16];
164 int items;
165 elf_addr_t *elf_info;
166 int ei_index = 0;
167 const struct cred *cred = current_cred();
168 struct vm_area_struct *vma;
171 * In some cases (e.g. Hyper-Threading), we want to avoid L1
172 * evictions by the processes running on the same package. One
173 * thing we can do is to shuffle the initial stack for them.
176 p = arch_align_stack(p);
179 * If this architecture has a platform capability string, copy it
180 * to userspace. In some cases (Sparc), this info is impossible
181 * for userspace to get any other way, in others (i386) it is
182 * merely difficult.
184 u_platform = NULL;
185 if (k_platform) {
186 size_t len = strlen(k_platform) + 1;
188 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189 if (__copy_to_user(u_platform, k_platform, len))
190 return -EFAULT;
194 * If this architecture has a "base" platform capability
195 * string, copy it to userspace.
197 u_base_platform = NULL;
198 if (k_base_platform) {
199 size_t len = strlen(k_base_platform) + 1;
201 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 if (__copy_to_user(u_base_platform, k_base_platform, len))
203 return -EFAULT;
207 * Generate 16 random bytes for userspace PRNG seeding.
209 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
210 u_rand_bytes = (elf_addr_t __user *)
211 STACK_ALLOC(p, sizeof(k_rand_bytes));
212 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
213 return -EFAULT;
215 /* Create the ELF interpreter info */
216 elf_info = (elf_addr_t *)current->mm->saved_auxv;
217 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
218 #define NEW_AUX_ENT(id, val) \
219 do { \
220 elf_info[ei_index++] = id; \
221 elf_info[ei_index++] = val; \
222 } while (0)
224 #ifdef ARCH_DLINFO
226 * ARCH_DLINFO must come first so PPC can do its special alignment of
227 * AUXV.
228 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
229 * ARCH_DLINFO changes
231 ARCH_DLINFO;
232 #endif
233 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
234 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
235 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
236 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
237 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
238 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
239 NEW_AUX_ENT(AT_BASE, interp_load_addr);
240 NEW_AUX_ENT(AT_FLAGS, 0);
241 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
242 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
243 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
244 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
245 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
246 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
247 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
248 #ifdef ELF_HWCAP2
249 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
250 #endif
251 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
252 if (k_platform) {
253 NEW_AUX_ENT(AT_PLATFORM,
254 (elf_addr_t)(unsigned long)u_platform);
256 if (k_base_platform) {
257 NEW_AUX_ENT(AT_BASE_PLATFORM,
258 (elf_addr_t)(unsigned long)u_base_platform);
260 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
261 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
263 #undef NEW_AUX_ENT
264 /* AT_NULL is zero; clear the rest too */
265 memset(&elf_info[ei_index], 0,
266 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
268 /* And advance past the AT_NULL entry. */
269 ei_index += 2;
/* Reserve room for the auxv words first, then for argc, argv[] and envp[]. */
271 sp = STACK_ADD(p, ei_index);
/* items = argv pointers + NULL, envp pointers + NULL, plus one slot for argc. */
273 items = (argc + 1) + (envc + 1) + 1;
274 bprm->p = STACK_ROUND(sp, items);
276 /* Point sp at the lowest address on the stack */
277 #ifdef CONFIG_STACK_GROWSUP
278 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
279 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
280 #else
281 sp = (elf_addr_t __user *)bprm->p;
282 #endif
286 * Grow the stack manually; some architectures have a limit on how
287 * far ahead a user-space access may be in order to grow the stack.
289 vma = find_extend_vma(current->mm, bprm->p);
290 if (!vma)
291 return -EFAULT;
293 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
294 if (__put_user(argc, sp++))
295 return -EFAULT;
296 argv = sp;
297 envp = argv + argc + 1;
299 /* Populate argv and envp */
/* The strings are walked from mm->arg_start; each one's address goes into argv[]. */
300 p = current->mm->arg_end = current->mm->arg_start;
301 while (argc-- > 0) {
302 size_t len;
303 if (__put_user((elf_addr_t)p, argv++))
304 return -EFAULT;
305 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306 if (!len || len > MAX_ARG_STRLEN)
307 return -EINVAL;
308 p += len;
310 if (__put_user(0, argv))
311 return -EFAULT;
/* The environment strings start where the argument strings ended. */
312 current->mm->arg_end = current->mm->env_start = p;
313 while (envc-- > 0) {
314 size_t len;
315 if (__put_user((elf_addr_t)p, envp++))
316 return -EFAULT;
317 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
318 if (!len || len > MAX_ARG_STRLEN)
319 return -EINVAL;
320 p += len;
322 if (__put_user(0, envp))
323 return -EFAULT;
324 current->mm->env_end = p;
326 /* Put the elf_info on the stack in the right place. */
327 sp = (elf_addr_t __user *)envp + 1;
328 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
329 return -EFAULT;
330 return 0;
333 #ifndef elf_map
335 static unsigned long elf_map(struct file *filep, unsigned long addr,
336 struct elf_phdr *eppnt, int prot, int type,
337 unsigned long total_size)
339 unsigned long map_addr;
340 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
341 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
342 addr = ELF_PAGESTART(addr);
343 size = ELF_PAGEALIGN(size);
345 /* mmap() will return -EINVAL if given a zero size, but a
346 * segment with zero filesize is perfectly valid */
347 if (!size)
348 return addr;
351 * total_size is the size of the ELF (interpreter) image.
352 * The _first_ mmap needs to know the full size, otherwise
353 * randomization might put this image into an overlapping
354 * position with the ELF binary image. (since size < total_size)
355 * So we first map the 'big' image - and unmap the remainder at
356 * the end. (which unmap is needed for ELF images with holes.)
358 if (total_size) {
359 total_size = ELF_PAGEALIGN(total_size);
360 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
361 if (!BAD_ADDR(map_addr))
362 vm_munmap(map_addr+size, total_size-size);
363 } else
364 map_addr = vm_mmap(filep, addr, size, prot, type, off);
366 return(map_addr);
369 #endif /* !elf_map */
371 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
373 int i, first_idx = -1, last_idx = -1;
375 for (i = 0; i < nr; i++) {
376 if (cmds[i].p_type == PT_LOAD) {
377 last_idx = i;
378 if (first_idx == -1)
379 first_idx = i;
382 if (first_idx == -1)
383 return 0;
385 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
386 ELF_PAGESTART(cmds[first_idx].p_vaddr);
390 * load_elf_phdrs() - load ELF program headers
391 * @elf_ex: ELF header of the binary whose program headers should be loaded
392 * @elf_file: the opened ELF binary file
394 * Loads ELF program headers from the binary file elf_file, which has the ELF
395 * header pointed to by elf_ex, into a newly allocated array. The caller is
396 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
398 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
399 struct file *elf_file)
401 struct elf_phdr *elf_phdata = NULL;
402 int retval, size, err = -1;
405 * If the size of this structure has changed, then punt, since
406 * we will be doing the wrong thing.
408 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
409 goto out;
411 /* Sanity check the number of program headers... */
412 if (elf_ex->e_phnum < 1 ||
413 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
414 goto out;
416 /* ...and their total size. */
417 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
418 if (size > ELF_MIN_ALIGN)
419 goto out;
421 elf_phdata = kmalloc(size, GFP_KERNEL);
422 if (!elf_phdata)
423 goto out;
425 /* Read in the program headers */
426 retval = kernel_read(elf_file, elf_ex->e_phoff,
427 (char *)elf_phdata, size);
428 if (retval != size) {
429 err = (retval < 0) ? retval : -EIO;
430 goto out;
433 /* Success! */
434 err = 0;
435 out:
436 if (err) {
437 kfree(elf_phdata);
438 elf_phdata = NULL;
440 return elf_phdata;
443 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * Carries architecture specific data across the loading of an ELF file:
 * from the checking of architecture specific ELF headers up to the point
 * where the ELF load is known to be proceeding (ie. SET_PERSONALITY).
 *
 * This is the dummy definition for architectures which require no
 * specific state, hence it has no members.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}
/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:      The main ELF header
 * @phdr:      The program header to check
 * @elf:       The open ELF file
 * @is_interp: True if the phdr is from the interpreter of the ELF being
 *             loaded, else false.
 * @state:     Architecture-specific state preserved throughout the process
 *             of loading the ELF.
 *
 * Hook that lets architecture code validate a program header in the range
 * PT_LOPROC to PT_HIPROC.  It is invoked once per such header, both for
 * the ELF being loaded and for its interpreter.  This generic version
 * accepts every header.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Nothing to check without arch support: always proceed */
	return 0;
}
/**
 * arch_check_elf() - final architecture check of an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
509 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
511 /* This is much more generalized than the library routine read function,
512 so we keep this separate. Technically the library read function
513 is only provided so that we can read a.out libraries that have
514 an ELF header */
516 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
517 struct file *interpreter, unsigned long *interp_map_addr,
518 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
520 struct elf_phdr *eppnt;
521 unsigned long load_addr = 0;
522 int load_addr_set = 0;
523 unsigned long last_bss = 0, elf_bss = 0;
524 unsigned long error = ~0UL;
525 unsigned long total_size;
526 int i;
528 /* First of all, some simple consistency checks */
529 if (interp_elf_ex->e_type != ET_EXEC &&
530 interp_elf_ex->e_type != ET_DYN)
531 goto out;
532 if (!elf_check_arch(interp_elf_ex))
533 goto out;
534 if (!interpreter->f_op->mmap)
535 goto out;
537 total_size = total_mapping_size(interp_elf_phdata,
538 interp_elf_ex->e_phnum);
539 if (!total_size) {
540 error = -EINVAL;
541 goto out;
544 eppnt = interp_elf_phdata;
545 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
546 if (eppnt->p_type == PT_LOAD) {
547 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
548 int elf_prot = 0;
549 unsigned long vaddr = 0;
550 unsigned long k, map_addr;
552 if (eppnt->p_flags & PF_R)
553 elf_prot = PROT_READ;
554 if (eppnt->p_flags & PF_W)
555 elf_prot |= PROT_WRITE;
556 if (eppnt->p_flags & PF_X)
557 elf_prot |= PROT_EXEC;
558 vaddr = eppnt->p_vaddr;
559 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
560 elf_type |= MAP_FIXED;
561 else if (no_base && interp_elf_ex->e_type == ET_DYN)
562 load_addr = -vaddr;
564 map_addr = elf_map(interpreter, load_addr + vaddr,
565 eppnt, elf_prot, elf_type, total_size);
566 total_size = 0;
567 if (!*interp_map_addr)
568 *interp_map_addr = map_addr;
569 error = map_addr;
570 if (BAD_ADDR(map_addr))
571 goto out;
573 if (!load_addr_set &&
574 interp_elf_ex->e_type == ET_DYN) {
575 load_addr = map_addr - ELF_PAGESTART(vaddr);
576 load_addr_set = 1;
580 * Check to see if the section's size will overflow the
581 * allowed task size. Note that p_filesz must always be
582 * <= p_memsize so it's only necessary to check p_memsz.
584 k = load_addr + eppnt->p_vaddr;
585 if (BAD_ADDR(k) ||
586 eppnt->p_filesz > eppnt->p_memsz ||
587 eppnt->p_memsz > TASK_SIZE ||
588 TASK_SIZE - eppnt->p_memsz < k) {
589 error = -ENOMEM;
590 goto out;
594 * Find the end of the file mapping for this phdr, and
595 * keep track of the largest address we see for this.
597 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
598 if (k > elf_bss)
599 elf_bss = k;
602 * Do the same thing for the memory mapping - between
603 * elf_bss and last_bss is the bss section.
605 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
606 if (k > last_bss)
607 last_bss = k;
611 if (last_bss > elf_bss) {
613 * Now fill out the bss section. First pad the last page up
614 * to the page boundary, and then perform a mmap to make sure
615 * that there are zero-mapped pages up to and including the
616 * last bss page.
618 if (padzero(elf_bss)) {
619 error = -EFAULT;
620 goto out;
623 /* What we have mapped so far */
624 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
626 /* Map the last of the bss segment */
627 error = vm_brk(elf_bss, last_bss - elf_bss);
628 if (BAD_ADDR(error))
629 goto out;
632 error = load_addr;
633 out:
634 return error;
638 * These are the functions used to load ELF style executables and shared
639 * libraries. There is no binary dependent code anywhere else.
642 #ifndef STACK_RND_MASK
643 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
644 #endif
646 static unsigned long randomize_stack_top(unsigned long stack_top)
648 unsigned long random_variable = 0;
650 if ((current->flags & PF_RANDOMIZE) &&
651 !(current->personality & ADDR_NO_RANDOMIZE)) {
652 random_variable = (unsigned long) get_random_int();
653 random_variable &= STACK_RND_MASK;
654 random_variable <<= PAGE_SHIFT;
656 #ifdef CONFIG_STACK_GROWSUP
657 return PAGE_ALIGN(stack_top) + random_variable;
658 #else
659 return PAGE_ALIGN(stack_top) - random_variable;
660 #endif
/*
 * load_elf_binary() - load_binary handler of elf_format.
 *
 * Outline, in the order the code below performs it:
 *  - copy the ELF header out of bprm->buf and check magic, e_type
 *    (ET_EXEC or ET_DYN), architecture and that the file can be mmapped;
 *  - read the program headers with load_elf_phdrs();
 *  - on a PT_INTERP entry, read the interpreter path, open it and read
 *    its ELF header into loc->interp_elf_ex;
 *  - evaluate PT_GNU_STACK and hand PT_LOPROC..PT_HIPROC entries of the
 *    binary and of the interpreter to arch_elf_pt_proc(), then give
 *    arch_check_elf() a last chance to reject the load;
 *  - flush_old_exec(), personality setup, setup_new_exec() and
 *    setup_arg_pages();
 *  - map every PT_LOAD segment with elf_map() while tracking code, data,
 *    bss and brk bounds, then set up brk/bss with set_brk()/padzero();
 *  - map the interpreter, if any, with load_elf_interp() to obtain the
 *    entry point;
 *  - build the user stack with create_elf_tables(), fill in the mm
 *    bounds and start the new image with start_thread().
 *
 * Returns 0 on success or a negative errno.  The labels at the end
 * release the interpreter file, the interpreter path and the program
 * header tables, depending on how far the function got.
 */
663 static int load_elf_binary(struct linux_binprm *bprm)
665 struct file *interpreter = NULL; /* to shut gcc up */
666 unsigned long load_addr = 0, load_bias = 0;
667 int load_addr_set = 0;
668 char * elf_interpreter = NULL;
669 unsigned long error;
670 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
671 unsigned long elf_bss, elf_brk;
672 int retval, i;
673 unsigned long elf_entry;
674 unsigned long interp_load_addr = 0;
675 unsigned long start_code, end_code, start_data, end_data;
676 unsigned long reloc_func_desc __maybe_unused = 0;
677 int executable_stack = EXSTACK_DEFAULT;
678 struct pt_regs *regs = current_pt_regs();
679 struct {
680 struct elfhdr elf_ex;
681 struct elfhdr interp_elf_ex;
682 } *loc;
683 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
685 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
686 if (!loc) {
687 retval = -ENOMEM;
688 goto out_ret;
691 /* Get the exec-header */
692 loc->elf_ex = *((struct elfhdr *)bprm->buf);
694 retval = -ENOEXEC;
695 /* First of all, some simple consistency checks */
696 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
697 goto out;
699 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
700 goto out;
701 if (!elf_check_arch(&loc->elf_ex))
702 goto out;
703 if (!bprm->file->f_op->mmap)
704 goto out;
/* load_elf_phdrs() yields NULL on failure; retval is still -ENOEXEC here. */
706 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
707 if (!elf_phdata)
708 goto out;
710 elf_ppnt = elf_phdata;
711 elf_bss = 0;
712 elf_brk = 0;
714 start_code = ~0UL;
715 end_code = 0;
716 start_data = 0;
717 end_data = 0;
719 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
720 if (elf_ppnt->p_type == PT_INTERP) {
721 /* This is the program interpreter used for
722 * shared libraries - for now assume that this
723 * is an a.out format binary
725 retval = -ENOEXEC;
726 if (elf_ppnt->p_filesz > PATH_MAX ||
727 elf_ppnt->p_filesz < 2)
728 goto out_free_ph;
730 retval = -ENOMEM;
731 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
732 GFP_KERNEL);
733 if (!elf_interpreter)
734 goto out_free_ph;
736 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
737 elf_interpreter,
738 elf_ppnt->p_filesz);
739 if (retval != elf_ppnt->p_filesz) {
740 if (retval >= 0)
741 retval = -EIO;
742 goto out_free_interp;
744 /* make sure path is NULL terminated */
745 retval = -ENOEXEC;
746 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
747 goto out_free_interp;
749 interpreter = open_exec(elf_interpreter);
750 retval = PTR_ERR(interpreter);
751 if (IS_ERR(interpreter))
752 goto out_free_interp;
755 * If the binary is not readable then enforce
756 * mm->dumpable = 0 regardless of the interpreter's
757 * permissions.
759 would_dump(bprm, interpreter);
761 retval = kernel_read(interpreter, 0, bprm->buf,
762 BINPRM_BUF_SIZE);
763 if (retval != BINPRM_BUF_SIZE) {
764 if (retval >= 0)
765 retval = -EIO;
766 goto out_free_dentry;
769 /* Get the exec headers */
770 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
771 break;
773 elf_ppnt++;
/* Second pass over the headers: stack executability and arch-specific entries. */
776 elf_ppnt = elf_phdata;
777 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
778 switch (elf_ppnt->p_type) {
779 case PT_GNU_STACK:
780 if (elf_ppnt->p_flags & PF_X)
781 executable_stack = EXSTACK_ENABLE_X;
782 else
783 executable_stack = EXSTACK_DISABLE_X;
784 break;
786 case PT_LOPROC ... PT_HIPROC:
787 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
788 bprm->file, false,
789 &arch_state);
790 if (retval)
791 goto out_free_dentry;
792 break;
795 /* Some simple consistency checks for the interpreter */
796 if (elf_interpreter) {
797 retval = -ELIBBAD;
798 /* Not an ELF interpreter */
799 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
800 goto out_free_dentry;
801 /* Verify the interpreter has a valid arch */
802 if (!elf_check_arch(&loc->interp_elf_ex))
803 goto out_free_dentry;
805 /* Load the interpreter program headers */
806 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
807 interpreter);
808 if (!interp_elf_phdata)
809 goto out_free_dentry;
811 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
812 elf_ppnt = interp_elf_phdata;
813 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
814 switch (elf_ppnt->p_type) {
815 case PT_LOPROC ... PT_HIPROC:
816 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
817 elf_ppnt, interpreter,
818 true, &arch_state);
819 if (retval)
820 goto out_free_dentry;
821 break;
826 * Allow arch code to reject the ELF at this point, whilst it's
827 * still possible to return an error to the code that invoked
828 * the exec syscall.
830 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
831 if (retval)
832 goto out_free_dentry;
834 /* Flush all traces of the currently running executable */
835 retval = flush_old_exec(bprm);
836 if (retval)
837 goto out_free_dentry;
839 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
840 may depend on the personality. */
841 SET_PERSONALITY2(loc->elf_ex, &arch_state);
842 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
843 current->personality |= READ_IMPLIES_EXEC;
845 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
846 current->flags |= PF_RANDOMIZE;
848 setup_new_exec(bprm);
850 /* Do this so that we can load the interpreter, if need be. We will
851 change some of these later */
852 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
853 executable_stack);
854 if (retval < 0)
855 goto out_free_dentry;
857 current->mm->start_stack = bprm->p;
859 /* Now we do a little grungy work by mmapping the ELF image into
860 the correct location in memory. */
861 for(i = 0, elf_ppnt = elf_phdata;
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr;
865 unsigned long total_size = 0;
867 if (elf_ppnt->p_type != PT_LOAD)
868 continue;
870 if (unlikely (elf_brk > elf_bss)) {
871 unsigned long nbyte;
873 /* There was a PT_LOAD segment with p_memsz > p_filesz
874 before this one. Map anonymous pages, if needed,
875 and clear the area. */
876 retval = set_brk(elf_bss + load_bias,
877 elf_brk + load_bias);
878 if (retval)
879 goto out_free_dentry;
880 nbyte = ELF_PAGEOFFSET(elf_bss);
881 if (nbyte) {
882 nbyte = ELF_MIN_ALIGN - nbyte;
883 if (nbyte > elf_brk - elf_bss)
884 nbyte = elf_brk - elf_bss;
885 if (clear_user((void __user *)elf_bss +
886 load_bias, nbyte)) {
888 * This bss-zeroing can fail if the ELF
889 * file specifies odd protections. So
890 * we don't check the return value
896 if (elf_ppnt->p_flags & PF_R)
897 elf_prot |= PROT_READ;
898 if (elf_ppnt->p_flags & PF_W)
899 elf_prot |= PROT_WRITE;
900 if (elf_ppnt->p_flags & PF_X)
901 elf_prot |= PROT_EXEC;
903 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
905 vaddr = elf_ppnt->p_vaddr;
906 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
907 elf_flags |= MAP_FIXED;
908 } else if (loc->elf_ex.e_type == ET_DYN) {
909 /* Try and get dynamic programs out of the way of the
910 * default mmap base, as well as whatever program they
911 * might try to exec. This is because the brk will
912 * follow the loader, and is not movable. */
913 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
914 /* Memory randomization might have been switched off
915 * in runtime via sysctl or explicit setting of
916 * personality flags.
917 * If that is the case, retain the original non-zero
918 * load_bias value in order to establish proper
919 * non-randomized mappings.
921 if (current->flags & PF_RANDOMIZE)
922 load_bias = 0;
923 else
924 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
925 #else
926 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
927 #endif
928 total_size = total_mapping_size(elf_phdata,
929 loc->elf_ex.e_phnum);
930 if (!total_size) {
931 retval = -EINVAL;
932 goto out_free_dentry;
936 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
937 elf_prot, elf_flags, total_size);
938 if (BAD_ADDR(error)) {
939 retval = IS_ERR((void *)error) ?
940 PTR_ERR((void*)error) : -EINVAL;
941 goto out_free_dentry;
944 if (!load_addr_set) {
945 load_addr_set = 1;
946 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
947 if (loc->elf_ex.e_type == ET_DYN) {
948 load_bias += error -
949 ELF_PAGESTART(load_bias + vaddr);
950 load_addr += load_bias;
951 reloc_func_desc = load_bias;
954 k = elf_ppnt->p_vaddr;
955 if (k < start_code)
956 start_code = k;
957 if (start_data < k)
958 start_data = k;
961 * Check to see if the section's size will overflow the
962 * allowed task size. Note that p_filesz must always be
963 * <= p_memsz so it is only necessary to check p_memsz.
965 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
966 elf_ppnt->p_memsz > TASK_SIZE ||
967 TASK_SIZE - elf_ppnt->p_memsz < k) {
968 /* set_brk can never work. Avoid overflows. */
969 retval = -EINVAL;
970 goto out_free_dentry;
973 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
975 if (k > elf_bss)
976 elf_bss = k;
977 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
978 end_code = k;
979 if (end_data < k)
980 end_data = k;
981 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
982 if (k > elf_brk)
983 elf_brk = k;
986 loc->elf_ex.e_entry += load_bias;
987 elf_bss += load_bias;
988 elf_brk += load_bias;
989 start_code += load_bias;
990 end_code += load_bias;
991 start_data += load_bias;
992 end_data += load_bias;
994 /* Calling set_brk effectively mmaps the pages that we need
995 * for the bss and break sections. We must do this before
996 * mapping in the interpreter, to make sure it doesn't wind
997 * up getting placed where the bss needs to go.
999 retval = set_brk(elf_bss, elf_brk);
1000 if (retval)
1001 goto out_free_dentry;
1002 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1003 retval = -EFAULT; /* Nobody gets to see this, but.. */
1004 goto out_free_dentry;
1007 if (elf_interpreter) {
1008 unsigned long interp_map_addr = 0;
1010 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 interpreter,
1012 &interp_map_addr,
1013 load_bias, interp_elf_phdata);
1014 if (!IS_ERR((void *)elf_entry)) {
1016 * load_elf_interp() returns relocation
1017 * adjustment
1019 interp_load_addr = elf_entry;
1020 elf_entry += loc->interp_elf_ex.e_entry;
1022 if (BAD_ADDR(elf_entry)) {
1023 retval = IS_ERR((void *)elf_entry) ?
1024 (int)elf_entry : -EINVAL;
1025 goto out_free_dentry;
1027 reloc_func_desc = interp_load_addr;
1029 allow_write_access(interpreter);
1030 fput(interpreter);
1031 kfree(elf_interpreter);
1032 } else {
1033 elf_entry = loc->elf_ex.e_entry;
1034 if (BAD_ADDR(elf_entry)) {
1035 retval = -EINVAL;
1036 goto out_free_dentry;
/* Neither program header table is referenced after this point. */
1040 kfree(interp_elf_phdata);
1041 kfree(elf_phdata);
1043 set_binfmt(&elf_format);
1045 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1046 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1047 if (retval < 0)
1048 goto out;
1049 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1051 install_exec_creds(bprm);
1052 retval = create_elf_tables(bprm, &loc->elf_ex,
1053 load_addr, interp_load_addr);
1054 if (retval < 0)
1055 goto out;
1056 /* N.B. passed_fileno might not be initialized? */
1057 current->mm->end_code = end_code;
1058 current->mm->start_code = start_code;
1059 current->mm->start_data = start_data;
1060 current->mm->end_data = end_data;
1061 current->mm->start_stack = bprm->p;
1063 #ifdef arch_randomize_brk
1064 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1065 current->mm->brk = current->mm->start_brk =
1066 arch_randomize_brk(current->mm);
1067 #ifdef CONFIG_COMPAT_BRK
1068 current->brk_randomized = 1;
1069 #endif
1071 #endif
1073 if (current->personality & MMAP_PAGE_ZERO) {
1074 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1075 and some applications "depend" upon this behavior.
1076 Since we do not have the power to recompile these, we
1077 emulate the SVr4 behavior. Sigh. */
1078 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1079 MAP_FIXED | MAP_PRIVATE, 0);
1082 #ifdef ELF_PLAT_INIT
1084 * The ABI may specify that certain registers be set up in special
1085 * ways (on i386 %edx is the address of a DT_FINI function, for
1086 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1087 * that the e_entry field is the address of the function descriptor
1088 * for the startup routine, rather than the address of the startup
1089 * routine itself. This macro performs whatever initialization to
1090 * the regs structure is required as well as any relocations to the
1091 * function descriptor entries when executing dynamically links apps.
1093 ELF_PLAT_INIT(regs, reloc_func_desc);
1094 #endif
1096 start_thread(regs, elf_entry, bprm->p);
1097 retval = 0;
1098 out:
1099 kfree(loc);
1100 out_ret:
1101 return retval;
1103 /* error cleanup */
1104 out_free_dentry:
1105 kfree(interp_elf_phdata);
1106 allow_write_access(interpreter);
1107 if (interpreter)
1108 fput(interpreter);
1109 out_free_interp:
1110 kfree(elf_interpreter);
1111 out_free_ph:
1112 kfree(elf_phdata);
1113 goto out;
1116 #ifdef CONFIG_USELIB
1117 /* This is really simpleminded and specialized - we are loading an
1118 a.out library that is given an ELF header. */
1119 static int load_elf_library(struct file *file)
1121 struct elf_phdr *elf_phdata;
1122 struct elf_phdr *eppnt;
1123 unsigned long elf_bss, bss, len;
1124 int retval, error, i, j;
1125 struct elfhdr elf_ex;
1127 error = -ENOEXEC;
1128 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1129 if (retval != sizeof(elf_ex))
1130 goto out;
1132 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1133 goto out;
1135 /* First of all, some simple consistency checks */
1136 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1137 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1138 goto out;
1140 /* Now read in all of the header information */
1142 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1143 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1145 error = -ENOMEM;
1146 elf_phdata = kmalloc(j, GFP_KERNEL);
1147 if (!elf_phdata)
1148 goto out;
1150 eppnt = elf_phdata;
1151 error = -ENOEXEC;
1152 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1153 if (retval != j)
1154 goto out_free_ph;
1156 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1157 if ((eppnt + i)->p_type == PT_LOAD)
1158 j++;
1159 if (j != 1)
1160 goto out_free_ph;
1162 while (eppnt->p_type != PT_LOAD)
1163 eppnt++;
1165 /* Now use mmap to map the library into memory. */
1166 error = vm_mmap(file,
1167 ELF_PAGESTART(eppnt->p_vaddr),
1168 (eppnt->p_filesz +
1169 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1170 PROT_READ | PROT_WRITE | PROT_EXEC,
1171 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1172 (eppnt->p_offset -
1173 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1174 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1175 goto out_free_ph;
1177 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1178 if (padzero(elf_bss)) {
1179 error = -EFAULT;
1180 goto out_free_ph;
1183 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1184 ELF_MIN_ALIGN - 1);
1185 bss = eppnt->p_memsz + eppnt->p_vaddr;
1186 if (bss > len)
1187 vm_brk(len, bss - len);
1188 error = 0;
1190 out_free_ph:
1191 kfree(elf_phdata);
1192 out:
1193 return error;
1195 #endif /* #ifdef CONFIG_USELIB */
1197 #ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
1206 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1207 * that are useful for post-mortem analysis are included in every core dump.
1208 * In that way we ensure that the core dump is fully interpretable later
1209 * without matching up the same kernel and hardware config to see what PC values
1210 * meant. These special mappings include - vDSO, vsyscall, and other
1211 * architecture specific mappings
1213 static bool always_dump_vma(struct vm_area_struct *vma)
1215 /* Any vsyscall mappings? */
1216 if (vma == get_gate_vma(vma->vm_mm))
1217 return true;
1220 * Assume that all vmas with a .name op should always be dumped.
1221 * If this changes, a new vm_ops field can easily be added.
1223 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1224 return true;
1227 * arch_vma_name() returns non-NULL for special architecture mappings,
1228 * such as vDSO sections.
1230 if (arch_vma_name(vma))
1231 return true;
1233 return false;
1237 * Decide what to dump of a segment, part, all or none.
1239 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1240 unsigned long mm_flags)
1242 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1244 /* always dump the vdso and vsyscall sections */
1245 if (always_dump_vma(vma))
1246 goto whole;
1248 if (vma->vm_flags & VM_DONTDUMP)
1249 return 0;
1251 /* Hugetlb memory check */
1252 if (vma->vm_flags & VM_HUGETLB) {
1253 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1254 goto whole;
1255 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1256 goto whole;
1257 return 0;
1260 /* Do not dump I/O mapped devices or special mappings */
1261 if (vma->vm_flags & VM_IO)
1262 return 0;
1264 /* By default, dump shared memory if mapped from an anonymous file. */
1265 if (vma->vm_flags & VM_SHARED) {
1266 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1267 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1268 goto whole;
1269 return 0;
1272 /* Dump segments that have been written to. */
1273 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1274 goto whole;
1275 if (vma->vm_file == NULL)
1276 return 0;
1278 if (FILTER(MAPPED_PRIVATE))
1279 goto whole;
1282 * If this looks like the beginning of a DSO or executable mapping,
1283 * check for an ELF header. If we find one, dump the first page to
1284 * aid in determining what was mapped here.
1286 if (FILTER(ELF_HEADERS) &&
1287 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1288 u32 __user *header = (u32 __user *) vma->vm_start;
1289 u32 word;
1290 mm_segment_t fs = get_fs();
1292 * Doing it this way gets the constant folded by GCC.
1294 union {
1295 u32 cmp;
1296 char elfmag[SELFMAG];
1297 } magic;
1298 BUILD_BUG_ON(SELFMAG != sizeof word);
1299 magic.elfmag[EI_MAG0] = ELFMAG0;
1300 magic.elfmag[EI_MAG1] = ELFMAG1;
1301 magic.elfmag[EI_MAG2] = ELFMAG2;
1302 magic.elfmag[EI_MAG3] = ELFMAG3;
1304 * Switch to the user "segment" for get_user(),
1305 * then put back what elf_core_dump() had in place.
1307 set_fs(USER_DS);
1308 if (unlikely(get_user(word, header)))
1309 word = 0;
1310 set_fs(fs);
1311 if (word == magic.cmp)
1312 return PAGE_SIZE;
1315 #undef FILTER
1317 return 0;
1319 whole:
1320 return vma->vm_end - vma->vm_start;
/* An ELF note in memory */
struct memelfnote {
	const char *name;	/* owner name string, e.g. "CORE" or "LINUX" */
	int type;		/* note type (NT_*) */
	unsigned int datasz;	/* size of @data in bytes */
	void *data;		/* note payload */
};
1332 static int notesize(struct memelfnote *en)
1334 int sz;
1336 sz = sizeof(struct elf_note);
1337 sz += roundup(strlen(en->name) + 1, 4);
1338 sz += roundup(en->datasz, 4);
1340 return sz;
1343 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1345 struct elf_note en;
1346 en.n_namesz = strlen(men->name) + 1;
1347 en.n_descsz = men->datasz;
1348 en.n_type = men->type;
1350 return dump_emit(cprm, &en, sizeof(en)) &&
1351 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1352 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1355 static void fill_elf_header(struct elfhdr *elf, int segs,
1356 u16 machine, u32 flags)
1358 memset(elf, 0, sizeof(*elf));
1360 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1361 elf->e_ident[EI_CLASS] = ELF_CLASS;
1362 elf->e_ident[EI_DATA] = ELF_DATA;
1363 elf->e_ident[EI_VERSION] = EV_CURRENT;
1364 elf->e_ident[EI_OSABI] = ELF_OSABI;
1366 elf->e_type = ET_CORE;
1367 elf->e_machine = machine;
1368 elf->e_version = EV_CURRENT;
1369 elf->e_phoff = sizeof(struct elfhdr);
1370 elf->e_flags = flags;
1371 elf->e_ehsize = sizeof(struct elfhdr);
1372 elf->e_phentsize = sizeof(struct elf_phdr);
1373 elf->e_phnum = segs;
1375 return;
1378 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1380 phdr->p_type = PT_NOTE;
1381 phdr->p_offset = offset;
1382 phdr->p_vaddr = 0;
1383 phdr->p_paddr = 0;
1384 phdr->p_filesz = sz;
1385 phdr->p_memsz = 0;
1386 phdr->p_flags = 0;
1387 phdr->p_align = 0;
1388 return;
1391 static void fill_note(struct memelfnote *note, const char *name, int type,
1392 unsigned int sz, void *data)
1394 note->name = name;
1395 note->type = type;
1396 note->datasz = sz;
1397 note->data = data;
1398 return;
1402 * fill up all the fields in prstatus from the given task struct, except
1403 * registers which need to be filled up separately.
1405 static void fill_prstatus(struct elf_prstatus *prstatus,
1406 struct task_struct *p, long signr)
1408 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1409 prstatus->pr_sigpend = p->pending.signal.sig[0];
1410 prstatus->pr_sighold = p->blocked.sig[0];
1411 rcu_read_lock();
1412 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1413 rcu_read_unlock();
1414 prstatus->pr_pid = task_pid_vnr(p);
1415 prstatus->pr_pgrp = task_pgrp_vnr(p);
1416 prstatus->pr_sid = task_session_vnr(p);
1417 if (thread_group_leader(p)) {
1418 struct task_cputime cputime;
1421 * This is the record for the group leader. It shows the
1422 * group-wide total, not its individual thread total.
1424 thread_group_cputime(p, &cputime);
1425 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1426 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1427 } else {
1428 cputime_t utime, stime;
1430 task_cputime(p, &utime, &stime);
1431 cputime_to_timeval(utime, &prstatus->pr_utime);
1432 cputime_to_timeval(stime, &prstatus->pr_stime);
1434 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1435 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1438 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1439 struct mm_struct *mm)
1441 const struct cred *cred;
1442 unsigned int i, len;
1444 /* first copy the parameters from user space */
1445 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1447 len = mm->arg_end - mm->arg_start;
1448 if (len >= ELF_PRARGSZ)
1449 len = ELF_PRARGSZ-1;
1450 if (copy_from_user(&psinfo->pr_psargs,
1451 (const char __user *)mm->arg_start, len))
1452 return -EFAULT;
1453 for(i = 0; i < len; i++)
1454 if (psinfo->pr_psargs[i] == 0)
1455 psinfo->pr_psargs[i] = ' ';
1456 psinfo->pr_psargs[len] = 0;
1458 rcu_read_lock();
1459 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1460 rcu_read_unlock();
1461 psinfo->pr_pid = task_pid_vnr(p);
1462 psinfo->pr_pgrp = task_pgrp_vnr(p);
1463 psinfo->pr_sid = task_session_vnr(p);
1465 i = p->state ? ffz(~p->state) + 1 : 0;
1466 psinfo->pr_state = i;
1467 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1468 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1469 psinfo->pr_nice = task_nice(p);
1470 psinfo->pr_flag = p->flags;
1471 rcu_read_lock();
1472 cred = __task_cred(p);
1473 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1474 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1475 rcu_read_unlock();
1476 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1478 return 0;
1481 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1483 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1484 int i = 0;
1486 i += 2;
1487 while (auxv[i - 2] != AT_NULL);
1488 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1491 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1492 const siginfo_t *siginfo)
1494 mm_segment_t old_fs = get_fs();
1495 set_fs(KERNEL_DS);
1496 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1497 set_fs(old_fs);
1498 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1501 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1503 * Format of NT_FILE note:
1505 * long count -- how many files are mapped
1506 * long page_size -- units for file_ofs
1507 * array of [COUNT] elements of
1508 * long start
1509 * long end
1510 * long file_ofs
1511 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1513 static int fill_files_note(struct memelfnote *note)
1515 struct vm_area_struct *vma;
1516 unsigned count, size, names_ofs, remaining, n;
1517 user_long_t *data;
1518 user_long_t *start_end_ofs;
1519 char *name_base, *name_curpos;
1521 /* *Estimated* file count and total data size needed */
1522 count = current->mm->map_count;
1523 size = count * 64;
1525 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1526 alloc:
1527 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1528 return -EINVAL;
1529 size = round_up(size, PAGE_SIZE);
1530 data = vmalloc(size);
1531 if (!data)
1532 return -ENOMEM;
1534 start_end_ofs = data + 2;
1535 name_base = name_curpos = ((char *)data) + names_ofs;
1536 remaining = size - names_ofs;
1537 count = 0;
1538 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1539 struct file *file;
1540 const char *filename;
1542 file = vma->vm_file;
1543 if (!file)
1544 continue;
1545 filename = d_path(&file->f_path, name_curpos, remaining);
1546 if (IS_ERR(filename)) {
1547 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1548 vfree(data);
1549 size = size * 5 / 4;
1550 goto alloc;
1552 continue;
1555 /* d_path() fills at the end, move name down */
1556 /* n = strlen(filename) + 1: */
1557 n = (name_curpos + remaining) - filename;
1558 remaining = filename - name_curpos;
1559 memmove(name_curpos, filename, n);
1560 name_curpos += n;
1562 *start_end_ofs++ = vma->vm_start;
1563 *start_end_ofs++ = vma->vm_end;
1564 *start_end_ofs++ = vma->vm_pgoff;
1565 count++;
1568 /* Now we know exact count of files, can store it */
1569 data[0] = count;
1570 data[1] = PAGE_SIZE;
1572 * Count usually is less than current->mm->map_count,
1573 * we need to move filenames down.
1575 n = current->mm->map_count - count;
1576 if (n != 0) {
1577 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1578 memmove(name_base - shift_bytes, name_base,
1579 name_curpos - name_base);
1580 name_curpos -= shift_bytes;
1583 size = name_curpos - (char *)data;
1584 fill_note(note, "CORE", NT_FILE, size, data);
1585 return 0;
1588 #ifdef CORE_DUMP_USE_REGSET
1589 #include <linux/regset.h>
1591 struct elf_thread_core_info {
1592 struct elf_thread_core_info *next;
1593 struct task_struct *task;
1594 struct elf_prstatus prstatus;
1595 struct memelfnote notes[0];
1598 struct elf_note_info {
1599 struct elf_thread_core_info *thread;
1600 struct memelfnote psinfo;
1601 struct memelfnote signote;
1602 struct memelfnote auxv;
1603 struct memelfnote files;
1604 user_siginfo_t csigdata;
1605 size_t size;
1606 int thread_notes;
1610 * When a regset has a writeback hook, we call it on each thread before
1611 * dumping user memory. On register window machines, this makes sure the
1612 * user memory backing the register data is up to date before we read it.
1614 static void do_thread_regset_writeback(struct task_struct *task,
1615 const struct user_regset *regset)
1617 if (regset->writeback)
1618 regset->writeback(task, regset, 1);
/*
 * Accessors used when building the NT_PRSTATUS note.  Each one is only
 * defined here when nothing earlier has defined it (presumably an
 * architecture header may override them - confirm per arch); the
 * fallbacks operate on struct elf_prstatus as-is.
 */

/* Number of bytes of register data to fetch into pr_reg. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Number of bytes of the prstatus record written to the note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Where inside the prstatus record the register data lives. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Record whether floating point state accompanies this prstatus. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1637 static int fill_thread_core_info(struct elf_thread_core_info *t,
1638 const struct user_regset_view *view,
1639 long signr, size_t *total)
1641 unsigned int i;
1644 * NT_PRSTATUS is the one special case, because the regset data
1645 * goes into the pr_reg field inside the note contents, rather
1646 * than being the whole note contents. We fill the reset in here.
1647 * We assume that regset 0 is NT_PRSTATUS.
1649 fill_prstatus(&t->prstatus, t->task, signr);
1650 (void) view->regsets[0].get(t->task, &view->regsets[0],
1651 0, PR_REG_SIZE(t->prstatus.pr_reg),
1652 PR_REG_PTR(&t->prstatus), NULL);
1654 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1655 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1656 *total += notesize(&t->notes[0]);
1658 do_thread_regset_writeback(t->task, &view->regsets[0]);
1661 * Each other regset might generate a note too. For each regset
1662 * that has no core_note_type or is inactive, we leave t->notes[i]
1663 * all zero and we'll know to skip writing it later.
1665 for (i = 1; i < view->n; ++i) {
1666 const struct user_regset *regset = &view->regsets[i];
1667 do_thread_regset_writeback(t->task, regset);
1668 if (regset->core_note_type && regset->get &&
1669 (!regset->active || regset->active(t->task, regset))) {
1670 int ret;
1671 size_t size = regset->n * regset->size;
1672 void *data = kmalloc(size, GFP_KERNEL);
1673 if (unlikely(!data))
1674 return 0;
1675 ret = regset->get(t->task, regset,
1676 0, size, data, NULL);
1677 if (unlikely(ret))
1678 kfree(data);
1679 else {
1680 if (regset->core_note_type != NT_PRFPREG)
1681 fill_note(&t->notes[i], "LINUX",
1682 regset->core_note_type,
1683 size, data);
1684 else {
1685 SET_PR_FPVALID(&t->prstatus, 1);
1686 fill_note(&t->notes[i], "CORE",
1687 NT_PRFPREG, size, data);
1689 *total += notesize(&t->notes[i]);
1694 return 1;
1697 static int fill_note_info(struct elfhdr *elf, int phdrs,
1698 struct elf_note_info *info,
1699 const siginfo_t *siginfo, struct pt_regs *regs)
1701 struct task_struct *dump_task = current;
1702 const struct user_regset_view *view = task_user_regset_view(dump_task);
1703 struct elf_thread_core_info *t;
1704 struct elf_prpsinfo *psinfo;
1705 struct core_thread *ct;
1706 unsigned int i;
1708 info->size = 0;
1709 info->thread = NULL;
1711 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1712 if (psinfo == NULL) {
1713 info->psinfo.data = NULL; /* So we don't free this wrongly */
1714 return 0;
1717 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1720 * Figure out how many notes we're going to need for each thread.
1722 info->thread_notes = 0;
1723 for (i = 0; i < view->n; ++i)
1724 if (view->regsets[i].core_note_type != 0)
1725 ++info->thread_notes;
1728 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1729 * since it is our one special case.
1731 if (unlikely(info->thread_notes == 0) ||
1732 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1733 WARN_ON(1);
1734 return 0;
1738 * Initialize the ELF file header.
1740 fill_elf_header(elf, phdrs,
1741 view->e_machine, view->e_flags);
1744 * Allocate a structure for each thread.
1746 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1747 t = kzalloc(offsetof(struct elf_thread_core_info,
1748 notes[info->thread_notes]),
1749 GFP_KERNEL);
1750 if (unlikely(!t))
1751 return 0;
1753 t->task = ct->task;
1754 if (ct->task == dump_task || !info->thread) {
1755 t->next = info->thread;
1756 info->thread = t;
1757 } else {
1759 * Make sure to keep the original task at
1760 * the head of the list.
1762 t->next = info->thread->next;
1763 info->thread->next = t;
1768 * Now fill in each thread's information.
1770 for (t = info->thread; t != NULL; t = t->next)
1771 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1772 return 0;
1775 * Fill in the two process-wide notes.
1777 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1778 info->size += notesize(&info->psinfo);
1780 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1781 info->size += notesize(&info->signote);
1783 fill_auxv_note(&info->auxv, current->mm);
1784 info->size += notesize(&info->auxv);
1786 if (fill_files_note(&info->files) == 0)
1787 info->size += notesize(&info->files);
1789 return 1;
1792 static size_t get_note_info_size(struct elf_note_info *info)
1794 return info->size;
1798 * Write all the notes for each thread. When writing the first thread, the
1799 * process-wide notes are interleaved after the first thread-specific note.
1801 static int write_note_info(struct elf_note_info *info,
1802 struct coredump_params *cprm)
1804 bool first = true;
1805 struct elf_thread_core_info *t = info->thread;
1807 do {
1808 int i;
1810 if (!writenote(&t->notes[0], cprm))
1811 return 0;
1813 if (first && !writenote(&info->psinfo, cprm))
1814 return 0;
1815 if (first && !writenote(&info->signote, cprm))
1816 return 0;
1817 if (first && !writenote(&info->auxv, cprm))
1818 return 0;
1819 if (first && info->files.data &&
1820 !writenote(&info->files, cprm))
1821 return 0;
1823 for (i = 1; i < info->thread_notes; ++i)
1824 if (t->notes[i].data &&
1825 !writenote(&t->notes[i], cprm))
1826 return 0;
1828 first = false;
1829 t = t->next;
1830 } while (t);
1832 return 1;
1835 static void free_note_info(struct elf_note_info *info)
1837 struct elf_thread_core_info *threads = info->thread;
1838 while (threads) {
1839 unsigned int i;
1840 struct elf_thread_core_info *t = threads;
1841 threads = t->next;
1842 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1843 for (i = 1; i < info->thread_notes; ++i)
1844 kfree(t->notes[i].data);
1845 kfree(t);
1847 kfree(info->psinfo.data);
1848 vfree(info->files.data);
1851 #else
1853 /* Here is the structure in which status of each thread is captured. */
1854 struct elf_thread_status
1856 struct list_head list;
1857 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1858 elf_fpregset_t fpu; /* NT_PRFPREG */
1859 struct task_struct *thread;
1860 #ifdef ELF_CORE_COPY_XFPREGS
1861 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1862 #endif
1863 struct memelfnote notes[3];
1864 int num_notes;
1868 * In order to add the specific thread information for the elf file format,
1869 * we need to keep a linked list of every threads pr_status and then create
1870 * a single section for them in the final core file.
1872 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1874 int sz = 0;
1875 struct task_struct *p = t->thread;
1876 t->num_notes = 0;
1878 fill_prstatus(&t->prstatus, p, signr);
1879 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1881 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1882 &(t->prstatus));
1883 t->num_notes++;
1884 sz += notesize(&t->notes[0]);
1886 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1887 &t->fpu))) {
1888 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1889 &(t->fpu));
1890 t->num_notes++;
1891 sz += notesize(&t->notes[1]);
1894 #ifdef ELF_CORE_COPY_XFPREGS
1895 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1896 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1897 sizeof(t->xfpu), &t->xfpu);
1898 t->num_notes++;
1899 sz += notesize(&t->notes[2]);
1901 #endif
1902 return sz;
1905 struct elf_note_info {
1906 struct memelfnote *notes;
1907 struct memelfnote *notes_files;
1908 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1909 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1910 struct list_head thread_list;
1911 elf_fpregset_t *fpu;
1912 #ifdef ELF_CORE_COPY_XFPREGS
1913 elf_fpxregset_t *xfpu;
1914 #endif
1915 user_siginfo_t csigdata;
1916 int thread_status_size;
1917 int numnote;
1920 static int elf_note_info_init(struct elf_note_info *info)
1922 memset(info, 0, sizeof(*info));
1923 INIT_LIST_HEAD(&info->thread_list);
1925 /* Allocate space for ELF notes */
1926 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1927 if (!info->notes)
1928 return 0;
1929 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1930 if (!info->psinfo)
1931 return 0;
1932 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1933 if (!info->prstatus)
1934 return 0;
1935 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1936 if (!info->fpu)
1937 return 0;
1938 #ifdef ELF_CORE_COPY_XFPREGS
1939 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1940 if (!info->xfpu)
1941 return 0;
1942 #endif
1943 return 1;
1946 static int fill_note_info(struct elfhdr *elf, int phdrs,
1947 struct elf_note_info *info,
1948 const siginfo_t *siginfo, struct pt_regs *regs)
1950 struct list_head *t;
1951 struct core_thread *ct;
1952 struct elf_thread_status *ets;
1954 if (!elf_note_info_init(info))
1955 return 0;
1957 for (ct = current->mm->core_state->dumper.next;
1958 ct; ct = ct->next) {
1959 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1960 if (!ets)
1961 return 0;
1963 ets->thread = ct->task;
1964 list_add(&ets->list, &info->thread_list);
1967 list_for_each(t, &info->thread_list) {
1968 int sz;
1970 ets = list_entry(t, struct elf_thread_status, list);
1971 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1972 info->thread_status_size += sz;
1974 /* now collect the dump for the current */
1975 memset(info->prstatus, 0, sizeof(*info->prstatus));
1976 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1977 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1979 /* Set up header */
1980 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1983 * Set up the notes in similar form to SVR4 core dumps made
1984 * with info from their /proc.
1987 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1988 sizeof(*info->prstatus), info->prstatus);
1989 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1990 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1991 sizeof(*info->psinfo), info->psinfo);
1993 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1994 fill_auxv_note(info->notes + 3, current->mm);
1995 info->numnote = 4;
1997 if (fill_files_note(info->notes + info->numnote) == 0) {
1998 info->notes_files = info->notes + info->numnote;
1999 info->numnote++;
2002 /* Try to dump the FPU. */
2003 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2004 info->fpu);
2005 if (info->prstatus->pr_fpvalid)
2006 fill_note(info->notes + info->numnote++,
2007 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2008 #ifdef ELF_CORE_COPY_XFPREGS
2009 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2010 fill_note(info->notes + info->numnote++,
2011 "LINUX", ELF_CORE_XFPREG_TYPE,
2012 sizeof(*info->xfpu), info->xfpu);
2013 #endif
2015 return 1;
2018 static size_t get_note_info_size(struct elf_note_info *info)
2020 int sz = 0;
2021 int i;
2023 for (i = 0; i < info->numnote; i++)
2024 sz += notesize(info->notes + i);
2026 sz += info->thread_status_size;
2028 return sz;
2031 static int write_note_info(struct elf_note_info *info,
2032 struct coredump_params *cprm)
2034 int i;
2035 struct list_head *t;
2037 for (i = 0; i < info->numnote; i++)
2038 if (!writenote(info->notes + i, cprm))
2039 return 0;
2041 /* write out the thread status notes section */
2042 list_for_each(t, &info->thread_list) {
2043 struct elf_thread_status *tmp =
2044 list_entry(t, struct elf_thread_status, list);
2046 for (i = 0; i < tmp->num_notes; i++)
2047 if (!writenote(&tmp->notes[i], cprm))
2048 return 0;
2051 return 1;
2054 static void free_note_info(struct elf_note_info *info)
2056 while (!list_empty(&info->thread_list)) {
2057 struct list_head *tmp = info->thread_list.next;
2058 list_del(tmp);
2059 kfree(list_entry(tmp, struct elf_thread_status, list));
2062 /* Free data possibly allocated by fill_files_note(): */
2063 if (info->notes_files)
2064 vfree(info->notes_files->data);
2066 kfree(info->prstatus);
2067 kfree(info->psinfo);
2068 kfree(info->notes);
2069 kfree(info->fpu);
2070 #ifdef ELF_CORE_COPY_XFPREGS
2071 kfree(info->xfpu);
2072 #endif
2075 #endif
2077 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2078 struct vm_area_struct *gate_vma)
2080 struct vm_area_struct *ret = tsk->mm->mmap;
2082 if (ret)
2083 return ret;
2084 return gate_vma;
2087 * Helper function for iterating across a vma list. It ensures that the caller
2088 * will visit `gate_vma' prior to terminating the search.
2090 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2091 struct vm_area_struct *gate_vma)
2093 struct vm_area_struct *ret;
2095 ret = this_vma->vm_next;
2096 if (ret)
2097 return ret;
2098 if (this_vma == gate_vma)
2099 return NULL;
2100 return gate_vma;
2103 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2104 elf_addr_t e_shoff, int segs)
2106 elf->e_shoff = e_shoff;
2107 elf->e_shentsize = sizeof(*shdr4extnum);
2108 elf->e_shnum = 1;
2109 elf->e_shstrndx = SHN_UNDEF;
2111 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2113 shdr4extnum->sh_type = SHT_NULL;
2114 shdr4extnum->sh_size = elf->e_shnum;
2115 shdr4extnum->sh_link = elf->e_shstrndx;
2116 shdr4extnum->sh_info = segs;
2120 * Actual dumper
2122 * This is a two-pass process; first we find the offsets of the bits,
2123 * and then they are actually written out. If we run out of core limit
2124 * we just truncate.
2126 static int elf_core_dump(struct coredump_params *cprm)
2128 int has_dumped = 0;
2129 mm_segment_t fs;
2130 int segs, i;
2131 size_t vma_data_size = 0;
2132 struct vm_area_struct *vma, *gate_vma;
2133 struct elfhdr *elf = NULL;
2134 loff_t offset = 0, dataoff;
2135 struct elf_note_info info = { };
2136 struct elf_phdr *phdr4note = NULL;
2137 struct elf_shdr *shdr4extnum = NULL;
2138 Elf_Half e_phnum;
2139 elf_addr_t e_shoff;
2140 elf_addr_t *vma_filesz = NULL;
2143 * We no longer stop all VM operations.
2145 * This is because those proceses that could possibly change map_count
2146 * or the mmap / vma pages are now blocked in do_exit on current
2147 * finishing this core dump.
2149 * Only ptrace can touch these memory addresses, but it doesn't change
2150 * the map_count or the pages allocated. So no possibility of crashing
2151 * exists while dumping the mm->vm_next areas to the core file.
2154 /* alloc memory for large data structures: too large to be on stack */
2155 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2156 if (!elf)
2157 goto out;
2159 * The number of segs are recored into ELF header as 16bit value.
2160 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2162 segs = current->mm->map_count;
2163 segs += elf_core_extra_phdrs();
2165 gate_vma = get_gate_vma(current->mm);
2166 if (gate_vma != NULL)
2167 segs++;
2169 /* for notes section */
2170 segs++;
2172 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2173 * this, kernel supports extended numbering. Have a look at
2174 * include/linux/elf.h for further information. */
2175 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2178 * Collect all the non-memory information about the process for the
2179 * notes. This also sets up the file header.
2181 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2182 goto cleanup;
2184 has_dumped = 1;
2186 fs = get_fs();
2187 set_fs(KERNEL_DS);
2189 offset += sizeof(*elf); /* Elf header */
2190 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2192 /* Write notes phdr entry */
2194 size_t sz = get_note_info_size(&info);
2196 sz += elf_coredump_extra_notes_size();
2198 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2199 if (!phdr4note)
2200 goto end_coredump;
2202 fill_elf_note_phdr(phdr4note, sz, offset);
2203 offset += sz;
2206 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2208 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2209 if (!vma_filesz)
2210 goto end_coredump;
2212 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2213 vma = next_vma(vma, gate_vma)) {
2214 unsigned long dump_size;
2216 dump_size = vma_dump_size(vma, cprm->mm_flags);
2217 vma_filesz[i++] = dump_size;
2218 vma_data_size += dump_size;
2221 offset += vma_data_size;
2222 offset += elf_core_extra_data_size();
2223 e_shoff = offset;
2225 if (e_phnum == PN_XNUM) {
2226 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2227 if (!shdr4extnum)
2228 goto end_coredump;
2229 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2232 offset = dataoff;
2234 if (!dump_emit(cprm, elf, sizeof(*elf)))
2235 goto end_coredump;
2237 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2238 goto end_coredump;
2240 /* Write program headers for segments dump */
2241 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2242 vma = next_vma(vma, gate_vma)) {
2243 struct elf_phdr phdr;
2245 phdr.p_type = PT_LOAD;
2246 phdr.p_offset = offset;
2247 phdr.p_vaddr = vma->vm_start;
2248 phdr.p_paddr = 0;
2249 phdr.p_filesz = vma_filesz[i++];
2250 phdr.p_memsz = vma->vm_end - vma->vm_start;
2251 offset += phdr.p_filesz;
2252 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2253 if (vma->vm_flags & VM_WRITE)
2254 phdr.p_flags |= PF_W;
2255 if (vma->vm_flags & VM_EXEC)
2256 phdr.p_flags |= PF_X;
2257 phdr.p_align = ELF_EXEC_PAGESIZE;
2259 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2260 goto end_coredump;
2263 if (!elf_core_write_extra_phdrs(cprm, offset))
2264 goto end_coredump;
2266 /* write out the notes section */
2267 if (!write_note_info(&info, cprm))
2268 goto end_coredump;
2270 if (elf_coredump_extra_notes_write(cprm))
2271 goto end_coredump;
2273 /* Align to page */
2274 if (!dump_skip(cprm, dataoff - cprm->written))
2275 goto end_coredump;
2277 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2278 vma = next_vma(vma, gate_vma)) {
2279 unsigned long addr;
2280 unsigned long end;
2282 end = vma->vm_start + vma_filesz[i++];
2284 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2285 struct page *page;
2286 int stop;
2288 page = get_dump_page(addr);
2289 if (page) {
2290 void *kaddr = kmap(page);
2291 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2292 kunmap(page);
2293 page_cache_release(page);
2294 } else
2295 stop = !dump_skip(cprm, PAGE_SIZE);
2296 if (stop)
2297 goto end_coredump;
2301 if (!elf_core_write_extra_data(cprm))
2302 goto end_coredump;
2304 if (e_phnum == PN_XNUM) {
2305 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2306 goto end_coredump;
2309 end_coredump:
2310 set_fs(fs);
2312 cleanup:
2313 free_note_info(&info);
2314 kfree(shdr4extnum);
2315 kfree(vma_filesz);
2316 kfree(phdr4note);
2317 kfree(elf);
2318 out:
2319 return has_dumped;
2322 #endif /* CONFIG_ELF_CORE */
2324 static int __init init_elf_binfmt(void)
2326 register_binfmt(&elf_format);
2327 return 0;
2330 static void __exit exit_elf_binfmt(void)
2332 /* Remove the COFF and ELF loaders. */
2333 unregister_binfmt(&elf_format);
2336 core_initcall(init_elf_binfmt);
2337 module_exit(exit_elf_binfmt);
2338 MODULE_LICENSE("GPL");