Linux 4.2.6
[linux/fpc-iii.git] / fs / binfmt_elf.c
blob6b659967898ebc534c1ce8d91ff1179f050e8f34
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <asm/uaccess.h>
39 #include <asm/param.h>
40 #include <asm/page.h>
/*
 * Default spellings for user_long_t and user_siginfo_t.  Each one is only
 * defined here when nothing earlier in the translation unit has already
 * provided its own definition.
 */
#if !defined(user_long_t)
#define user_long_t long
#endif

#if !defined(user_siginfo_t)
#define user_siginfo_t siginfo_t
#endif
/* Forward declarations for the entry points referenced by elf_format. */
static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/* Without CONFIG_USELIB there is no library loader: supply NULL instead. */
#ifndef CONFIG_USELIB
#define load_elf_library NULL
#else
static int load_elf_library(struct file *);
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifndef CONFIG_ELF_CORE
#define elf_core_dump	NULL
#else
static int elf_core_dump(struct coredump_params *cprm);
#endif
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if PAGE_SIZE < ELF_EXEC_PAGESIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Fall back to 0 when ELF_CORE_EFLAGS has not been defined already. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset of _v within its ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN  - round _v up to an ELF_MIN_ALIGN boundary
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
83 static struct linux_binfmt elf_format = {
84 .module = THIS_MODULE,
85 .load_binary = load_elf_binary,
86 .load_shlib = load_elf_library,
87 .core_dump = elf_core_dump,
88 .min_coredump = ELF_EXEC_PAGESIZE,
/*
 * True when x, viewed as an unsigned long, is at or beyond TASK_SIZE.
 * Callers in this file also use it to detect error values returned in an
 * unsigned long before testing them with IS_ERR().
 */
#define BAD_ADDR(x) (TASK_SIZE <= (unsigned long)(x))
93 static int set_brk(unsigned long start, unsigned long end)
95 start = ELF_PAGEALIGN(start);
96 end = ELF_PAGEALIGN(end);
97 if (end > start) {
98 unsigned long addr;
99 addr = vm_brk(start, end - start);
100 if (BAD_ADDR(addr))
101 return addr;
103 current->mm->start_brk = current->mm->brk = end;
104 return 0;
107 /* We need to explicitly zero any fractional pages
108 after the data section (i.e. bss). This would
109 contain the junk from the file that should not
110 be in memory
112 static int padzero(unsigned long elf_bss)
114 unsigned long nbyte;
116 nbyte = ELF_PAGEOFFSET(elf_bss);
117 if (nbyte) {
118 nbyte = ELF_MIN_ALIGN - nbyte;
119 if (clear_user((void __user *) elf_bss, nbyte))
120 return -EFAULT;
122 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - sp moved by 'items' and rounded to a 16-byte
 *                            boundary, as an unsigned long
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack by updating
 *                            the lvalue 'sp'; evaluates to the start of
 *                            the reserved area
 *
 * Every macro argument is parenthesised so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
#if !defined(ELF_BASE_PLATFORM)
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * With no arch definition the string is NULL and create_elf_tables()
 * skips the entry.
 */
#define ELF_BASE_PLATFORM NULL
#endif
149 static int
150 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
151 unsigned long load_addr, unsigned long interp_load_addr)
153 unsigned long p = bprm->p;
154 int argc = bprm->argc;
155 int envc = bprm->envc;
156 elf_addr_t __user *argv;
157 elf_addr_t __user *envp;
158 elf_addr_t __user *sp;
159 elf_addr_t __user *u_platform;
160 elf_addr_t __user *u_base_platform;
161 elf_addr_t __user *u_rand_bytes;
162 const char *k_platform = ELF_PLATFORM;
163 const char *k_base_platform = ELF_BASE_PLATFORM;
164 unsigned char k_rand_bytes[16];
165 int items;
166 elf_addr_t *elf_info;
167 int ei_index = 0;
168 const struct cred *cred = current_cred();
169 struct vm_area_struct *vma;
172 * In some cases (e.g. Hyper-Threading), we want to avoid L1
173 * evictions by the processes running on the same package. One
174 * thing we can do is to shuffle the initial stack for them.
177 p = arch_align_stack(p);
180 * If this architecture has a platform capability string, copy it
181 * to userspace. In some cases (Sparc), this info is impossible
182 * for userspace to get any other way, in others (i386) it is
183 * merely difficult.
185 u_platform = NULL;
186 if (k_platform) {
187 size_t len = strlen(k_platform) + 1;
189 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190 if (__copy_to_user(u_platform, k_platform, len))
191 return -EFAULT;
195 * If this architecture has a "base" platform capability
196 * string, copy it to userspace.
198 u_base_platform = NULL;
199 if (k_base_platform) {
200 size_t len = strlen(k_base_platform) + 1;
202 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203 if (__copy_to_user(u_base_platform, k_base_platform, len))
204 return -EFAULT;
208 * Generate 16 random bytes for userspace PRNG seeding.
210 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
211 u_rand_bytes = (elf_addr_t __user *)
212 STACK_ALLOC(p, sizeof(k_rand_bytes));
213 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
214 return -EFAULT;
216 /* Create the ELF interpreter info */
217 elf_info = (elf_addr_t *)current->mm->saved_auxv;
218 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
219 #define NEW_AUX_ENT(id, val) \
220 do { \
221 elf_info[ei_index++] = id; \
222 elf_info[ei_index++] = val; \
223 } while (0)
225 #ifdef ARCH_DLINFO
227 * ARCH_DLINFO must come first so PPC can do its special alignment of
228 * AUXV.
229 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
230 * ARCH_DLINFO changes
232 ARCH_DLINFO;
233 #endif
234 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
235 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
236 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
237 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
238 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
239 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
240 NEW_AUX_ENT(AT_BASE, interp_load_addr);
241 NEW_AUX_ENT(AT_FLAGS, 0);
242 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
243 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
244 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
245 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
246 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
247 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
248 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
249 #ifdef ELF_HWCAP2
250 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
251 #endif
252 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
253 if (k_platform) {
254 NEW_AUX_ENT(AT_PLATFORM,
255 (elf_addr_t)(unsigned long)u_platform);
257 if (k_base_platform) {
258 NEW_AUX_ENT(AT_BASE_PLATFORM,
259 (elf_addr_t)(unsigned long)u_base_platform);
261 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
262 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 #undef NEW_AUX_ENT
265 /* AT_NULL is zero; clear the rest too */
266 memset(&elf_info[ei_index], 0,
267 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269 /* And advance past the AT_NULL entry. */
270 ei_index += 2;
272 sp = STACK_ADD(p, ei_index);
274 items = (argc + 1) + (envc + 1) + 1;
275 bprm->p = STACK_ROUND(sp, items);
277 /* Point sp at the lowest address on the stack */
278 #ifdef CONFIG_STACK_GROWSUP
279 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
280 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
281 #else
282 sp = (elf_addr_t __user *)bprm->p;
283 #endif
287 * Grow the stack manually; some architectures have a limit on how
288 * far ahead a user-space access may be in order to grow the stack.
290 vma = find_extend_vma(current->mm, bprm->p);
291 if (!vma)
292 return -EFAULT;
294 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
295 if (__put_user(argc, sp++))
296 return -EFAULT;
297 argv = sp;
298 envp = argv + argc + 1;
300 /* Populate argv and envp */
301 p = current->mm->arg_end = current->mm->arg_start;
302 while (argc-- > 0) {
303 size_t len;
304 if (__put_user((elf_addr_t)p, argv++))
305 return -EFAULT;
306 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
307 if (!len || len > MAX_ARG_STRLEN)
308 return -EINVAL;
309 p += len;
311 if (__put_user(0, argv))
312 return -EFAULT;
313 current->mm->arg_end = current->mm->env_start = p;
314 while (envc-- > 0) {
315 size_t len;
316 if (__put_user((elf_addr_t)p, envp++))
317 return -EFAULT;
318 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
319 if (!len || len > MAX_ARG_STRLEN)
320 return -EINVAL;
321 p += len;
323 if (__put_user(0, envp))
324 return -EFAULT;
325 current->mm->env_end = p;
327 /* Put the elf_info on the stack in the right place. */
328 sp = (elf_addr_t __user *)envp + 1;
329 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
330 return -EFAULT;
331 return 0;
334 #ifndef elf_map
336 static unsigned long elf_map(struct file *filep, unsigned long addr,
337 struct elf_phdr *eppnt, int prot, int type,
338 unsigned long total_size)
340 unsigned long map_addr;
341 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
342 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
343 addr = ELF_PAGESTART(addr);
344 size = ELF_PAGEALIGN(size);
346 /* mmap() will return -EINVAL if given a zero size, but a
347 * segment with zero filesize is perfectly valid */
348 if (!size)
349 return addr;
352 * total_size is the size of the ELF (interpreter) image.
353 * The _first_ mmap needs to know the full size, otherwise
354 * randomization might put this image into an overlapping
355 * position with the ELF binary image. (since size < total_size)
356 * So we first map the 'big' image - and unmap the remainder at
357 * the end. (which unmap is needed for ELF images with holes.)
359 if (total_size) {
360 total_size = ELF_PAGEALIGN(total_size);
361 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
362 if (!BAD_ADDR(map_addr))
363 vm_munmap(map_addr+size, total_size-size);
364 } else
365 map_addr = vm_mmap(filep, addr, size, prot, type, off);
367 return(map_addr);
370 #endif /* !elf_map */
372 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 int i, first_idx = -1, last_idx = -1;
376 for (i = 0; i < nr; i++) {
377 if (cmds[i].p_type == PT_LOAD) {
378 last_idx = i;
379 if (first_idx == -1)
380 first_idx = i;
383 if (first_idx == -1)
384 return 0;
386 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
387 ELF_PAGESTART(cmds[first_idx].p_vaddr);
391 * load_elf_phdrs() - load ELF program headers
392 * @elf_ex: ELF header of the binary whose program headers should be loaded
393 * @elf_file: the opened ELF binary file
395 * Loads ELF program headers from the binary file elf_file, which has the ELF
396 * header pointed to by elf_ex, into a newly allocated array. The caller is
397 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
400 struct file *elf_file)
402 struct elf_phdr *elf_phdata = NULL;
403 int retval, size, err = -1;
406 * If the size of this structure has changed, then punt, since
407 * we will be doing the wrong thing.
409 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
410 goto out;
412 /* Sanity check the number of program headers... */
413 if (elf_ex->e_phnum < 1 ||
414 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
415 goto out;
417 /* ...and their total size. */
418 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
419 if (size > ELF_MIN_ALIGN)
420 goto out;
422 elf_phdata = kmalloc(size, GFP_KERNEL);
423 if (!elf_phdata)
424 goto out;
426 /* Read in the program headers */
427 retval = kernel_read(elf_file, elf_ex->e_phoff,
428 (char *)elf_phdata, size);
429 if (retval != size) {
430 err = (retval < 0) ? retval : -EIO;
431 goto out;
434 /* Success! */
435 err = 0;
436 out:
437 if (err) {
438 kfree(elf_phdata);
439 elf_phdata = NULL;
441 return elf_phdata;
#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - final arch-specific check of an ELF about to be loaded
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
512 /* This is much more generalized than the library routine read function,
513 so we keep this separate. Technically the library read function
514 is only provided so that we can read a.out libraries that have
515 an ELF header */
517 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
518 struct file *interpreter, unsigned long *interp_map_addr,
519 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
521 struct elf_phdr *eppnt;
522 unsigned long load_addr = 0;
523 int load_addr_set = 0;
524 unsigned long last_bss = 0, elf_bss = 0;
525 unsigned long error = ~0UL;
526 unsigned long total_size;
527 int i;
529 /* First of all, some simple consistency checks */
530 if (interp_elf_ex->e_type != ET_EXEC &&
531 interp_elf_ex->e_type != ET_DYN)
532 goto out;
533 if (!elf_check_arch(interp_elf_ex))
534 goto out;
535 if (!interpreter->f_op->mmap)
536 goto out;
538 total_size = total_mapping_size(interp_elf_phdata,
539 interp_elf_ex->e_phnum);
540 if (!total_size) {
541 error = -EINVAL;
542 goto out;
545 eppnt = interp_elf_phdata;
546 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
547 if (eppnt->p_type == PT_LOAD) {
548 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
549 int elf_prot = 0;
550 unsigned long vaddr = 0;
551 unsigned long k, map_addr;
553 if (eppnt->p_flags & PF_R)
554 elf_prot = PROT_READ;
555 if (eppnt->p_flags & PF_W)
556 elf_prot |= PROT_WRITE;
557 if (eppnt->p_flags & PF_X)
558 elf_prot |= PROT_EXEC;
559 vaddr = eppnt->p_vaddr;
560 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
561 elf_type |= MAP_FIXED;
562 else if (no_base && interp_elf_ex->e_type == ET_DYN)
563 load_addr = -vaddr;
565 map_addr = elf_map(interpreter, load_addr + vaddr,
566 eppnt, elf_prot, elf_type, total_size);
567 total_size = 0;
568 if (!*interp_map_addr)
569 *interp_map_addr = map_addr;
570 error = map_addr;
571 if (BAD_ADDR(map_addr))
572 goto out;
574 if (!load_addr_set &&
575 interp_elf_ex->e_type == ET_DYN) {
576 load_addr = map_addr - ELF_PAGESTART(vaddr);
577 load_addr_set = 1;
581 * Check to see if the section's size will overflow the
582 * allowed task size. Note that p_filesz must always be
583 * <= p_memsize so it's only necessary to check p_memsz.
585 k = load_addr + eppnt->p_vaddr;
586 if (BAD_ADDR(k) ||
587 eppnt->p_filesz > eppnt->p_memsz ||
588 eppnt->p_memsz > TASK_SIZE ||
589 TASK_SIZE - eppnt->p_memsz < k) {
590 error = -ENOMEM;
591 goto out;
595 * Find the end of the file mapping for this phdr, and
596 * keep track of the largest address we see for this.
598 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
599 if (k > elf_bss)
600 elf_bss = k;
603 * Do the same thing for the memory mapping - between
604 * elf_bss and last_bss is the bss section.
606 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
607 if (k > last_bss)
608 last_bss = k;
612 if (last_bss > elf_bss) {
614 * Now fill out the bss section. First pad the last page up
615 * to the page boundary, and then perform a mmap to make sure
616 * that there are zero-mapped pages up to and including the
617 * last bss page.
619 if (padzero(elf_bss)) {
620 error = -EFAULT;
621 goto out;
624 /* What we have mapped so far */
625 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
627 /* Map the last of the bss segment */
628 error = vm_brk(elf_bss, last_bss - elf_bss);
629 if (BAD_ADDR(error))
630 goto out;
633 error = load_addr;
634 out:
635 return error;
639 * These are the functions used to load ELF style executables and shared
640 * libraries. There is no binary dependent code anywhere else.
643 #ifndef STACK_RND_MASK
644 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
645 #endif
647 static unsigned long randomize_stack_top(unsigned long stack_top)
649 unsigned long random_variable = 0;
651 if ((current->flags & PF_RANDOMIZE) &&
652 !(current->personality & ADDR_NO_RANDOMIZE)) {
653 random_variable = (unsigned long) get_random_int();
654 random_variable &= STACK_RND_MASK;
655 random_variable <<= PAGE_SHIFT;
657 #ifdef CONFIG_STACK_GROWSUP
658 return PAGE_ALIGN(stack_top) + random_variable;
659 #else
660 return PAGE_ALIGN(stack_top) - random_variable;
661 #endif
664 static int load_elf_binary(struct linux_binprm *bprm)
666 struct file *interpreter = NULL; /* to shut gcc up */
667 unsigned long load_addr = 0, load_bias = 0;
668 int load_addr_set = 0;
669 char * elf_interpreter = NULL;
670 unsigned long error;
671 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
672 unsigned long elf_bss, elf_brk;
673 int retval, i;
674 unsigned long elf_entry;
675 unsigned long interp_load_addr = 0;
676 unsigned long start_code, end_code, start_data, end_data;
677 unsigned long reloc_func_desc __maybe_unused = 0;
678 int executable_stack = EXSTACK_DEFAULT;
679 struct pt_regs *regs = current_pt_regs();
680 struct {
681 struct elfhdr elf_ex;
682 struct elfhdr interp_elf_ex;
683 } *loc;
684 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
686 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
687 if (!loc) {
688 retval = -ENOMEM;
689 goto out_ret;
692 /* Get the exec-header */
693 loc->elf_ex = *((struct elfhdr *)bprm->buf);
695 retval = -ENOEXEC;
696 /* First of all, some simple consistency checks */
697 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
698 goto out;
700 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
701 goto out;
702 if (!elf_check_arch(&loc->elf_ex))
703 goto out;
704 if (!bprm->file->f_op->mmap)
705 goto out;
707 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
708 if (!elf_phdata)
709 goto out;
711 elf_ppnt = elf_phdata;
712 elf_bss = 0;
713 elf_brk = 0;
715 start_code = ~0UL;
716 end_code = 0;
717 start_data = 0;
718 end_data = 0;
720 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
721 if (elf_ppnt->p_type == PT_INTERP) {
722 /* This is the program interpreter used for
723 * shared libraries - for now assume that this
724 * is an a.out format binary
726 retval = -ENOEXEC;
727 if (elf_ppnt->p_filesz > PATH_MAX ||
728 elf_ppnt->p_filesz < 2)
729 goto out_free_ph;
731 retval = -ENOMEM;
732 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
733 GFP_KERNEL);
734 if (!elf_interpreter)
735 goto out_free_ph;
737 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
738 elf_interpreter,
739 elf_ppnt->p_filesz);
740 if (retval != elf_ppnt->p_filesz) {
741 if (retval >= 0)
742 retval = -EIO;
743 goto out_free_interp;
745 /* make sure path is NULL terminated */
746 retval = -ENOEXEC;
747 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
748 goto out_free_interp;
750 interpreter = open_exec(elf_interpreter);
751 retval = PTR_ERR(interpreter);
752 if (IS_ERR(interpreter))
753 goto out_free_interp;
756 * If the binary is not readable then enforce
757 * mm->dumpable = 0 regardless of the interpreter's
758 * permissions.
760 would_dump(bprm, interpreter);
762 retval = kernel_read(interpreter, 0, bprm->buf,
763 BINPRM_BUF_SIZE);
764 if (retval != BINPRM_BUF_SIZE) {
765 if (retval >= 0)
766 retval = -EIO;
767 goto out_free_dentry;
770 /* Get the exec headers */
771 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
772 break;
774 elf_ppnt++;
777 elf_ppnt = elf_phdata;
778 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
779 switch (elf_ppnt->p_type) {
780 case PT_GNU_STACK:
781 if (elf_ppnt->p_flags & PF_X)
782 executable_stack = EXSTACK_ENABLE_X;
783 else
784 executable_stack = EXSTACK_DISABLE_X;
785 break;
787 case PT_LOPROC ... PT_HIPROC:
788 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
789 bprm->file, false,
790 &arch_state);
791 if (retval)
792 goto out_free_dentry;
793 break;
796 /* Some simple consistency checks for the interpreter */
797 if (elf_interpreter) {
798 retval = -ELIBBAD;
799 /* Not an ELF interpreter */
800 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
801 goto out_free_dentry;
802 /* Verify the interpreter has a valid arch */
803 if (!elf_check_arch(&loc->interp_elf_ex))
804 goto out_free_dentry;
806 /* Load the interpreter program headers */
807 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
808 interpreter);
809 if (!interp_elf_phdata)
810 goto out_free_dentry;
812 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
813 elf_ppnt = interp_elf_phdata;
814 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
815 switch (elf_ppnt->p_type) {
816 case PT_LOPROC ... PT_HIPROC:
817 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
818 elf_ppnt, interpreter,
819 true, &arch_state);
820 if (retval)
821 goto out_free_dentry;
822 break;
827 * Allow arch code to reject the ELF at this point, whilst it's
828 * still possible to return an error to the code that invoked
829 * the exec syscall.
831 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
832 if (retval)
833 goto out_free_dentry;
835 /* Flush all traces of the currently running executable */
836 retval = flush_old_exec(bprm);
837 if (retval)
838 goto out_free_dentry;
840 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
841 may depend on the personality. */
842 SET_PERSONALITY2(loc->elf_ex, &arch_state);
843 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844 current->personality |= READ_IMPLIES_EXEC;
846 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847 current->flags |= PF_RANDOMIZE;
849 setup_new_exec(bprm);
851 /* Do this so that we can load the interpreter, if need be. We will
852 change some of these later */
853 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
854 executable_stack);
855 if (retval < 0)
856 goto out_free_dentry;
858 current->mm->start_stack = bprm->p;
860 /* Now we do a little grungy work by mmapping the ELF image into
861 the correct location in memory. */
862 for(i = 0, elf_ppnt = elf_phdata;
863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
864 int elf_prot = 0, elf_flags;
865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
868 if (elf_ppnt->p_type != PT_LOAD)
869 continue;
871 if (unlikely (elf_brk > elf_bss)) {
872 unsigned long nbyte;
874 /* There was a PT_LOAD segment with p_memsz > p_filesz
875 before this one. Map anonymous pages, if needed,
876 and clear the area. */
877 retval = set_brk(elf_bss + load_bias,
878 elf_brk + load_bias);
879 if (retval)
880 goto out_free_dentry;
881 nbyte = ELF_PAGEOFFSET(elf_bss);
882 if (nbyte) {
883 nbyte = ELF_MIN_ALIGN - nbyte;
884 if (nbyte > elf_brk - elf_bss)
885 nbyte = elf_brk - elf_bss;
886 if (clear_user((void __user *)elf_bss +
887 load_bias, nbyte)) {
889 * This bss-zeroing can fail if the ELF
890 * file specifies odd protections. So
891 * we don't check the return value
897 if (elf_ppnt->p_flags & PF_R)
898 elf_prot |= PROT_READ;
899 if (elf_ppnt->p_flags & PF_W)
900 elf_prot |= PROT_WRITE;
901 if (elf_ppnt->p_flags & PF_X)
902 elf_prot |= PROT_EXEC;
904 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906 vaddr = elf_ppnt->p_vaddr;
907 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 elf_flags |= MAP_FIXED;
909 } else if (loc->elf_ex.e_type == ET_DYN) {
910 /* Try and get dynamic programs out of the way of the
911 * default mmap base, as well as whatever program they
912 * might try to exec. This is because the brk will
913 * follow the loader, and is not movable. */
914 load_bias = ELF_ET_DYN_BASE - vaddr;
915 if (current->flags & PF_RANDOMIZE)
916 load_bias += arch_mmap_rnd();
917 load_bias = ELF_PAGESTART(load_bias);
918 total_size = total_mapping_size(elf_phdata,
919 loc->elf_ex.e_phnum);
920 if (!total_size) {
921 retval = -EINVAL;
922 goto out_free_dentry;
926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
927 elf_prot, elf_flags, total_size);
928 if (BAD_ADDR(error)) {
929 retval = IS_ERR((void *)error) ?
930 PTR_ERR((void*)error) : -EINVAL;
931 goto out_free_dentry;
934 if (!load_addr_set) {
935 load_addr_set = 1;
936 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
937 if (loc->elf_ex.e_type == ET_DYN) {
938 load_bias += error -
939 ELF_PAGESTART(load_bias + vaddr);
940 load_addr += load_bias;
941 reloc_func_desc = load_bias;
944 k = elf_ppnt->p_vaddr;
945 if (k < start_code)
946 start_code = k;
947 if (start_data < k)
948 start_data = k;
951 * Check to see if the section's size will overflow the
952 * allowed task size. Note that p_filesz must always be
953 * <= p_memsz so it is only necessary to check p_memsz.
955 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
956 elf_ppnt->p_memsz > TASK_SIZE ||
957 TASK_SIZE - elf_ppnt->p_memsz < k) {
958 /* set_brk can never work. Avoid overflows. */
959 retval = -EINVAL;
960 goto out_free_dentry;
963 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
965 if (k > elf_bss)
966 elf_bss = k;
967 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
968 end_code = k;
969 if (end_data < k)
970 end_data = k;
971 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
972 if (k > elf_brk)
973 elf_brk = k;
976 loc->elf_ex.e_entry += load_bias;
977 elf_bss += load_bias;
978 elf_brk += load_bias;
979 start_code += load_bias;
980 end_code += load_bias;
981 start_data += load_bias;
982 end_data += load_bias;
984 /* Calling set_brk effectively mmaps the pages that we need
985 * for the bss and break sections. We must do this before
986 * mapping in the interpreter, to make sure it doesn't wind
987 * up getting placed where the bss needs to go.
989 retval = set_brk(elf_bss, elf_brk);
990 if (retval)
991 goto out_free_dentry;
992 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
993 retval = -EFAULT; /* Nobody gets to see this, but.. */
994 goto out_free_dentry;
997 if (elf_interpreter) {
998 unsigned long interp_map_addr = 0;
1000 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1001 interpreter,
1002 &interp_map_addr,
1003 load_bias, interp_elf_phdata);
1004 if (!IS_ERR((void *)elf_entry)) {
1006 * load_elf_interp() returns relocation
1007 * adjustment
1009 interp_load_addr = elf_entry;
1010 elf_entry += loc->interp_elf_ex.e_entry;
1012 if (BAD_ADDR(elf_entry)) {
1013 retval = IS_ERR((void *)elf_entry) ?
1014 (int)elf_entry : -EINVAL;
1015 goto out_free_dentry;
1017 reloc_func_desc = interp_load_addr;
1019 allow_write_access(interpreter);
1020 fput(interpreter);
1021 kfree(elf_interpreter);
1022 } else {
1023 elf_entry = loc->elf_ex.e_entry;
1024 if (BAD_ADDR(elf_entry)) {
1025 retval = -EINVAL;
1026 goto out_free_dentry;
1030 kfree(interp_elf_phdata);
1031 kfree(elf_phdata);
1033 set_binfmt(&elf_format);
1035 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1036 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1037 if (retval < 0)
1038 goto out;
1039 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1041 install_exec_creds(bprm);
1042 retval = create_elf_tables(bprm, &loc->elf_ex,
1043 load_addr, interp_load_addr);
1044 if (retval < 0)
1045 goto out;
1046 /* N.B. passed_fileno might not be initialized? */
1047 current->mm->end_code = end_code;
1048 current->mm->start_code = start_code;
1049 current->mm->start_data = start_data;
1050 current->mm->end_data = end_data;
1051 current->mm->start_stack = bprm->p;
1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1054 current->mm->brk = current->mm->start_brk =
1055 arch_randomize_brk(current->mm);
1056 #ifdef compat_brk_randomized
1057 current->brk_randomized = 1;
1058 #endif
1061 if (current->personality & MMAP_PAGE_ZERO) {
1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1063 and some applications "depend" upon this behavior.
1064 Since we do not have the power to recompile these, we
1065 emulate the SVr4 behavior. Sigh. */
1066 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1067 MAP_FIXED | MAP_PRIVATE, 0);
1070 #ifdef ELF_PLAT_INIT
1072 * The ABI may specify that certain registers be set up in special
1073 * ways (on i386 %edx is the address of a DT_FINI function, for
1074 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1075 * that the e_entry field is the address of the function descriptor
1076 * for the startup routine, rather than the address of the startup
1077 * routine itself. This macro performs whatever initialization to
1078 * the regs structure is required as well as any relocations to the
1079 * function descriptor entries when executing dynamically links apps.
1081 ELF_PLAT_INIT(regs, reloc_func_desc);
1082 #endif
1084 start_thread(regs, elf_entry, bprm->p);
1085 retval = 0;
1086 out:
1087 kfree(loc);
1088 out_ret:
1089 return retval;
1091 /* error cleanup */
1092 out_free_dentry:
1093 kfree(interp_elf_phdata);
1094 allow_write_access(interpreter);
1095 if (interpreter)
1096 fput(interpreter);
1097 out_free_interp:
1098 kfree(elf_interpreter);
1099 out_free_ph:
1100 kfree(elf_phdata);
1101 goto out;
#ifdef CONFIG_USELIB
/*
 * load_elf_library() - map a simple library image described by an ELF header.
 * @file: the library file to map
 *
 * This is really simpleminded and specialized - we are loading an
 * a.out library that is given an ELF header.
 *
 * The file must be an ET_EXEC image for this architecture with at most two
 * program headers, exactly one of which is PT_LOAD.  That segment is mapped
 * at its fixed virtual address, the tail of its last file-backed page is
 * zeroed, and any remaining bss is obtained with vm_brk().
 *
 * Return: 0 on success or a negative errno (-ENOEXEC for a malformed file,
 * -ENOMEM when the header buffer cannot be allocated, -EFAULT when the bss
 * padding cannot be written, or the error reported by vm_mmap()/vm_brk()).
 */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	unsigned long map_addr, brk_addr;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	/* Exactly one loadable segment is supported. */
	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	/* Cannot run off the table: the count above proved one exists. */
	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/*
	 * Now use mmap to map the library into memory.  The result is kept in
	 * an unsigned long: squeezing it through an int would truncate the
	 * address on 64-bit and make the comparison below unreliable.
	 */
	map_addr = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (map_addr != ELF_PAGESTART(eppnt->p_vaddr)) {
		/* Propagate the errno if there is one, else keep -ENOEXEC. */
		if (IS_ERR_VALUE(map_addr))
			error = (int)map_addr;
		goto out_free_ph;
	}

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		/* A failed bss extension must not be reported as success. */
		brk_addr = vm_brk(len, bss - len);
		if (IS_ERR_VALUE(brk_addr)) {
			error = (int)brk_addr;
			goto out_free_ph;
		}
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */
1185 #ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
1194 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1195 * that are useful for post-mortem analysis are included in every core dump.
1196 * In that way we ensure that the core dump is fully interpretable later
1197 * without matching up the same kernel and hardware config to see what PC values
1198 * meant. These special mappings include - vDSO, vsyscall, and other
1199 * architecture specific mappings
1201 static bool always_dump_vma(struct vm_area_struct *vma)
1203 /* Any vsyscall mappings? */
1204 if (vma == get_gate_vma(vma->vm_mm))
1205 return true;
1208 * Assume that all vmas with a .name op should always be dumped.
1209 * If this changes, a new vm_ops field can easily be added.
1211 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1212 return true;
1215 * arch_vma_name() returns non-NULL for special architecture mappings,
1216 * such as vDSO sections.
1218 if (arch_vma_name(vma))
1219 return true;
1221 return false;
1225 * Decide what to dump of a segment, part, all or none.
1227 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1228 unsigned long mm_flags)
1230 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1232 /* always dump the vdso and vsyscall sections */
1233 if (always_dump_vma(vma))
1234 goto whole;
1236 if (vma->vm_flags & VM_DONTDUMP)
1237 return 0;
1239 /* Hugetlb memory check */
1240 if (vma->vm_flags & VM_HUGETLB) {
1241 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1242 goto whole;
1243 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1244 goto whole;
1245 return 0;
1248 /* Do not dump I/O mapped devices or special mappings */
1249 if (vma->vm_flags & VM_IO)
1250 return 0;
1252 /* By default, dump shared memory if mapped from an anonymous file. */
1253 if (vma->vm_flags & VM_SHARED) {
1254 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1255 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1256 goto whole;
1257 return 0;
1260 /* Dump segments that have been written to. */
1261 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1262 goto whole;
1263 if (vma->vm_file == NULL)
1264 return 0;
1266 if (FILTER(MAPPED_PRIVATE))
1267 goto whole;
1270 * If this looks like the beginning of a DSO or executable mapping,
1271 * check for an ELF header. If we find one, dump the first page to
1272 * aid in determining what was mapped here.
1274 if (FILTER(ELF_HEADERS) &&
1275 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1276 u32 __user *header = (u32 __user *) vma->vm_start;
1277 u32 word;
1278 mm_segment_t fs = get_fs();
1280 * Doing it this way gets the constant folded by GCC.
1282 union {
1283 u32 cmp;
1284 char elfmag[SELFMAG];
1285 } magic;
1286 BUILD_BUG_ON(SELFMAG != sizeof word);
1287 magic.elfmag[EI_MAG0] = ELFMAG0;
1288 magic.elfmag[EI_MAG1] = ELFMAG1;
1289 magic.elfmag[EI_MAG2] = ELFMAG2;
1290 magic.elfmag[EI_MAG3] = ELFMAG3;
1292 * Switch to the user "segment" for get_user(),
1293 * then put back what elf_core_dump() had in place.
1295 set_fs(USER_DS);
1296 if (unlikely(get_user(word, header)))
1297 word = 0;
1298 set_fs(fs);
1299 if (word == magic.cmp)
1300 return PAGE_SIZE;
1303 #undef FILTER
1305 return 0;
1307 whole:
1308 return vma->vm_end - vma->vm_start;
/*
 * An ELF note held in memory until it is written to the core file.
 */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type (NT_*) */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload */
};
1320 static int notesize(struct memelfnote *en)
1322 int sz;
1324 sz = sizeof(struct elf_note);
1325 sz += roundup(strlen(en->name) + 1, 4);
1326 sz += roundup(en->datasz, 4);
1328 return sz;
1331 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1333 struct elf_note en;
1334 en.n_namesz = strlen(men->name) + 1;
1335 en.n_descsz = men->datasz;
1336 en.n_type = men->type;
1338 return dump_emit(cprm, &en, sizeof(en)) &&
1339 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1343 static void fill_elf_header(struct elfhdr *elf, int segs,
1344 u16 machine, u32 flags)
1346 memset(elf, 0, sizeof(*elf));
1348 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349 elf->e_ident[EI_CLASS] = ELF_CLASS;
1350 elf->e_ident[EI_DATA] = ELF_DATA;
1351 elf->e_ident[EI_VERSION] = EV_CURRENT;
1352 elf->e_ident[EI_OSABI] = ELF_OSABI;
1354 elf->e_type = ET_CORE;
1355 elf->e_machine = machine;
1356 elf->e_version = EV_CURRENT;
1357 elf->e_phoff = sizeof(struct elfhdr);
1358 elf->e_flags = flags;
1359 elf->e_ehsize = sizeof(struct elfhdr);
1360 elf->e_phentsize = sizeof(struct elf_phdr);
1361 elf->e_phnum = segs;
1363 return;
1366 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1368 phdr->p_type = PT_NOTE;
1369 phdr->p_offset = offset;
1370 phdr->p_vaddr = 0;
1371 phdr->p_paddr = 0;
1372 phdr->p_filesz = sz;
1373 phdr->p_memsz = 0;
1374 phdr->p_flags = 0;
1375 phdr->p_align = 0;
1376 return;
1379 static void fill_note(struct memelfnote *note, const char *name, int type,
1380 unsigned int sz, void *data)
1382 note->name = name;
1383 note->type = type;
1384 note->datasz = sz;
1385 note->data = data;
1386 return;
1390 * fill up all the fields in prstatus from the given task struct, except
1391 * registers which need to be filled up separately.
1393 static void fill_prstatus(struct elf_prstatus *prstatus,
1394 struct task_struct *p, long signr)
1396 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1397 prstatus->pr_sigpend = p->pending.signal.sig[0];
1398 prstatus->pr_sighold = p->blocked.sig[0];
1399 rcu_read_lock();
1400 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1401 rcu_read_unlock();
1402 prstatus->pr_pid = task_pid_vnr(p);
1403 prstatus->pr_pgrp = task_pgrp_vnr(p);
1404 prstatus->pr_sid = task_session_vnr(p);
1405 if (thread_group_leader(p)) {
1406 struct task_cputime cputime;
1409 * This is the record for the group leader. It shows the
1410 * group-wide total, not its individual thread total.
1412 thread_group_cputime(p, &cputime);
1413 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1415 } else {
1416 cputime_t utime, stime;
1418 task_cputime(p, &utime, &stime);
1419 cputime_to_timeval(utime, &prstatus->pr_utime);
1420 cputime_to_timeval(stime, &prstatus->pr_stime);
1422 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1426 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427 struct mm_struct *mm)
1429 const struct cred *cred;
1430 unsigned int i, len;
1432 /* first copy the parameters from user space */
1433 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1435 len = mm->arg_end - mm->arg_start;
1436 if (len >= ELF_PRARGSZ)
1437 len = ELF_PRARGSZ-1;
1438 if (copy_from_user(&psinfo->pr_psargs,
1439 (const char __user *)mm->arg_start, len))
1440 return -EFAULT;
1441 for(i = 0; i < len; i++)
1442 if (psinfo->pr_psargs[i] == 0)
1443 psinfo->pr_psargs[i] = ' ';
1444 psinfo->pr_psargs[len] = 0;
1446 rcu_read_lock();
1447 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1448 rcu_read_unlock();
1449 psinfo->pr_pid = task_pid_vnr(p);
1450 psinfo->pr_pgrp = task_pgrp_vnr(p);
1451 psinfo->pr_sid = task_session_vnr(p);
1453 i = p->state ? ffz(~p->state) + 1 : 0;
1454 psinfo->pr_state = i;
1455 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457 psinfo->pr_nice = task_nice(p);
1458 psinfo->pr_flag = p->flags;
1459 rcu_read_lock();
1460 cred = __task_cred(p);
1461 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1463 rcu_read_unlock();
1464 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1466 return 0;
1469 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1471 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1472 int i = 0;
1474 i += 2;
1475 while (auxv[i - 2] != AT_NULL);
1476 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1479 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480 const siginfo_t *siginfo)
1482 mm_segment_t old_fs = get_fs();
1483 set_fs(KERNEL_DS);
1484 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1485 set_fs(old_fs);
1486 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1489 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1491 * Format of NT_FILE note:
1493 * long count -- how many files are mapped
1494 * long page_size -- units for file_ofs
1495 * array of [COUNT] elements of
1496 * long start
1497 * long end
1498 * long file_ofs
1499 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1501 static int fill_files_note(struct memelfnote *note)
1503 struct vm_area_struct *vma;
1504 unsigned count, size, names_ofs, remaining, n;
1505 user_long_t *data;
1506 user_long_t *start_end_ofs;
1507 char *name_base, *name_curpos;
1509 /* *Estimated* file count and total data size needed */
1510 count = current->mm->map_count;
1511 size = count * 64;
1513 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1514 alloc:
1515 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1516 return -EINVAL;
1517 size = round_up(size, PAGE_SIZE);
1518 data = vmalloc(size);
1519 if (!data)
1520 return -ENOMEM;
1522 start_end_ofs = data + 2;
1523 name_base = name_curpos = ((char *)data) + names_ofs;
1524 remaining = size - names_ofs;
1525 count = 0;
1526 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1527 struct file *file;
1528 const char *filename;
1530 file = vma->vm_file;
1531 if (!file)
1532 continue;
1533 filename = file_path(file, name_curpos, remaining);
1534 if (IS_ERR(filename)) {
1535 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1536 vfree(data);
1537 size = size * 5 / 4;
1538 goto alloc;
1540 continue;
1543 /* file_path() fills at the end, move name down */
1544 /* n = strlen(filename) + 1: */
1545 n = (name_curpos + remaining) - filename;
1546 remaining = filename - name_curpos;
1547 memmove(name_curpos, filename, n);
1548 name_curpos += n;
1550 *start_end_ofs++ = vma->vm_start;
1551 *start_end_ofs++ = vma->vm_end;
1552 *start_end_ofs++ = vma->vm_pgoff;
1553 count++;
1556 /* Now we know exact count of files, can store it */
1557 data[0] = count;
1558 data[1] = PAGE_SIZE;
1560 * Count usually is less than current->mm->map_count,
1561 * we need to move filenames down.
1563 n = current->mm->map_count - count;
1564 if (n != 0) {
1565 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566 memmove(name_base - shift_bytes, name_base,
1567 name_curpos - name_base);
1568 name_curpos -= shift_bytes;
1571 size = name_curpos - (char *)data;
1572 fill_note(note, "CORE", NT_FILE, size, data);
1573 return 0;
1576 #ifdef CORE_DUMP_USE_REGSET
1577 #include <linux/regset.h>
1579 struct elf_thread_core_info {
1580 struct elf_thread_core_info *next;
1581 struct task_struct *task;
1582 struct elf_prstatus prstatus;
1583 struct memelfnote notes[0];
1586 struct elf_note_info {
1587 struct elf_thread_core_info *thread;
1588 struct memelfnote psinfo;
1589 struct memelfnote signote;
1590 struct memelfnote auxv;
1591 struct memelfnote files;
1592 user_siginfo_t csigdata;
1593 size_t size;
1594 int thread_notes;
1598 * When a regset has a writeback hook, we call it on each thread before
1599 * dumping user memory. On register window machines, this makes sure the
1600 * user memory backing the register data is up to date before we read it.
1602 static void do_thread_regset_writeback(struct task_struct *task,
1603 const struct user_regset *regset)
1605 if (regset->writeback)
1606 regset->writeback(task, regset, 1);
/*
 * Fallback accessors for struct elf_prstatus, used unless something earlier
 * in the build has already supplied its own versions.
 */

/* Size of the register area inside prstatus. */
#if !defined(PR_REG_SIZE)
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of the whole prstatus record as written to the note. */
#if !defined(PRSTATUS_SIZE)
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register area inside prstatus. */
#if !defined(PR_REG_PTR)
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Record whether floating point state is present. */
#if !defined(SET_PR_FPVALID)
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1625 static int fill_thread_core_info(struct elf_thread_core_info *t,
1626 const struct user_regset_view *view,
1627 long signr, size_t *total)
1629 unsigned int i;
1632 * NT_PRSTATUS is the one special case, because the regset data
1633 * goes into the pr_reg field inside the note contents, rather
1634 * than being the whole note contents. We fill the reset in here.
1635 * We assume that regset 0 is NT_PRSTATUS.
1637 fill_prstatus(&t->prstatus, t->task, signr);
1638 (void) view->regsets[0].get(t->task, &view->regsets[0],
1639 0, PR_REG_SIZE(t->prstatus.pr_reg),
1640 PR_REG_PTR(&t->prstatus), NULL);
1642 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644 *total += notesize(&t->notes[0]);
1646 do_thread_regset_writeback(t->task, &view->regsets[0]);
1649 * Each other regset might generate a note too. For each regset
1650 * that has no core_note_type or is inactive, we leave t->notes[i]
1651 * all zero and we'll know to skip writing it later.
1653 for (i = 1; i < view->n; ++i) {
1654 const struct user_regset *regset = &view->regsets[i];
1655 do_thread_regset_writeback(t->task, regset);
1656 if (regset->core_note_type && regset->get &&
1657 (!regset->active || regset->active(t->task, regset))) {
1658 int ret;
1659 size_t size = regset->n * regset->size;
1660 void *data = kmalloc(size, GFP_KERNEL);
1661 if (unlikely(!data))
1662 return 0;
1663 ret = regset->get(t->task, regset,
1664 0, size, data, NULL);
1665 if (unlikely(ret))
1666 kfree(data);
1667 else {
1668 if (regset->core_note_type != NT_PRFPREG)
1669 fill_note(&t->notes[i], "LINUX",
1670 regset->core_note_type,
1671 size, data);
1672 else {
1673 SET_PR_FPVALID(&t->prstatus, 1);
1674 fill_note(&t->notes[i], "CORE",
1675 NT_PRFPREG, size, data);
1677 *total += notesize(&t->notes[i]);
1682 return 1;
1685 static int fill_note_info(struct elfhdr *elf, int phdrs,
1686 struct elf_note_info *info,
1687 const siginfo_t *siginfo, struct pt_regs *regs)
1689 struct task_struct *dump_task = current;
1690 const struct user_regset_view *view = task_user_regset_view(dump_task);
1691 struct elf_thread_core_info *t;
1692 struct elf_prpsinfo *psinfo;
1693 struct core_thread *ct;
1694 unsigned int i;
1696 info->size = 0;
1697 info->thread = NULL;
1699 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700 if (psinfo == NULL) {
1701 info->psinfo.data = NULL; /* So we don't free this wrongly */
1702 return 0;
1705 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1708 * Figure out how many notes we're going to need for each thread.
1710 info->thread_notes = 0;
1711 for (i = 0; i < view->n; ++i)
1712 if (view->regsets[i].core_note_type != 0)
1713 ++info->thread_notes;
1716 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1717 * since it is our one special case.
1719 if (unlikely(info->thread_notes == 0) ||
1720 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1721 WARN_ON(1);
1722 return 0;
1726 * Initialize the ELF file header.
1728 fill_elf_header(elf, phdrs,
1729 view->e_machine, view->e_flags);
1732 * Allocate a structure for each thread.
1734 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1735 t = kzalloc(offsetof(struct elf_thread_core_info,
1736 notes[info->thread_notes]),
1737 GFP_KERNEL);
1738 if (unlikely(!t))
1739 return 0;
1741 t->task = ct->task;
1742 if (ct->task == dump_task || !info->thread) {
1743 t->next = info->thread;
1744 info->thread = t;
1745 } else {
1747 * Make sure to keep the original task at
1748 * the head of the list.
1750 t->next = info->thread->next;
1751 info->thread->next = t;
1756 * Now fill in each thread's information.
1758 for (t = info->thread; t != NULL; t = t->next)
1759 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1760 return 0;
1763 * Fill in the two process-wide notes.
1765 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766 info->size += notesize(&info->psinfo);
1768 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769 info->size += notesize(&info->signote);
1771 fill_auxv_note(&info->auxv, current->mm);
1772 info->size += notesize(&info->auxv);
1774 if (fill_files_note(&info->files) == 0)
1775 info->size += notesize(&info->files);
1777 return 1;
1780 static size_t get_note_info_size(struct elf_note_info *info)
1782 return info->size;
1786 * Write all the notes for each thread. When writing the first thread, the
1787 * process-wide notes are interleaved after the first thread-specific note.
1789 static int write_note_info(struct elf_note_info *info,
1790 struct coredump_params *cprm)
1792 bool first = true;
1793 struct elf_thread_core_info *t = info->thread;
1795 do {
1796 int i;
1798 if (!writenote(&t->notes[0], cprm))
1799 return 0;
1801 if (first && !writenote(&info->psinfo, cprm))
1802 return 0;
1803 if (first && !writenote(&info->signote, cprm))
1804 return 0;
1805 if (first && !writenote(&info->auxv, cprm))
1806 return 0;
1807 if (first && info->files.data &&
1808 !writenote(&info->files, cprm))
1809 return 0;
1811 for (i = 1; i < info->thread_notes; ++i)
1812 if (t->notes[i].data &&
1813 !writenote(&t->notes[i], cprm))
1814 return 0;
1816 first = false;
1817 t = t->next;
1818 } while (t);
1820 return 1;
1823 static void free_note_info(struct elf_note_info *info)
1825 struct elf_thread_core_info *threads = info->thread;
1826 while (threads) {
1827 unsigned int i;
1828 struct elf_thread_core_info *t = threads;
1829 threads = t->next;
1830 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1831 for (i = 1; i < info->thread_notes; ++i)
1832 kfree(t->notes[i].data);
1833 kfree(t);
1835 kfree(info->psinfo.data);
1836 vfree(info->files.data);
1839 #else
1841 /* Here is the structure in which status of each thread is captured. */
1842 struct elf_thread_status
1844 struct list_head list;
1845 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1846 elf_fpregset_t fpu; /* NT_PRFPREG */
1847 struct task_struct *thread;
1848 #ifdef ELF_CORE_COPY_XFPREGS
1849 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1850 #endif
1851 struct memelfnote notes[3];
1852 int num_notes;
1856 * In order to add the specific thread information for the elf file format,
1857 * we need to keep a linked list of every threads pr_status and then create
1858 * a single section for them in the final core file.
1860 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1862 int sz = 0;
1863 struct task_struct *p = t->thread;
1864 t->num_notes = 0;
1866 fill_prstatus(&t->prstatus, p, signr);
1867 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1869 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1870 &(t->prstatus));
1871 t->num_notes++;
1872 sz += notesize(&t->notes[0]);
1874 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1875 &t->fpu))) {
1876 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1877 &(t->fpu));
1878 t->num_notes++;
1879 sz += notesize(&t->notes[1]);
1882 #ifdef ELF_CORE_COPY_XFPREGS
1883 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885 sizeof(t->xfpu), &t->xfpu);
1886 t->num_notes++;
1887 sz += notesize(&t->notes[2]);
1889 #endif
1890 return sz;
1893 struct elf_note_info {
1894 struct memelfnote *notes;
1895 struct memelfnote *notes_files;
1896 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1897 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1898 struct list_head thread_list;
1899 elf_fpregset_t *fpu;
1900 #ifdef ELF_CORE_COPY_XFPREGS
1901 elf_fpxregset_t *xfpu;
1902 #endif
1903 user_siginfo_t csigdata;
1904 int thread_status_size;
1905 int numnote;
1908 static int elf_note_info_init(struct elf_note_info *info)
1910 memset(info, 0, sizeof(*info));
1911 INIT_LIST_HEAD(&info->thread_list);
1913 /* Allocate space for ELF notes */
1914 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1915 if (!info->notes)
1916 return 0;
1917 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1918 if (!info->psinfo)
1919 return 0;
1920 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921 if (!info->prstatus)
1922 return 0;
1923 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1924 if (!info->fpu)
1925 return 0;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1928 if (!info->xfpu)
1929 return 0;
1930 #endif
1931 return 1;
1934 static int fill_note_info(struct elfhdr *elf, int phdrs,
1935 struct elf_note_info *info,
1936 const siginfo_t *siginfo, struct pt_regs *regs)
1938 struct list_head *t;
1939 struct core_thread *ct;
1940 struct elf_thread_status *ets;
1942 if (!elf_note_info_init(info))
1943 return 0;
1945 for (ct = current->mm->core_state->dumper.next;
1946 ct; ct = ct->next) {
1947 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1948 if (!ets)
1949 return 0;
1951 ets->thread = ct->task;
1952 list_add(&ets->list, &info->thread_list);
1955 list_for_each(t, &info->thread_list) {
1956 int sz;
1958 ets = list_entry(t, struct elf_thread_status, list);
1959 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960 info->thread_status_size += sz;
1962 /* now collect the dump for the current */
1963 memset(info->prstatus, 0, sizeof(*info->prstatus));
1964 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1967 /* Set up header */
1968 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1971 * Set up the notes in similar form to SVR4 core dumps made
1972 * with info from their /proc.
1975 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976 sizeof(*info->prstatus), info->prstatus);
1977 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979 sizeof(*info->psinfo), info->psinfo);
1981 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982 fill_auxv_note(info->notes + 3, current->mm);
1983 info->numnote = 4;
1985 if (fill_files_note(info->notes + info->numnote) == 0) {
1986 info->notes_files = info->notes + info->numnote;
1987 info->numnote++;
1990 /* Try to dump the FPU. */
1991 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1992 info->fpu);
1993 if (info->prstatus->pr_fpvalid)
1994 fill_note(info->notes + info->numnote++,
1995 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996 #ifdef ELF_CORE_COPY_XFPREGS
1997 if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998 fill_note(info->notes + info->numnote++,
1999 "LINUX", ELF_CORE_XFPREG_TYPE,
2000 sizeof(*info->xfpu), info->xfpu);
2001 #endif
2003 return 1;
2006 static size_t get_note_info_size(struct elf_note_info *info)
2008 int sz = 0;
2009 int i;
2011 for (i = 0; i < info->numnote; i++)
2012 sz += notesize(info->notes + i);
2014 sz += info->thread_status_size;
2016 return sz;
2019 static int write_note_info(struct elf_note_info *info,
2020 struct coredump_params *cprm)
2022 int i;
2023 struct list_head *t;
2025 for (i = 0; i < info->numnote; i++)
2026 if (!writenote(info->notes + i, cprm))
2027 return 0;
2029 /* write out the thread status notes section */
2030 list_for_each(t, &info->thread_list) {
2031 struct elf_thread_status *tmp =
2032 list_entry(t, struct elf_thread_status, list);
2034 for (i = 0; i < tmp->num_notes; i++)
2035 if (!writenote(&tmp->notes[i], cprm))
2036 return 0;
2039 return 1;
2042 static void free_note_info(struct elf_note_info *info)
2044 while (!list_empty(&info->thread_list)) {
2045 struct list_head *tmp = info->thread_list.next;
2046 list_del(tmp);
2047 kfree(list_entry(tmp, struct elf_thread_status, list));
2050 /* Free data possibly allocated by fill_files_note(): */
2051 if (info->notes_files)
2052 vfree(info->notes_files->data);
2054 kfree(info->prstatus);
2055 kfree(info->psinfo);
2056 kfree(info->notes);
2057 kfree(info->fpu);
2058 #ifdef ELF_CORE_COPY_XFPREGS
2059 kfree(info->xfpu);
2060 #endif
2063 #endif
2065 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066 struct vm_area_struct *gate_vma)
2068 struct vm_area_struct *ret = tsk->mm->mmap;
2070 if (ret)
2071 return ret;
2072 return gate_vma;
2075 * Helper function for iterating across a vma list. It ensures that the caller
2076 * will visit `gate_vma' prior to terminating the search.
2078 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079 struct vm_area_struct *gate_vma)
2081 struct vm_area_struct *ret;
2083 ret = this_vma->vm_next;
2084 if (ret)
2085 return ret;
2086 if (this_vma == gate_vma)
2087 return NULL;
2088 return gate_vma;
2091 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092 elf_addr_t e_shoff, int segs)
2094 elf->e_shoff = e_shoff;
2095 elf->e_shentsize = sizeof(*shdr4extnum);
2096 elf->e_shnum = 1;
2097 elf->e_shstrndx = SHN_UNDEF;
2099 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2101 shdr4extnum->sh_type = SHT_NULL;
2102 shdr4extnum->sh_size = elf->e_shnum;
2103 shdr4extnum->sh_link = elf->e_shstrndx;
2104 shdr4extnum->sh_info = segs;
2108 * Actual dumper
2110 * This is a two-pass process; first we find the offsets of the bits,
2111 * and then they are actually written out. If we run out of core limit
2112 * we just truncate.
2114 static int elf_core_dump(struct coredump_params *cprm)
2116 int has_dumped = 0;
2117 mm_segment_t fs;
2118 int segs, i;
2119 size_t vma_data_size = 0;
2120 struct vm_area_struct *vma, *gate_vma;
2121 struct elfhdr *elf = NULL;
2122 loff_t offset = 0, dataoff;
2123 struct elf_note_info info = { };
2124 struct elf_phdr *phdr4note = NULL;
2125 struct elf_shdr *shdr4extnum = NULL;
2126 Elf_Half e_phnum;
2127 elf_addr_t e_shoff;
2128 elf_addr_t *vma_filesz = NULL;
2131 * We no longer stop all VM operations.
2133 * This is because those proceses that could possibly change map_count
2134 * or the mmap / vma pages are now blocked in do_exit on current
2135 * finishing this core dump.
2137 * Only ptrace can touch these memory addresses, but it doesn't change
2138 * the map_count or the pages allocated. So no possibility of crashing
2139 * exists while dumping the mm->vm_next areas to the core file.
2142 /* alloc memory for large data structures: too large to be on stack */
2143 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2144 if (!elf)
2145 goto out;
2147 * The number of segs are recored into ELF header as 16bit value.
2148 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2150 segs = current->mm->map_count;
2151 segs += elf_core_extra_phdrs();
2153 gate_vma = get_gate_vma(current->mm);
2154 if (gate_vma != NULL)
2155 segs++;
2157 /* for notes section */
2158 segs++;
2160 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161 * this, kernel supports extended numbering. Have a look at
2162 * include/linux/elf.h for further information. */
2163 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2166 * Collect all the non-memory information about the process for the
2167 * notes. This also sets up the file header.
2169 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2170 goto cleanup;
2172 has_dumped = 1;
2174 fs = get_fs();
2175 set_fs(KERNEL_DS);
2177 offset += sizeof(*elf); /* Elf header */
2178 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2180 /* Write notes phdr entry */
2182 size_t sz = get_note_info_size(&info);
2184 sz += elf_coredump_extra_notes_size();
2186 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2187 if (!phdr4note)
2188 goto end_coredump;
2190 fill_elf_note_phdr(phdr4note, sz, offset);
2191 offset += sz;
2194 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2196 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2197 if (!vma_filesz)
2198 goto end_coredump;
2200 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201 vma = next_vma(vma, gate_vma)) {
2202 unsigned long dump_size;
2204 dump_size = vma_dump_size(vma, cprm->mm_flags);
2205 vma_filesz[i++] = dump_size;
2206 vma_data_size += dump_size;
2209 offset += vma_data_size;
2210 offset += elf_core_extra_data_size();
2211 e_shoff = offset;
2213 if (e_phnum == PN_XNUM) {
2214 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2215 if (!shdr4extnum)
2216 goto end_coredump;
2217 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2220 offset = dataoff;
2222 if (!dump_emit(cprm, elf, sizeof(*elf)))
2223 goto end_coredump;
2225 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2226 goto end_coredump;
2228 /* Write program headers for segments dump */
2229 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230 vma = next_vma(vma, gate_vma)) {
2231 struct elf_phdr phdr;
2233 phdr.p_type = PT_LOAD;
2234 phdr.p_offset = offset;
2235 phdr.p_vaddr = vma->vm_start;
2236 phdr.p_paddr = 0;
2237 phdr.p_filesz = vma_filesz[i++];
2238 phdr.p_memsz = vma->vm_end - vma->vm_start;
2239 offset += phdr.p_filesz;
2240 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241 if (vma->vm_flags & VM_WRITE)
2242 phdr.p_flags |= PF_W;
2243 if (vma->vm_flags & VM_EXEC)
2244 phdr.p_flags |= PF_X;
2245 phdr.p_align = ELF_EXEC_PAGESIZE;
2247 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2248 goto end_coredump;
2251 if (!elf_core_write_extra_phdrs(cprm, offset))
2252 goto end_coredump;
2254 /* write out the notes section */
2255 if (!write_note_info(&info, cprm))
2256 goto end_coredump;
2258 if (elf_coredump_extra_notes_write(cprm))
2259 goto end_coredump;
2261 /* Align to page */
2262 if (!dump_skip(cprm, dataoff - cprm->written))
2263 goto end_coredump;
2265 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266 vma = next_vma(vma, gate_vma)) {
2267 unsigned long addr;
2268 unsigned long end;
2270 end = vma->vm_start + vma_filesz[i++];
2272 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2273 struct page *page;
2274 int stop;
2276 page = get_dump_page(addr);
2277 if (page) {
2278 void *kaddr = kmap(page);
2279 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2280 kunmap(page);
2281 page_cache_release(page);
2282 } else
2283 stop = !dump_skip(cprm, PAGE_SIZE);
2284 if (stop)
2285 goto end_coredump;
2289 if (!elf_core_write_extra_data(cprm))
2290 goto end_coredump;
2292 if (e_phnum == PN_XNUM) {
2293 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2294 goto end_coredump;
2297 end_coredump:
2298 set_fs(fs);
2300 cleanup:
2301 free_note_info(&info);
2302 kfree(shdr4extnum);
2303 kfree(vma_filesz);
2304 kfree(phdr4note);
2305 kfree(elf);
2306 out:
2307 return has_dumped;
2310 #endif /* CONFIG_ELF_CORE */
2312 static int __init init_elf_binfmt(void)
2314 register_binfmt(&elf_format);
2315 return 0;
2318 static void __exit exit_elf_binfmt(void)
2320 /* Remove the COFF and ELF loaders. */
2321 unregister_binfmt(&elf_format);
2324 core_initcall(init_elf_binfmt);
2325 module_exit(exit_elf_binfmt);
2326 MODULE_LICENSE("GPL");