Merge HEAD from ../linux-2.6
[linux-2.6/verdex.git] / fs / binfmt_elf.c
blob537893a16014cbbdd6d72259e795030625e91a77
1 /*
2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/smp_lock.h>
35 #include <linux/compiler.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
38 #include <linux/security.h>
39 #include <linux/syscalls.h>
40 #include <linux/random.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
46 #include <linux/elf.h>
48 static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
49 static int load_elf_library(struct file*);
50 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
51 extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
53 #ifndef elf_addr_t
54 #define elf_addr_t unsigned long
55 #endif
58 * If we don't support core dumping, then supply a NULL so we
59 * don't even try.
61 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
62 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
63 #else
64 #define elf_core_dump NULL
65 #endif
67 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
68 # define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
69 #else
70 # define ELF_MIN_ALIGN PAGE_SIZE
71 #endif
73 #ifndef ELF_CORE_EFLAGS
74 #define ELF_CORE_EFLAGS 0
75 #endif
/*
 * Page helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round an address down to the start of its page
 *   ELF_PAGEOFFSET - offset of an address within its page
 *   ELF_PAGEALIGN  - round an address up to the next page boundary
 *
 * Both rounding masks are widened to unsigned long before being inverted so
 * that an unsigned int ELF_MIN_ALIGN cannot clear the upper address bits on
 * 64-bit builds.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
81 static struct linux_binfmt elf_format = {
82 .module = THIS_MODULE,
83 .load_binary = load_elf_binary,
84 .load_shlib = load_elf_library,
85 .core_dump = elf_core_dump,
86 .min_coredump = ELF_EXEC_PAGESIZE
/*
 * True when x is not a usable user-space address.  TASK_SIZE itself is the
 * first address past user space, so it is rejected too; negative errno
 * values returned as unsigned long also land in this range.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
91 static int set_brk(unsigned long start, unsigned long end)
93 start = ELF_PAGEALIGN(start);
94 end = ELF_PAGEALIGN(end);
95 if (end > start) {
96 unsigned long addr;
97 down_write(&current->mm->mmap_sem);
98 addr = do_brk(start, end - start);
99 up_write(&current->mm->mmap_sem);
100 if (BAD_ADDR(addr))
101 return addr;
103 current->mm->start_brk = current->mm->brk = end;
104 return 0;
108 /* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would
110 contain the junk from the file that should not
111 be in memory */
114 static int padzero(unsigned long elf_bss)
116 unsigned long nbyte;
118 nbyte = ELF_PAGEOFFSET(elf_bss);
119 if (nbyte) {
120 nbyte = ELF_MIN_ALIGN - nbyte;
121 if (clear_user((void __user *) elf_bss, nbyte))
122 return -EFAULT;
124 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - pointer 'items' elf_addr_t slots further along
 *                            the stack in its growth direction
 *   STACK_ROUND(sp, items) - address of that slot rounded to 16 bytes
 *   STACK_ALLOC(sp, len)   - move sp by len and yield the start of the
 *                            region that was just reserved
 *
 * Every macro argument is parenthesised so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
140 static int
141 create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
142 int interp_aout, unsigned long load_addr,
143 unsigned long interp_load_addr)
145 unsigned long p = bprm->p;
146 int argc = bprm->argc;
147 int envc = bprm->envc;
148 elf_addr_t __user *argv;
149 elf_addr_t __user *envp;
150 elf_addr_t __user *sp;
151 elf_addr_t __user *u_platform;
152 const char *k_platform = ELF_PLATFORM;
153 int items;
154 elf_addr_t *elf_info;
155 int ei_index = 0;
156 struct task_struct *tsk = current;
159 * If this architecture has a platform capability string, copy it
160 * to userspace. In some cases (Sparc), this info is impossible
161 * for userspace to get any other way, in others (i386) it is
162 * merely difficult.
165 u_platform = NULL;
166 if (k_platform) {
167 size_t len = strlen(k_platform) + 1;
170 * In some cases (e.g. Hyper-Threading), we want to avoid L1
171 * evictions by the processes running on the same package. One
172 * thing we can do is to shuffle the initial stack for them.
175 p = arch_align_stack(p);
177 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
178 if (__copy_to_user(u_platform, k_platform, len))
179 return -EFAULT;
182 /* Create the ELF interpreter info */
183 elf_info = (elf_addr_t *) current->mm->saved_auxv;
184 #define NEW_AUX_ENT(id, val) \
185 do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
187 #ifdef ARCH_DLINFO
189 * ARCH_DLINFO must come first so PPC can do its special alignment of
190 * AUXV.
192 ARCH_DLINFO;
193 #endif
194 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
195 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
199 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 NEW_AUX_ENT(AT_FLAGS, 0);
202 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
204 NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
205 NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
206 NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
207 NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
208 if (k_platform) {
209 NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
214 #undef NEW_AUX_ENT
215 /* AT_NULL is zero; clear the rest too */
216 memset(&elf_info[ei_index], 0,
217 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
219 /* And advance past the AT_NULL entry. */
220 ei_index += 2;
222 sp = STACK_ADD(p, ei_index);
224 items = (argc + 1) + (envc + 1);
225 if (interp_aout) {
226 items += 3; /* a.out interpreters require argv & envp too */
227 } else {
228 items += 1; /* ELF interpreters only put argc on the stack */
230 bprm->p = STACK_ROUND(sp, items);
232 /* Point sp at the lowest address on the stack */
233 #ifdef CONFIG_STACK_GROWSUP
234 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
235 bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
236 #else
237 sp = (elf_addr_t __user *)bprm->p;
238 #endif
240 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
241 if (__put_user(argc, sp++))
242 return -EFAULT;
243 if (interp_aout) {
244 argv = sp + 2;
245 envp = argv + argc + 1;
246 __put_user((elf_addr_t)(unsigned long)argv, sp++);
247 __put_user((elf_addr_t)(unsigned long)envp, sp++);
248 } else {
249 argv = sp;
250 envp = argv + argc + 1;
253 /* Populate argv and envp */
254 p = current->mm->arg_end = current->mm->arg_start;
255 while (argc-- > 0) {
256 size_t len;
257 __put_user((elf_addr_t)p, argv++);
258 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
259 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
260 return 0;
261 p += len;
263 if (__put_user(0, argv))
264 return -EFAULT;
265 current->mm->arg_end = current->mm->env_start = p;
266 while (envc-- > 0) {
267 size_t len;
268 __put_user((elf_addr_t)p, envp++);
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 return 0;
272 p += len;
274 if (__put_user(0, envp))
275 return -EFAULT;
276 current->mm->env_end = p;
278 /* Put the elf_info on the stack in the right place. */
279 sp = (elf_addr_t __user *)envp + 1;
280 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 return -EFAULT;
282 return 0;
285 #ifndef elf_map
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type)
290 unsigned long map_addr;
291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
293 down_write(&current->mm->mmap_sem);
294 /* mmap() will return -EINVAL if given a zero size, but a
295 * segment with zero filesize is perfectly valid */
296 if (eppnt->p_filesz + pageoffset)
297 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
298 eppnt->p_filesz + pageoffset, prot, type,
299 eppnt->p_offset - pageoffset);
300 else
301 map_addr = ELF_PAGESTART(addr);
302 up_write(&current->mm->mmap_sem);
303 return(map_addr);
306 #endif /* !elf_map */
308 /* This is much more generalized than the library routine read function,
309 so we keep this separate. Technically the library read function
310 is only provided so that we can read a.out libraries that have
311 an ELF header */
313 static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
314 struct file * interpreter,
315 unsigned long *interp_load_addr)
317 struct elf_phdr *elf_phdata;
318 struct elf_phdr *eppnt;
319 unsigned long load_addr = 0;
320 int load_addr_set = 0;
321 unsigned long last_bss = 0, elf_bss = 0;
322 unsigned long error = ~0UL;
323 int retval, i, size;
325 /* First of all, some simple consistency checks */
326 if (interp_elf_ex->e_type != ET_EXEC &&
327 interp_elf_ex->e_type != ET_DYN)
328 goto out;
329 if (!elf_check_arch(interp_elf_ex))
330 goto out;
331 if (!interpreter->f_op || !interpreter->f_op->mmap)
332 goto out;
335 * If the size of this structure has changed, then punt, since
336 * we will be doing the wrong thing.
338 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
339 goto out;
340 if (interp_elf_ex->e_phnum < 1 ||
341 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
342 goto out;
344 /* Now read in all of the header information */
346 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
347 if (size > ELF_MIN_ALIGN)
348 goto out;
349 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
350 if (!elf_phdata)
351 goto out;
353 retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
354 error = -EIO;
355 if (retval != size) {
356 if (retval < 0)
357 error = retval;
358 goto out_close;
361 eppnt = elf_phdata;
362 for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
363 if (eppnt->p_type == PT_LOAD) {
364 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
365 int elf_prot = 0;
366 unsigned long vaddr = 0;
367 unsigned long k, map_addr;
369 if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
370 if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
371 if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
372 vaddr = eppnt->p_vaddr;
373 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
374 elf_type |= MAP_FIXED;
376 map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
377 error = map_addr;
378 if (BAD_ADDR(map_addr))
379 goto out_close;
381 if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
382 load_addr = map_addr - ELF_PAGESTART(vaddr);
383 load_addr_set = 1;
387 * Check to see if the section's size will overflow the
388 * allowed task size. Note that p_filesz must always be
389 * <= p_memsize so it is only necessary to check p_memsz.
391 k = load_addr + eppnt->p_vaddr;
392 if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
393 eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
394 error = -ENOMEM;
395 goto out_close;
399 * Find the end of the file mapping for this phdr, and keep
400 * track of the largest address we see for this.
402 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
403 if (k > elf_bss)
404 elf_bss = k;
407 * Do the same thing for the memory mapping - between
408 * elf_bss and last_bss is the bss section.
410 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
411 if (k > last_bss)
412 last_bss = k;
417 * Now fill out the bss section. First pad the last page up
418 * to the page boundary, and then perform a mmap to make sure
419 * that there are zero-mapped pages up to and including the
420 * last bss page.
422 if (padzero(elf_bss)) {
423 error = -EFAULT;
424 goto out_close;
427 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
429 /* Map the last of the bss segment */
430 if (last_bss > elf_bss) {
431 down_write(&current->mm->mmap_sem);
432 error = do_brk(elf_bss, last_bss - elf_bss);
433 up_write(&current->mm->mmap_sem);
434 if (BAD_ADDR(error))
435 goto out_close;
438 *interp_load_addr = load_addr;
439 error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
441 out_close:
442 kfree(elf_phdata);
443 out:
444 return error;
447 static unsigned long load_aout_interp(struct exec * interp_ex,
448 struct file * interpreter)
450 unsigned long text_data, elf_entry = ~0UL;
451 char __user * addr;
452 loff_t offset;
454 current->mm->end_code = interp_ex->a_text;
455 text_data = interp_ex->a_text + interp_ex->a_data;
456 current->mm->end_data = text_data;
457 current->mm->brk = interp_ex->a_bss + text_data;
459 switch (N_MAGIC(*interp_ex)) {
460 case OMAGIC:
461 offset = 32;
462 addr = (char __user *)0;
463 break;
464 case ZMAGIC:
465 case QMAGIC:
466 offset = N_TXTOFF(*interp_ex);
467 addr = (char __user *) N_TXTADDR(*interp_ex);
468 break;
469 default:
470 goto out;
473 down_write(&current->mm->mmap_sem);
474 do_brk(0, text_data);
475 up_write(&current->mm->mmap_sem);
476 if (!interpreter->f_op || !interpreter->f_op->read)
477 goto out;
478 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
479 goto out;
480 flush_icache_range((unsigned long)addr,
481 (unsigned long)addr + text_data);
484 down_write(&current->mm->mmap_sem);
485 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
486 interp_ex->a_bss);
487 up_write(&current->mm->mmap_sem);
488 elf_entry = interp_ex->a_entry;
490 out:
491 return elf_entry;
495 * These are the functions used to load ELF style executables and shared
496 * libraries. There is no binary dependent code anywhere else.
499 #define INTERPRETER_NONE 0
500 #define INTERPRETER_AOUT 1
501 #define INTERPRETER_ELF 2
503 #ifndef STACK_RND_MASK
504 #define STACK_RND_MASK 0x7ff /* with 4K pages 8MB of VA */
505 #endif
507 static unsigned long randomize_stack_top(unsigned long stack_top)
509 unsigned int random_variable = 0;
511 if (current->flags & PF_RANDOMIZE) {
512 random_variable = get_random_int() & STACK_RND_MASK;
513 random_variable <<= PAGE_SHIFT;
515 #ifdef CONFIG_STACK_GROWSUP
516 return PAGE_ALIGN(stack_top) + random_variable;
517 #else
518 return PAGE_ALIGN(stack_top) - random_variable;
519 #endif
522 static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
524 struct file *interpreter = NULL; /* to shut gcc up */
525 unsigned long load_addr = 0, load_bias = 0;
526 int load_addr_set = 0;
527 char * elf_interpreter = NULL;
528 unsigned int interpreter_type = INTERPRETER_NONE;
529 unsigned char ibcs2_interpreter = 0;
530 unsigned long error;
531 struct elf_phdr * elf_ppnt, *elf_phdata;
532 unsigned long elf_bss, elf_brk;
533 int elf_exec_fileno;
534 int retval, i;
535 unsigned int size;
536 unsigned long elf_entry, interp_load_addr = 0;
537 unsigned long start_code, end_code, start_data, end_data;
538 unsigned long reloc_func_desc = 0;
539 char passed_fileno[6];
540 struct files_struct *files;
541 int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
542 unsigned long def_flags = 0;
543 struct {
544 struct elfhdr elf_ex;
545 struct elfhdr interp_elf_ex;
546 struct exec interp_ex;
547 } *loc;
549 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
550 if (!loc) {
551 retval = -ENOMEM;
552 goto out_ret;
555 /* Get the exec-header */
556 loc->elf_ex = *((struct elfhdr *) bprm->buf);
558 retval = -ENOEXEC;
559 /* First of all, some simple consistency checks */
560 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
561 goto out;
563 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
564 goto out;
565 if (!elf_check_arch(&loc->elf_ex))
566 goto out;
567 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
568 goto out;
570 /* Now read in all of the header information */
572 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
573 goto out;
574 if (loc->elf_ex.e_phnum < 1 ||
575 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
576 goto out;
577 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
578 retval = -ENOMEM;
579 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
580 if (!elf_phdata)
581 goto out;
583 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
584 if (retval != size) {
585 if (retval >= 0)
586 retval = -EIO;
587 goto out_free_ph;
590 files = current->files; /* Refcounted so ok */
591 retval = unshare_files();
592 if (retval < 0)
593 goto out_free_ph;
594 if (files == current->files) {
595 put_files_struct(files);
596 files = NULL;
599 /* exec will make our files private anyway, but for the a.out
600 loader stuff we need to do it earlier */
602 retval = get_unused_fd();
603 if (retval < 0)
604 goto out_free_fh;
605 get_file(bprm->file);
606 fd_install(elf_exec_fileno = retval, bprm->file);
608 elf_ppnt = elf_phdata;
609 elf_bss = 0;
610 elf_brk = 0;
612 start_code = ~0UL;
613 end_code = 0;
614 start_data = 0;
615 end_data = 0;
617 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
618 if (elf_ppnt->p_type == PT_INTERP) {
619 /* This is the program interpreter used for
620 * shared libraries - for now assume that this
621 * is an a.out format binary
624 retval = -ENOEXEC;
625 if (elf_ppnt->p_filesz > PATH_MAX ||
626 elf_ppnt->p_filesz < 2)
627 goto out_free_file;
629 retval = -ENOMEM;
630 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
631 GFP_KERNEL);
632 if (!elf_interpreter)
633 goto out_free_file;
635 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
636 elf_interpreter,
637 elf_ppnt->p_filesz);
638 if (retval != elf_ppnt->p_filesz) {
639 if (retval >= 0)
640 retval = -EIO;
641 goto out_free_interp;
643 /* make sure path is NULL terminated */
644 retval = -ENOEXEC;
645 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
646 goto out_free_interp;
648 /* If the program interpreter is one of these two,
649 * then assume an iBCS2 image. Otherwise assume
650 * a native linux image.
652 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
653 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
654 ibcs2_interpreter = 1;
657 * The early SET_PERSONALITY here is so that the lookup
658 * for the interpreter happens in the namespace of the
659 * to-be-execed image. SET_PERSONALITY can select an
660 * alternate root.
662 * However, SET_PERSONALITY is NOT allowed to switch
663 * this task into the new images's memory mapping
664 * policy - that is, TASK_SIZE must still evaluate to
665 * that which is appropriate to the execing application.
666 * This is because exit_mmap() needs to have TASK_SIZE
667 * evaluate to the size of the old image.
669 * So if (say) a 64-bit application is execing a 32-bit
670 * application it is the architecture's responsibility
671 * to defer changing the value of TASK_SIZE until the
672 * switch really is going to happen - do this in
673 * flush_thread(). - akpm
675 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
677 interpreter = open_exec(elf_interpreter);
678 retval = PTR_ERR(interpreter);
679 if (IS_ERR(interpreter))
680 goto out_free_interp;
681 retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
682 if (retval != BINPRM_BUF_SIZE) {
683 if (retval >= 0)
684 retval = -EIO;
685 goto out_free_dentry;
688 /* Get the exec headers */
689 loc->interp_ex = *((struct exec *) bprm->buf);
690 loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
691 break;
693 elf_ppnt++;
696 elf_ppnt = elf_phdata;
697 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
698 if (elf_ppnt->p_type == PT_GNU_STACK) {
699 if (elf_ppnt->p_flags & PF_X)
700 executable_stack = EXSTACK_ENABLE_X;
701 else
702 executable_stack = EXSTACK_DISABLE_X;
703 break;
705 have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
707 /* Some simple consistency checks for the interpreter */
708 if (elf_interpreter) {
709 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
711 /* Now figure out which format our binary is */
712 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
713 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
714 (N_MAGIC(loc->interp_ex) != QMAGIC))
715 interpreter_type = INTERPRETER_ELF;
717 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
718 interpreter_type &= ~INTERPRETER_ELF;
720 retval = -ELIBBAD;
721 if (!interpreter_type)
722 goto out_free_dentry;
724 /* Make sure only one type was selected */
725 if ((interpreter_type & INTERPRETER_ELF) &&
726 interpreter_type != INTERPRETER_ELF) {
727 // FIXME - ratelimit this before re-enabling
728 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
729 interpreter_type = INTERPRETER_ELF;
731 /* Verify the interpreter has a valid arch */
732 if ((interpreter_type == INTERPRETER_ELF) &&
733 !elf_check_arch(&loc->interp_elf_ex))
734 goto out_free_dentry;
735 } else {
736 /* Executables without an interpreter also need a personality */
737 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
740 /* OK, we are done with that, now set up the arg stuff,
741 and then start this sucker up */
743 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
744 char *passed_p = passed_fileno;
745 sprintf(passed_fileno, "%d", elf_exec_fileno);
747 if (elf_interpreter) {
748 retval = copy_strings_kernel(1, &passed_p, bprm);
749 if (retval)
750 goto out_free_dentry;
751 bprm->argc++;
755 /* Flush all traces of the currently running executable */
756 retval = flush_old_exec(bprm);
757 if (retval)
758 goto out_free_dentry;
760 /* Discard our unneeded old files struct */
761 if (files) {
762 steal_locks(files);
763 put_files_struct(files);
764 files = NULL;
767 /* OK, This is the point of no return */
768 current->mm->start_data = 0;
769 current->mm->end_data = 0;
770 current->mm->end_code = 0;
771 current->mm->mmap = NULL;
772 current->flags &= ~PF_FORKNOEXEC;
773 current->mm->def_flags = def_flags;
775 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
776 may depend on the personality. */
777 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
778 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
779 current->personality |= READ_IMPLIES_EXEC;
781 if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
782 current->flags |= PF_RANDOMIZE;
783 arch_pick_mmap_layout(current->mm);
785 /* Do this so that we can load the interpreter, if need be. We will
786 change some of these later */
787 current->mm->free_area_cache = current->mm->mmap_base;
788 current->mm->cached_hole_size = 0;
789 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
790 executable_stack);
791 if (retval < 0) {
792 send_sig(SIGKILL, current, 0);
793 goto out_free_dentry;
796 current->mm->start_stack = bprm->p;
798 /* Now we do a little grungy work by mmaping the ELF image into
799 the correct location in memory. At this point, we assume that
800 the image should be loaded at fixed address, not at a variable
801 address. */
803 for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
804 int elf_prot = 0, elf_flags;
805 unsigned long k, vaddr;
807 if (elf_ppnt->p_type != PT_LOAD)
808 continue;
810 if (unlikely (elf_brk > elf_bss)) {
811 unsigned long nbyte;
813 /* There was a PT_LOAD segment with p_memsz > p_filesz
814 before this one. Map anonymous pages, if needed,
815 and clear the area. */
816 retval = set_brk (elf_bss + load_bias,
817 elf_brk + load_bias);
818 if (retval) {
819 send_sig(SIGKILL, current, 0);
820 goto out_free_dentry;
822 nbyte = ELF_PAGEOFFSET(elf_bss);
823 if (nbyte) {
824 nbyte = ELF_MIN_ALIGN - nbyte;
825 if (nbyte > elf_brk - elf_bss)
826 nbyte = elf_brk - elf_bss;
827 if (clear_user((void __user *)elf_bss +
828 load_bias, nbyte)) {
830 * This bss-zeroing can fail if the ELF
831 * file specifies odd protections. So
832 * we don't check the return value
838 if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
839 if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
840 if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
842 elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
844 vaddr = elf_ppnt->p_vaddr;
845 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
846 elf_flags |= MAP_FIXED;
847 } else if (loc->elf_ex.e_type == ET_DYN) {
848 /* Try and get dynamic programs out of the way of the default mmap
849 base, as well as whatever program they might try to exec. This
850 is because the brk will follow the loader, and is not movable. */
851 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
854 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
855 if (BAD_ADDR(error)) {
856 send_sig(SIGKILL, current, 0);
857 goto out_free_dentry;
860 if (!load_addr_set) {
861 load_addr_set = 1;
862 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
863 if (loc->elf_ex.e_type == ET_DYN) {
864 load_bias += error -
865 ELF_PAGESTART(load_bias + vaddr);
866 load_addr += load_bias;
867 reloc_func_desc = load_bias;
870 k = elf_ppnt->p_vaddr;
871 if (k < start_code) start_code = k;
872 if (start_data < k) start_data = k;
875 * Check to see if the section's size will overflow the
876 * allowed task size. Note that p_filesz must always be
877 * <= p_memsz so it is only necessary to check p_memsz.
879 if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
880 elf_ppnt->p_memsz > TASK_SIZE ||
881 TASK_SIZE - elf_ppnt->p_memsz < k) {
882 /* set_brk can never work. Avoid overflows. */
883 send_sig(SIGKILL, current, 0);
884 goto out_free_dentry;
887 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
889 if (k > elf_bss)
890 elf_bss = k;
891 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
892 end_code = k;
893 if (end_data < k)
894 end_data = k;
895 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
896 if (k > elf_brk)
897 elf_brk = k;
900 loc->elf_ex.e_entry += load_bias;
901 elf_bss += load_bias;
902 elf_brk += load_bias;
903 start_code += load_bias;
904 end_code += load_bias;
905 start_data += load_bias;
906 end_data += load_bias;
908 /* Calling set_brk effectively mmaps the pages that we need
909 * for the bss and break sections. We must do this before
910 * mapping in the interpreter, to make sure it doesn't wind
911 * up getting placed where the bss needs to go.
913 retval = set_brk(elf_bss, elf_brk);
914 if (retval) {
915 send_sig(SIGKILL, current, 0);
916 goto out_free_dentry;
918 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
919 send_sig(SIGSEGV, current, 0);
920 retval = -EFAULT; /* Nobody gets to see this, but.. */
921 goto out_free_dentry;
924 if (elf_interpreter) {
925 if (interpreter_type == INTERPRETER_AOUT)
926 elf_entry = load_aout_interp(&loc->interp_ex,
927 interpreter);
928 else
929 elf_entry = load_elf_interp(&loc->interp_elf_ex,
930 interpreter,
931 &interp_load_addr);
932 if (BAD_ADDR(elf_entry)) {
933 printk(KERN_ERR "Unable to load interpreter %.128s\n",
934 elf_interpreter);
935 force_sig(SIGSEGV, current);
936 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
937 goto out_free_dentry;
939 reloc_func_desc = interp_load_addr;
941 allow_write_access(interpreter);
942 fput(interpreter);
943 kfree(elf_interpreter);
944 } else {
945 elf_entry = loc->elf_ex.e_entry;
946 if (BAD_ADDR(elf_entry)) {
947 send_sig(SIGSEGV, current, 0);
948 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
949 goto out_free_dentry;
953 kfree(elf_phdata);
955 if (interpreter_type != INTERPRETER_AOUT)
956 sys_close(elf_exec_fileno);
958 set_binfmt(&elf_format);
960 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
961 retval = arch_setup_additional_pages(bprm, executable_stack);
962 if (retval < 0) {
963 send_sig(SIGKILL, current, 0);
964 goto out;
966 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
968 compute_creds(bprm);
969 current->flags &= ~PF_FORKNOEXEC;
970 create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
971 load_addr, interp_load_addr);
972 /* N.B. passed_fileno might not be initialized? */
973 if (interpreter_type == INTERPRETER_AOUT)
974 current->mm->arg_start += strlen(passed_fileno) + 1;
975 current->mm->end_code = end_code;
976 current->mm->start_code = start_code;
977 current->mm->start_data = start_data;
978 current->mm->end_data = end_data;
979 current->mm->start_stack = bprm->p;
981 if (current->personality & MMAP_PAGE_ZERO) {
982 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
983 and some applications "depend" upon this behavior.
984 Since we do not have the power to recompile these, we
985 emulate the SVr4 behavior. Sigh. */
986 down_write(&current->mm->mmap_sem);
987 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
988 MAP_FIXED | MAP_PRIVATE, 0);
989 up_write(&current->mm->mmap_sem);
992 #ifdef ELF_PLAT_INIT
994 * The ABI may specify that certain registers be set up in special
995 * ways (on i386 %edx is the address of a DT_FINI function, for
996 * example. In addition, it may also specify (eg, PowerPC64 ELF)
997 * that the e_entry field is the address of the function descriptor
998 * for the startup routine, rather than the address of the startup
999 * routine itself. This macro performs whatever initialization to
1000 * the regs structure is required as well as any relocations to the
1001 * function descriptor entries when executing dynamically links apps.
1003 ELF_PLAT_INIT(regs, reloc_func_desc);
1004 #endif
1006 start_thread(regs, elf_entry, bprm->p);
1007 if (unlikely(current->ptrace & PT_PTRACED)) {
1008 if (current->ptrace & PT_TRACE_EXEC)
1009 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1010 else
1011 send_sig(SIGTRAP, current, 0);
1013 retval = 0;
1014 out:
1015 kfree(loc);
1016 out_ret:
1017 return retval;
1019 /* error cleanup */
1020 out_free_dentry:
1021 allow_write_access(interpreter);
1022 if (interpreter)
1023 fput(interpreter);
1024 out_free_interp:
1025 kfree(elf_interpreter);
1026 out_free_file:
1027 sys_close(elf_exec_fileno);
1028 out_free_fh:
1029 if (files) {
1030 put_files_struct(current->files);
1031 current->files = files;
1033 out_free_ph:
1034 kfree(elf_phdata);
1035 goto out;
1038 /* This is really simpleminded and specialized - we are loading an
1039 a.out library that is given an ELF header. */
1041 static int load_elf_library(struct file *file)
1043 struct elf_phdr *elf_phdata;
1044 struct elf_phdr *eppnt;
1045 unsigned long elf_bss, bss, len;
1046 int retval, error, i, j;
1047 struct elfhdr elf_ex;
1049 error = -ENOEXEC;
1050 retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
1051 if (retval != sizeof(elf_ex))
1052 goto out;
1054 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1055 goto out;
1057 /* First of all, some simple consistency checks */
1058 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1059 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1060 goto out;
1062 /* Now read in all of the header information */
1064 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1065 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1067 error = -ENOMEM;
1068 elf_phdata = kmalloc(j, GFP_KERNEL);
1069 if (!elf_phdata)
1070 goto out;
1072 eppnt = elf_phdata;
1073 error = -ENOEXEC;
1074 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1075 if (retval != j)
1076 goto out_free_ph;
1078 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1079 if ((eppnt + i)->p_type == PT_LOAD)
1080 j++;
1081 if (j != 1)
1082 goto out_free_ph;
1084 while (eppnt->p_type != PT_LOAD)
1085 eppnt++;
1087 /* Now use mmap to map the library into memory. */
1088 down_write(&current->mm->mmap_sem);
1089 error = do_mmap(file,
1090 ELF_PAGESTART(eppnt->p_vaddr),
1091 (eppnt->p_filesz +
1092 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1093 PROT_READ | PROT_WRITE | PROT_EXEC,
1094 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1095 (eppnt->p_offset -
1096 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1097 up_write(&current->mm->mmap_sem);
1098 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1099 goto out_free_ph;
1101 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1102 if (padzero(elf_bss)) {
1103 error = -EFAULT;
1104 goto out_free_ph;
1107 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
1108 bss = eppnt->p_memsz + eppnt->p_vaddr;
1109 if (bss > len) {
1110 down_write(&current->mm->mmap_sem);
1111 do_brk(len, bss - len);
1112 up_write(&current->mm->mmap_sem);
1114 error = 0;
1116 out_free_ph:
1117 kfree(elf_phdata);
1118 out:
1119 return error;
/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
1126 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
1135 * These are the only things you should do on a core-file: use only these
1136 * functions to write out all the necessary info.
1138 static int dump_write(struct file *file, const void *addr, int nr)
1140 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1143 static int dump_seek(struct file *file, loff_t off)
1145 if (file->f_op->llseek) {
1146 if (file->f_op->llseek(file, off, 0) != off)
1147 return 0;
1148 } else
1149 file->f_pos = off;
1150 return 1;
1154 * Decide whether a segment is worth dumping; default is yes to be
1155 * sure (missing info is worse than too much; etc).
1156 * Personally I'd include everything, and use the coredump limit...
1158 * I think we should skip something. But I am not sure how. H.J.
1160 static int maydump(struct vm_area_struct *vma)
1162 /* Do not dump I/O mapped devices or special mappings */
1163 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1164 return 0;
1166 /* Dump shared memory only if mapped from an anonymous file. */
1167 if (vma->vm_flags & VM_SHARED)
1168 return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1170 /* If it hasn't been written to, don't write it out */
1171 if (!vma->anon_vma)
1172 return 0;
1174 return 1;
/* Round x up to the nearest multiple of y (integer arithmetic, y non-zero). */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, written out as n_type */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload (descriptor) */
};
1188 static int notesize(struct memelfnote *en)
1190 int sz;
1192 sz = sizeof(struct elf_note);
1193 sz += roundup(strlen(en->name) + 1, 4);
1194 sz += roundup(en->datasz, 4);
1196 return sz;
1199 #define DUMP_WRITE(addr, nr) \
1200 do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1201 #define DUMP_SEEK(off) \
1202 do { if (!dump_seek(file, (off))) return 0; } while(0)
1204 static int writenote(struct memelfnote *men, struct file *file)
1206 struct elf_note en;
1208 en.n_namesz = strlen(men->name) + 1;
1209 en.n_descsz = men->datasz;
1210 en.n_type = men->type;
1212 DUMP_WRITE(&en, sizeof(en));
1213 DUMP_WRITE(men->name, en.n_namesz);
1214 /* XXX - cast from long long to long to avoid need for libgcc.a */
1215 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1216 DUMP_WRITE(men->data, men->datasz);
1217 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1219 return 1;
1221 #undef DUMP_WRITE
1222 #undef DUMP_SEEK
/*
 * DUMP_WRITE/DUMP_SEEK variants for the core dumper proper.  Each expands
 * to a bare `if' statement carrying its own trailing semicolon, and relies
 * on the expansion site providing locals named `size', `limit' and `file'
 * plus an `end_coredump' label.  DUMP_WRITE also charges @nr against the
 * size limit before writing.
 */
#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;
1231 static void fill_elf_header(struct elfhdr *elf, int segs)
1233 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1234 elf->e_ident[EI_CLASS] = ELF_CLASS;
1235 elf->e_ident[EI_DATA] = ELF_DATA;
1236 elf->e_ident[EI_VERSION] = EV_CURRENT;
1237 elf->e_ident[EI_OSABI] = ELF_OSABI;
1238 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1240 elf->e_type = ET_CORE;
1241 elf->e_machine = ELF_ARCH;
1242 elf->e_version = EV_CURRENT;
1243 elf->e_entry = 0;
1244 elf->e_phoff = sizeof(struct elfhdr);
1245 elf->e_shoff = 0;
1246 elf->e_flags = ELF_CORE_EFLAGS;
1247 elf->e_ehsize = sizeof(struct elfhdr);
1248 elf->e_phentsize = sizeof(struct elf_phdr);
1249 elf->e_phnum = segs;
1250 elf->e_shentsize = 0;
1251 elf->e_shnum = 0;
1252 elf->e_shstrndx = 0;
1253 return;
1256 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1258 phdr->p_type = PT_NOTE;
1259 phdr->p_offset = offset;
1260 phdr->p_vaddr = 0;
1261 phdr->p_paddr = 0;
1262 phdr->p_filesz = sz;
1263 phdr->p_memsz = 0;
1264 phdr->p_flags = 0;
1265 phdr->p_align = 0;
1266 return;
1269 static void fill_note(struct memelfnote *note, const char *name, int type,
1270 unsigned int sz, void *data)
1272 note->name = name;
1273 note->type = type;
1274 note->datasz = sz;
1275 note->data = data;
1276 return;
1280 * fill up all the fields in prstatus from the given task struct, except registers
1281 * which need to be filled up separately.
1283 static void fill_prstatus(struct elf_prstatus *prstatus,
1284 struct task_struct *p, long signr)
1286 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1287 prstatus->pr_sigpend = p->pending.signal.sig[0];
1288 prstatus->pr_sighold = p->blocked.sig[0];
1289 prstatus->pr_pid = p->pid;
1290 prstatus->pr_ppid = p->parent->pid;
1291 prstatus->pr_pgrp = process_group(p);
1292 prstatus->pr_sid = p->signal->session;
1293 if (thread_group_leader(p)) {
1295 * This is the record for the group leader. Add in the
1296 * cumulative times of previous dead threads. This total
1297 * won't include the time of each live thread whose state
1298 * is included in the core dump. The final total reported
1299 * to our parent process when it calls wait4 will include
1300 * those sums as well as the little bit more time it takes
1301 * this and each other thread to finish dying after the
1302 * core dump synchronization phase.
1304 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1305 &prstatus->pr_utime);
1306 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1307 &prstatus->pr_stime);
1308 } else {
1309 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1310 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1312 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1313 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1316 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1317 struct mm_struct *mm)
1319 unsigned int i, len;
1321 /* first copy the parameters from user space */
1322 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1324 len = mm->arg_end - mm->arg_start;
1325 if (len >= ELF_PRARGSZ)
1326 len = ELF_PRARGSZ-1;
1327 if (copy_from_user(&psinfo->pr_psargs,
1328 (const char __user *)mm->arg_start, len))
1329 return -EFAULT;
1330 for(i = 0; i < len; i++)
1331 if (psinfo->pr_psargs[i] == 0)
1332 psinfo->pr_psargs[i] = ' ';
1333 psinfo->pr_psargs[len] = 0;
1335 psinfo->pr_pid = p->pid;
1336 psinfo->pr_ppid = p->parent->pid;
1337 psinfo->pr_pgrp = process_group(p);
1338 psinfo->pr_sid = p->signal->session;
1340 i = p->state ? ffz(~p->state) + 1 : 0;
1341 psinfo->pr_state = i;
1342 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1343 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1344 psinfo->pr_nice = task_nice(p);
1345 psinfo->pr_flag = p->flags;
1346 SET_UID(psinfo->pr_uid, p->uid);
1347 SET_GID(psinfo->pr_gid, p->gid);
1348 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1350 return 0;
1353 /* Here is the structure in which status of each thread is captured. */
1354 struct elf_thread_status
1356 struct list_head list;
1357 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1358 elf_fpregset_t fpu; /* NT_PRFPREG */
1359 struct task_struct *thread;
1360 #ifdef ELF_CORE_COPY_XFPREGS
1361 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1362 #endif
1363 struct memelfnote notes[3];
1364 int num_notes;
1368 * In order to add the specific thread information for the elf file format,
1369 * we need to keep a linked list of every threads pr_status and then
1370 * create a single section for them in the final core file.
1372 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1374 int sz = 0;
1375 struct task_struct *p = t->thread;
1376 t->num_notes = 0;
1378 fill_prstatus(&t->prstatus, p, signr);
1379 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1381 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1382 t->num_notes++;
1383 sz += notesize(&t->notes[0]);
1385 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1386 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1387 t->num_notes++;
1388 sz += notesize(&t->notes[1]);
1391 #ifdef ELF_CORE_COPY_XFPREGS
1392 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1393 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1394 t->num_notes++;
1395 sz += notesize(&t->notes[2]);
1397 #endif
1398 return sz;
1402 * Actual dumper
1404 * This is a two-pass process; first we find the offsets of the bits,
1405 * and then they are actually written out. If we run out of core limit
1406 * we just truncate.
1408 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1410 #define NUM_NOTES 6
1411 int has_dumped = 0;
1412 mm_segment_t fs;
1413 int segs;
1414 size_t size = 0;
1415 int i;
1416 struct vm_area_struct *vma;
1417 struct elfhdr *elf = NULL;
1418 off_t offset = 0, dataoff;
1419 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1420 int numnote;
1421 struct memelfnote *notes = NULL;
1422 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1423 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1424 struct task_struct *g, *p;
1425 LIST_HEAD(thread_list);
1426 struct list_head *t;
1427 elf_fpregset_t *fpu = NULL;
1428 #ifdef ELF_CORE_COPY_XFPREGS
1429 elf_fpxregset_t *xfpu = NULL;
1430 #endif
1431 int thread_status_size = 0;
1432 elf_addr_t *auxv;
1435 * We no longer stop all VM operations.
1437 * This is because those proceses that could possibly change map_count or
1438 * the mmap / vma pages are now blocked in do_exit on current finishing
1439 * this core dump.
1441 * Only ptrace can touch these memory addresses, but it doesn't change
1442 * the map_count or the pages allocated. So no possibility of crashing
1443 * exists while dumping the mm->vm_next areas to the core file.
1446 /* alloc memory for large data structures: too large to be on stack */
1447 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1448 if (!elf)
1449 goto cleanup;
1450 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1451 if (!prstatus)
1452 goto cleanup;
1453 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1454 if (!psinfo)
1455 goto cleanup;
1456 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1457 if (!notes)
1458 goto cleanup;
1459 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1460 if (!fpu)
1461 goto cleanup;
1462 #ifdef ELF_CORE_COPY_XFPREGS
1463 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1464 if (!xfpu)
1465 goto cleanup;
1466 #endif
1468 if (signr) {
1469 struct elf_thread_status *tmp;
1470 read_lock(&tasklist_lock);
1471 do_each_thread(g,p)
1472 if (current->mm == p->mm && current != p) {
1473 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1474 if (!tmp) {
1475 read_unlock(&tasklist_lock);
1476 goto cleanup;
1478 INIT_LIST_HEAD(&tmp->list);
1479 tmp->thread = p;
1480 list_add(&tmp->list, &thread_list);
1482 while_each_thread(g,p);
1483 read_unlock(&tasklist_lock);
1484 list_for_each(t, &thread_list) {
1485 struct elf_thread_status *tmp;
1486 int sz;
1488 tmp = list_entry(t, struct elf_thread_status, list);
1489 sz = elf_dump_thread_status(signr, tmp);
1490 thread_status_size += sz;
1493 /* now collect the dump for the current */
1494 memset(prstatus, 0, sizeof(*prstatus));
1495 fill_prstatus(prstatus, current, signr);
1496 elf_core_copy_regs(&prstatus->pr_reg, regs);
1498 segs = current->mm->map_count;
1499 #ifdef ELF_CORE_EXTRA_PHDRS
1500 segs += ELF_CORE_EXTRA_PHDRS;
1501 #endif
1503 /* Set up header */
1504 fill_elf_header(elf, segs+1); /* including notes section */
1506 has_dumped = 1;
1507 current->flags |= PF_DUMPCORE;
1510 * Set up the notes in similar form to SVR4 core dumps made
1511 * with info from their /proc.
1514 fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1516 fill_psinfo(psinfo, current->group_leader, current->mm);
1517 fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1519 numnote = 2;
1521 auxv = (elf_addr_t *) current->mm->saved_auxv;
1523 i = 0;
1525 i += 2;
1526 while (auxv[i - 2] != AT_NULL);
1527 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1528 i * sizeof (elf_addr_t), auxv);
1530 /* Try to dump the FPU. */
1531 if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
1532 fill_note(notes + numnote++,
1533 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1534 #ifdef ELF_CORE_COPY_XFPREGS
1535 if (elf_core_copy_task_xfpregs(current, xfpu))
1536 fill_note(notes + numnote++,
1537 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1538 #endif
1540 fs = get_fs();
1541 set_fs(KERNEL_DS);
1543 DUMP_WRITE(elf, sizeof(*elf));
1544 offset += sizeof(*elf); /* Elf header */
1545 offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */
1547 /* Write notes phdr entry */
1549 struct elf_phdr phdr;
1550 int sz = 0;
1552 for (i = 0; i < numnote; i++)
1553 sz += notesize(notes + i);
1555 sz += thread_status_size;
1557 fill_elf_note_phdr(&phdr, sz, offset);
1558 offset += sz;
1559 DUMP_WRITE(&phdr, sizeof(phdr));
1562 /* Page-align dumped data */
1563 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1565 /* Write program headers for segments dump */
1566 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1567 struct elf_phdr phdr;
1568 size_t sz;
1570 sz = vma->vm_end - vma->vm_start;
1572 phdr.p_type = PT_LOAD;
1573 phdr.p_offset = offset;
1574 phdr.p_vaddr = vma->vm_start;
1575 phdr.p_paddr = 0;
1576 phdr.p_filesz = maydump(vma) ? sz : 0;
1577 phdr.p_memsz = sz;
1578 offset += phdr.p_filesz;
1579 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1580 if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
1581 if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
1582 phdr.p_align = ELF_EXEC_PAGESIZE;
1584 DUMP_WRITE(&phdr, sizeof(phdr));
1587 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1588 ELF_CORE_WRITE_EXTRA_PHDRS;
1589 #endif
1591 /* write out the notes section */
1592 for (i = 0; i < numnote; i++)
1593 if (!writenote(notes + i, file))
1594 goto end_coredump;
1596 /* write out the thread status notes section */
1597 list_for_each(t, &thread_list) {
1598 struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
1599 for (i = 0; i < tmp->num_notes; i++)
1600 if (!writenote(&tmp->notes[i], file))
1601 goto end_coredump;
1604 DUMP_SEEK(dataoff);
1606 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1607 unsigned long addr;
1609 if (!maydump(vma))
1610 continue;
1612 for (addr = vma->vm_start;
1613 addr < vma->vm_end;
1614 addr += PAGE_SIZE) {
1615 struct page* page;
1616 struct vm_area_struct *vma;
1618 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1619 &page, &vma) <= 0) {
1620 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1621 } else {
1622 if (page == ZERO_PAGE(addr)) {
1623 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1624 } else {
1625 void *kaddr;
1626 flush_cache_page(vma, addr, page_to_pfn(page));
1627 kaddr = kmap(page);
1628 if ((size += PAGE_SIZE) > limit ||
1629 !dump_write(file, kaddr,
1630 PAGE_SIZE)) {
1631 kunmap(page);
1632 page_cache_release(page);
1633 goto end_coredump;
1635 kunmap(page);
1637 page_cache_release(page);
1642 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1643 ELF_CORE_WRITE_EXTRA_DATA;
1644 #endif
1646 if ((off_t)file->f_pos != offset) {
1647 /* Sanity check */
1648 printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1649 (off_t)file->f_pos, offset);
1652 end_coredump:
1653 set_fs(fs);
1655 cleanup:
1656 while (!list_empty(&thread_list)) {
1657 struct list_head *tmp = thread_list.next;
1658 list_del(tmp);
1659 kfree(list_entry(tmp, struct elf_thread_status, list));
1662 kfree(elf);
1663 kfree(prstatus);
1664 kfree(psinfo);
1665 kfree(notes);
1666 kfree(fpu);
1667 #ifdef ELF_CORE_COPY_XFPREGS
1668 kfree(xfpu);
1669 #endif
1670 return has_dumped;
1671 #undef NUM_NOTES
1674 #endif /* USE_ELF_CORE_DUMP */
1676 static int __init init_elf_binfmt(void)
1678 return register_binfmt(&elf_format);
1681 static void __exit exit_elf_binfmt(void)
1683 /* Remove the COFF and ELF loaders. */
1684 unregister_binfmt(&elf_format);
1687 core_initcall(init_elf_binfmt);
1688 module_exit(exit_elf_binfmt);
1689 MODULE_LICENSE("GPL");