 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright (c) 2018, Joyent, Inc.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/policy.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/prsystm.h>
#include <vm/seg_vn.h>
#include <sys/modctl.h>
#include <sys/systeminfo.h>
#include <sys/vmparam.h>
#include <sys/machelf.h>
#include <sys/shm_impl.h>
#include <sys/archsystm.h>
#include <sys/fasttrap.h>
#include <sys/brand.h>
#include <sys/siginfo.h>
#include <sys/random.h>
#if defined(__x86)
#include <sys/comm_page_util.h>
#endif /* defined(__x86) */
extern volatile size_t aslr_max_brk_skew;

#define	ORIGIN_STR	"ORIGIN"
#define	ORIGIN_STR_SIZE	6
static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
    ssize_t *);
static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
    ssize_t *, caddr_t *, ssize_t *);
static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
    Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
    caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *,
    boolean_t);
static const char *shstrtab_data[] = {

typedef struct shstrtab {
	int sst_ndx[STR_NUM];
	int sst_cur;
} shstrtab_t;

shstrtab_init(shstrtab_t *s)
	bzero(&s->sst_ndx, sizeof (s->sst_ndx));

shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
	if ((ret = s->sst_ndx[type]) != 0)

	ret = s->sst_ndx[type] = s->sst_cur;
	s->sst_cur += strlen(shstrtab_data[type]) + 1;

shstrtab_size(const shstrtab_t *s)

shstrtab_dump(const shstrtab_t *s, char *buf)
	for (i = 0; i < STR_NUM; i++) {
		if ((ndx = s->sst_ndx[i]) != 0)
			(void) strcpy(buf + ndx, shstrtab_data[i]);
dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
	ASSERT(phdrp->p_type == PT_SUNWDTRACE);

	/*
	 * See the comment in fasttrap.h for information on how to safely
	 * update this program header.
	 */
	if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
	    (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))

	args->thrptr = phdrp->p_vaddr + base;

handle_secflag_dt(proc_t *p, uint_t dt, uint_t val)
	flag = PROC_SEC_ASLR;

	if (secflag_isset(p->p_secflags.psf_lower, flag))
	if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
	    secflag_isset(p->p_secflags.psf_inherit, flag))
	secflag_clear(&p->p_secflags.psf_effective, flag);

	if (!secflag_isset(p->p_secflags.psf_upper, flag))
	if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
	    !secflag_isset(p->p_secflags.psf_inherit, flag))
	secflag_set(&p->p_secflags.psf_effective, flag);
/*
 * Map in the executable pointed to by vp. Returns 0 on success.
 */
mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
    intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
    caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
	caddr_t phdrbase = NULL;
	int nshdrs, shstrndx, nphdrs;
	Phdr *dynphdr = NULL;
	Phdr *dtrphdr = NULL;

	*lddatap = (uintptr_t)NULL;

	if (error = execpermissions(vp, &vat, args)) {
		uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);

	if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
	    (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
		uprintf("%s: Cannot read %s\n", exec_file, args->pathname);

	if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
		uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
		kmem_free(phdrbase, phdrsize);

	if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
	    &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
	    len, &execsz, brksize, B_TRUE)) {
		uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
		kmem_free(phdrbase, phdrsize);

	/*
	 * Inform our caller if the executable needs an interpreter.
	 */
	*interp = (dynphdr == NULL) ? 0 : 1;

	/*
	 * If this is a statically linked executable, voffset should indicate
	 * the address of the executable itself (it normally holds the address
	 * of the interpreter).
	 */
	if (ehdr->e_type == ET_EXEC && *interp == 0)

		*uphdr_vaddr = uphdr->p_vaddr;

		*uphdr_vaddr = (Addr)-1;

	kmem_free(phdrbase, phdrsize);
elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
    int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
	caddr_t phdrbase = NULL;
	Phdr *intphdr = NULL;
	Phdr *dynamicphdr = NULL;
	Phdr *dataphdrp = NULL;
	Phdr *capphdr = NULL;
	struct proc *p = ttoproc(curthread);
	struct user *up = PTOU(p);
		aux_entry_t elfargs[__KERN_NAUXV_IMPL];
		char dl_name[MAXPATHLEN];
		char pathbuf[MAXPATHLEN];
		struct execenv exenv;
	} *bigwad;	/* kmem_alloc this behemoth so we don't blow stack */
	int nshdrs, shstrndx, nphdrs;

	ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);

	bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
	ehdrp = &bigwad->ehdr;
	dlnp = bigwad->dl_name;
	pathbufp = bigwad->pathbuf;

	/*
	 * Obtain ELF and program header information.
	 */
	if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
	    (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,

	/*
	 * Prevent executing an ELF file that has no entry point.
	 */
	if (ehdrp->e_entry == 0) {
		uprintf("%s: Bad entry point\n", exec_file);
	/*
	 * Put data model that we're exec-ing to into the args passed to
	 * exec_args(), so it will know what it is copying to on new stack.
	 * Now that we know whether we are exec-ing a 32-bit or 64-bit
	 * executable, we can set execsz with the appropriate NCARGS.
	 */
	if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
		args->to_model = DATAMODEL_ILP32;
		*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);

		args->to_model = DATAMODEL_LP64;
		args->stk_prot &= ~PROT_EXEC;
#if defined(__i386) || defined(__amd64)
		args->dat_prot &= ~PROT_EXEC;
		*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);

		args->to_model = DATAMODEL_ILP32;
		*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);

	/*
	 * We delay invoking the brand callback until we've figured out
	 * what kind of elf binary we're trying to run, 32-bit or 64-bit.
	 * We do this because now the brand library can just check
	 * args->to_model to see if the target is 32-bit or 64-bit without
	 * having to duplicate all the code above.
	 *
	 * The level checks associated with brand handling below are used to
	 * prevent a loop since the brand elfexec function typically comes back
	 * through this function. We must check <= here since the nested
	 * handling in the #! interpreter code will increment the level before
	 * calling gexec to run the final elfexec interpreter.
	 */
	if ((level <= INTP_MAXDEPTH) &&
	    (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
		error = BROP(p)->b_elfexec(vp, uap, args,
		    idatap, level + 1, execsz, setid, exec_file, cred,

	/*
	 * Determine aux size now so that stack can be built
	 * in one shot (except actual copyout of aux image),
	 * determine any non-default stack protections,
	 * and still have this code be machine independent.
	 */
	hsize = ehdrp->e_phentsize;
	phdrp = (Phdr *)phdrbase;
	for (i = nphdrs; i > 0; i--) {
		switch (phdrp->p_type) {
			hasauxv = hasintp = 1;

			args->stk_prot = PROT_USER;
			if (phdrp->p_flags & PF_R)
				args->stk_prot |= PROT_READ;
			if (phdrp->p_flags & PF_W)
				args->stk_prot |= PROT_WRITE;
			if (phdrp->p_flags & PF_X)
				args->stk_prot |= PROT_EXEC;

		phdrp = (Phdr *)((caddr_t)phdrp + hsize);

	if (ehdrp->e_type != ET_EXEC) {

	/* Copy BSS permissions to args->dat_prot */
	if (dataphdrp != NULL) {
		args->dat_prot = PROT_USER;
		if (dataphdrp->p_flags & PF_R)
			args->dat_prot |= PROT_READ;
		if (dataphdrp->p_flags & PF_W)
			args->dat_prot |= PROT_WRITE;
		if (dataphdrp->p_flags & PF_X)
			args->dat_prot |= PROT_EXEC;
	/*
	 * If an aux vector will be required - reserve the space for
	 * it now.  This may be increased by exec_args if there are
	 * ISA-specific types (included in __KERN_NAUXV_IMPL).
	 *
	 * If an AUX vector is being built - the base AUX
	 *	AT_SUN_PLATFORM (added in stk_copyout)
	 *	AT_SUN_EXECNAME (added in stk_copyout)
	 */
	if (hasintp && hasu) {
		/*
		 * Has PT_INTERP & PT_PHDR - the auxvectors that
		 */
		args->auxsize = (9 + 5) * sizeof (aux_entry_t);
	} else if (hasintp) {
		/*
		 * Has PT_INTERP but no PT_PHDR
		 */
		args->auxsize = (9 + 2) * sizeof (aux_entry_t);

		args->auxsize = 9 * sizeof (aux_entry_t);
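
/*
 * Illustrative sketch (compiled out, not part of this file's logic): the
 * auxsize bookkeeping above only counts fixed-size aux_entry_t slots -- 9
 * base entries, plus 5 when both PT_INTERP and PT_PHDR are present, or 2
 * with PT_INTERP alone.  The fake_aux_entry_t type below is a stand-in.
 */
#if 0
#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for aux_entry_t; the real type lives in the kernel. */
typedef struct { long a_type; long a_val; } fake_aux_entry_t;

static size_t
aux_reserve(int hasintp, int hasu)
{
	int n = 9;			/* base entries, AT_NULL included */

	if (hasintp && hasu)
		n += 5;			/* AT_PHDR .. AT_SUN_LDDATA */
	else if (hasintp)
		n += 2;			/* AT_ENTRY, AT_SUN_LDDATA */
	return (n * sizeof (fake_aux_entry_t));
}

int
main(void)
{
	printf("dynamic w/ PT_PHDR: %zu bytes\n", aux_reserve(1, 1));
	printf("static:             %zu bytes\n", aux_reserve(0, 0));
	return (0);
}
#endif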
	/*
	 * If this binary is using an emulator, we need to add an
	 * AT_SUN_EMULATOR aux entry.
	 */
	if (args->emulator != NULL)
		args->auxsize += sizeof (aux_entry_t);

	/*
	 * On supported kernels (x86_64) make room in the auxv for the
	 * AT_SUN_COMMPAGE entry.  This will go unpopulated on i86xpv systems
	 * which do not provide such functionality.
	 *
	 * Additionally cover the floating point information AT_SUN_FPSIZE and
	 */
	args->auxsize += 3 * sizeof (aux_entry_t);
#endif /* defined(__amd64) */

	if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
		/*
		 * We will be adding 4 entries to the aux vectors.  One for
		 * the brandname and 3 for the brand specific aux vectors.
		 */
		args->auxsize += 4 * sizeof (aux_entry_t);
	/* If the binary has an explicit ASLR flag, it must be honoured */
	if ((dynamicphdr != NULL) &&
	    (dynamicphdr->p_filesz > 0)) {

#define	DYN_STRIDE	100
		for (i = 0; i < dynamicphdr->p_filesz;
		    i += sizeof (*dyn) * DYN_STRIDE) {
			int ndyns = (dynamicphdr->p_filesz - i) / sizeof (*dyn);

			ndyns = MIN(DYN_STRIDE, ndyns);
			dynsize = ndyns * sizeof (*dyn);

			dyn = kmem_alloc(dynsize, KM_SLEEP);

			if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn,
			    dynsize, (offset_t)(dynamicphdr->p_offset + i),
			    CRED(), &resid)) != 0) {
				uprintf("%s: cannot read .dynamic section\n",

			for (dp = dyn; dp < (dyn + ndyns); dp++) {
				if (dp->d_tag == DT_SUNW_ASLR) {
					if ((error = handle_secflag_dt(p,
					    dp->d_un.d_val)) != 0) {
						uprintf("%s: error setting "
						    "security-flag from "
						    "DT_SUNW_ASLR: %d\n",

			kmem_free(dyn, dynsize);
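
/*
 * Illustrative sketch (compiled out): the DYN_STRIDE loop above walks the
 * .dynamic section in bounded batches so each vn_rdwr() only needs a small
 * buffer.  The entry layout and tag value here are placeholders, not the
 * real <sys/link.h> definitions.
 */
#if 0
#include <stdio.h>
#include <stddef.h>

#define	FAKE_STRIDE		100
#define	FAKE_DT_SUNW_ASLR	0x1234	/* placeholder tag for the sketch */

typedef struct { long d_tag; unsigned long d_val; } fake_dyn_t;

/* Scan an in-memory .dynamic image in FAKE_STRIDE-entry chunks. */
static int
find_aslr_flag(const fake_dyn_t *dyns, size_t filesz, unsigned long *valp)
{
	for (size_t off = 0; off < filesz;
	    off += sizeof (fake_dyn_t) * FAKE_STRIDE) {
		size_t ndyns = (filesz - off) / sizeof (fake_dyn_t);

		if (ndyns > FAKE_STRIDE)
			ndyns = FAKE_STRIDE;
		const fake_dyn_t *dp = dyns + off / sizeof (fake_dyn_t);
		for (size_t j = 0; j < ndyns; j++) {
			if (dp[j].d_tag == FAKE_DT_SUNW_ASLR) {
				*valp = dp[j].d_val;
				return (1);
			}
		}
	}
	return (0);
}

int
main(void)
{
	fake_dyn_t dyns[150] = { { 0, 0 } };
	unsigned long val;

	dyns[120].d_tag = FAKE_DT_SUNW_ASLR;
	dyns[120].d_val = 1;
	printf("%d\n", find_aslr_flag(dyns, sizeof (dyns), &val));	/* 1 */
	return (0);
}
#endif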
	/* Hardware/Software capabilities */
	if (capphdr != NULL &&
	    (capsize = capphdr->p_filesz) > 0 &&
	    capsize <= 16 * sizeof (*cap)) {
		int ncaps = capsize / sizeof (*cap);

		cap = kmem_alloc(capsize, KM_SLEEP);
		if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
		    capsize, (offset_t)capphdr->p_offset,
		    UIO_SYSSPACE, 0, 0, CRED(), &resid)) != 0) {
			uprintf("%s: Cannot read capabilities section\n",

		for (cp = cap; cp < cap + ncaps; cp++) {
			if (cp->c_tag == CA_SUNW_SF_1 &&
			    (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
				if (args->to_model == DATAMODEL_LP64)

	aux = bigwad->elfargs;
	/*
	 * Move args to the user's stack.
	 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
	 */
	if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {

	/* we're single threaded after this point */

	/*
	 * If this is an ET_DYN executable (shared object),
	 * determine its memory size so that mapelfexec() can load it.
	 */
	if (ehdrp->e_type == ET_DYN)
		len = elfsize(ehdrp, nphdrs, phdrbase, NULL);

	if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
	    &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
	    len, execsz, &brksize, B_TRUE)) != 0)

	if (uphdr != NULL && intphdr == NULL)

	if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
		uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);

	if (intphdr != NULL) {
		dlnsize = intphdr->p_filesz;

		if (dlnsize > MAXPATHLEN || dlnsize <= 0)

		/*
		 * Read in "interpreter" pathname.
		 */
		if ((error = vn_rdwr(UIO_READ, vp, dlnp, intphdr->p_filesz,
		    (offset_t)intphdr->p_offset, UIO_SYSSPACE, 0, 0,
		    CRED(), &resid)) != 0) {
			uprintf("%s: Cannot obtain interpreter pathname\n",

		if (resid != 0 || dlnp[dlnsize - 1] != '\0')
		/*
		 * Search for '$ORIGIN' token in interpreter path.
		 * If found, expand it.
		 */
		for (p = dlnp; p = strchr(p, '$'); ) {
			if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))

			/*
			 * We don't support $ORIGIN on setid programs to close
			 * a potential attack vector.
			 */
			if ((setid & EXECSETID_SETID) != 0) {

			bcopy(dlnp, pathbufp, len);

			if (_ptr = strrchr(args->pathname, '/')) {
				len = _ptr - args->pathname;
				if ((curlen + len) > MAXPATHLEN)

				bcopy(args->pathname, &pathbufp[curlen], len);

				/*
				 * executable is a basename found in the
				 * current directory.  So - just substitute
				 */
				pathbufp[curlen] = '.';

			p += ORIGIN_STR_SIZE;

			if ((curlen + len) > MAXPATHLEN)
			bcopy(p, &pathbufp[curlen], len);

			pathbufp[curlen++] = '\0';
			bcopy(pathbufp, dlnp, curlen);
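
/*
 * Illustrative sketch (compiled out): the $ORIGIN expansion above splices
 * the directory portion of the executable's pathname (or ".") in place of
 * the token.  The helper name and buffer handling below are simplified
 * assumptions for a user-space demonstration, not the kernel's in-place
 * pathbufp manipulation.
 */
#if 0
#include <stdio.h>
#include <string.h>

static int
expand_origin(const char *interp, const char *execpath,
    char *out, size_t outlen)
{
	const char *dollar = strstr(interp, "$ORIGIN");
	const char *slash = strrchr(execpath, '/');
	const char *dir = (slash != NULL) ? execpath : ".";
	size_t dirlen = (slash != NULL) ? (size_t)(slash - execpath) : 1;
	size_t pre, rest;

	if (dollar == NULL)
		return (-1);
	pre = (size_t)(dollar - interp);
	rest = strlen(dollar + 7);		/* skip "$ORIGIN" */
	if (pre + dirlen + rest + 1 > outlen)
		return (-1);
	(void) memcpy(out, interp, pre);
	(void) memcpy(out + pre, dir, dirlen);
	(void) strcpy(out + pre + dirlen, dollar + 7);
	return (0);
}

int
main(void)
{
	char buf[1024];

	if (expand_origin("$ORIGIN/../lib/ld.so.1", "/opt/app/bin/prog",
	    buf, sizeof (buf)) == 0)
		printf("%s\n", buf);	/* /opt/app/bin/../lib/ld.so.1 */
	return (0);
}
#endif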
		/*
		 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
		 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
		 * Just in case /usr is not mounted, change it now.
		 */
		if (strcmp(dlnp, USR_LIB_RTLD) == 0)

		error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
		if (error && dlnp != bigwad->dl_name) {
			/* new kernel, old user-level */
			error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,

			uprintf("%s: Cannot find %s\n", exec_file, dlnp);

	/*
	 * Setup the "aux" vector.
	 */
		if ((ehdrp->e_type == ET_DYN) &&
		    (ehdrp->e_entry == 0)) {
			/* don't use the first page */
			bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
			bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;

			bigwad->exenv.ex_bssbase = bssbase;
			bigwad->exenv.ex_brkbase = brkbase;

		bigwad->exenv.ex_brksize = brksize;
		bigwad->exenv.ex_magic = elfmagic;
		bigwad->exenv.ex_vp = vp;
		setexecenv(&bigwad->exenv);

		ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
		ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
		ADDAUX(aux, AT_PHNUM, nphdrs)
		ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)

		if ((error = execopen(&vp, &fd)) != 0) {

		ADDAUX(aux, AT_EXECFD, fd)

		if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
			uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
		/*
		 * Now obtain the ELF header along with the entire program
		 * header contained in "nvp".
		 */
		kmem_free(phdrbase, phdrsize);

		if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
		    &shstrndx, &nphdrs)) != 0 ||
		    (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
			uprintf("%s: Cannot read %s\n", exec_file, dlnp);

		/*
		 * Determine memory size of the "interpreter's" loadable
		 * sections.  This size is then used to obtain the virtual
		 * address of a hole, in the user's address space, large
		 * enough to map the "interpreter".
		 */
		if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
			uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);

		error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
		    &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
		    execsz, NULL, B_FALSE);
		if (error || junk != NULL) {
			uprintf("%s: Cannot map %s\n", exec_file, dlnp);

		/*
		 * We use the DTrace program header to initialize the
		 * architecture-specific user per-LWP location. The dtrace
		 * fasttrap provider requires ready access to per-LWP scratch
		 * space. We assume that there is only one such program header
		 * in the interpreter.
		 */
		if (dtrphdr != NULL &&
		    dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
			uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);

		ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)

		int auxf = AF_SUN_HWCAPVERIFY;

		/*
		 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
		 */
		ADDAUX(aux, AT_BASE, voffset)
		ADDAUX(aux, AT_FLAGS, at_flags)
		ADDAUX(aux, AT_PAGESZ, PAGESIZE)
		/*
		 * Linker flags. (security)
		 * p_flag not yet set at this time.
		 * We rely on gexec() to provide us with the information.
		 * If the application is set-uid but this is not reflected
		 * in a mismatch between real/effective uids/gids, then
		 * don't treat this as a set-uid exec.  So we care about
		 * the EXECSETID_UGIDS flag but not the ...SETID flag.
		 */
		if ((setid &= ~EXECSETID_SETID) != 0)
			auxf |= AF_SUN_SETUGID;

		/*
		 * If we're running a native process from within a branded
		 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
		 * that the native ld.so.1 is able to link with the native
		 * libraries instead of using the brand libraries that are
		 * installed in the zone.  We only do this for processes
		 * which we trust because we see they are already running
		 * under pfexec (where uid != euid).  This prevents a
		 * malicious user within the zone from crafting a wrapper to
		 * run native suid commands with insecure libraries interposed.
		 */
		if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
		    (setid &= ~EXECSETID_SETID) != 0))
			auxf &= ~AF_SUN_SETUGID;
		/*
		 * Record the user addr of the auxflags aux vector entry
		 * since brands may optionally want to manipulate this field.
		 */
		args->auxp_auxflags =
		    (char *)((char *)args->stackend +
		    ((char *)&aux->a_type -
		    (char *)bigwad->elfargs));
		ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);

		/*
		 * Hardware capability flag word (performance hints)
		 * Used for choosing faster library routines.
		 * (Potentially different between 32-bit and 64-bit ABIs)
		 */
		if (args->to_model == DATAMODEL_NATIVE) {
			ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
			ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)

			ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
			ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)

		ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
		ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)

		/*
		 * Reserve space for the brand-private aux vectors,
		 * and record the user addr of that space.
		 */
		    (char *)((char *)args->stackend +
		    ((char *)&aux->a_type -
		    (char *)bigwad->elfargs));
		ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
		ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
		ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)

		/*
		 * Add the comm page auxv entry, mapping it in if needed. Also
		 * take care of the FPU entries.
		 */
		if (args->commpage != 0 ||
		    (args->commpage = (uintptr_t)comm_page_mapin()) != 0) {
			ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)

			/*
			 * If the comm page cannot be mapped, pad out the auxv
			 * to satisfy later size checks.
			 */
			ADDAUX(aux, AT_NULL, 0)

		fptype = AT_386_FPINFO_NONE;
		fpu_auxv_info(&fptype, &fpsize);
		if (fptype != AT_386_FPINFO_NONE) {
			ADDAUX(aux, AT_SUN_FPTYPE, fptype)
			ADDAUX(aux, AT_SUN_FPSIZE, fpsize)

			ADDAUX(aux, AT_NULL, 0)
			ADDAUX(aux, AT_NULL, 0)
#endif /* defined(__amd64) */

		ADDAUX(aux, AT_NULL, 0)
		postfixsize = (char *)aux - (char *)bigwad->elfargs;

		/*
		 * We make assumptions above when we determine how many aux
		 * vector entries we will be adding. However, if we have an
		 * invalid elf file, it is possible that mapelfexec might
		 * behave differently (but not return an error), in which case
		 * the number of aux entries we actually add will be different.
		 * We detect that now and error out.
		 */
		if (postfixsize != args->auxsize) {
			DTRACE_PROBE2(elfexec_badaux, int, postfixsize,

		ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
	/*
	 * For the 64-bit kernel, the limit is big enough that rounding it up
	 * to a page can overflow the 64-bit limit, so we check for btopr()
	 * overflowing here by comparing it with the unrounded limit in pages.
	 * If it hasn't overflowed, compare the exec size with the rounded up
	 * limit in pages.  Otherwise, just compare with the unrounded limit.
	 */
	limit = btop(p->p_vmem_ctl);
	roundlimit = btopr(p->p_vmem_ctl);
	if ((roundlimit > limit && *execsz > roundlimit) ||
	    (roundlimit < limit && *execsz > limit)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		mutex_exit(&p->p_lock);
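
/*
 * Illustrative sketch (compiled out): the comparison above guards against
 * btopr() wrapping when the RLIMIT_VMEM limit is near the top of the address
 * range.  The page size, shift, and helper names below are assumptions of
 * the sketch, not the kernel's btop()/btopr() macros.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	FAKE_PAGESIZE	4096ULL
#define	FAKE_PAGESHIFT	12

static uint64_t fake_btop(uint64_t bytes) { return (bytes >> FAKE_PAGESHIFT); }
static uint64_t
fake_btopr(uint64_t bytes)
{
	/* Round up to pages; this is where a huge limit can wrap. */
	return ((bytes + FAKE_PAGESIZE - 1) >> FAKE_PAGESHIFT);
}

/*
 * If rounding overflowed (roundlimit < limit), fall back to the unrounded
 * limit; otherwise compare against the rounded-up one.
 */
static int
over_vmem_limit(uint64_t execsz_pages, uint64_t vmem_ctl_bytes)
{
	uint64_t limit = fake_btop(vmem_ctl_bytes);
	uint64_t roundlimit = fake_btopr(vmem_ctl_bytes);

	return ((roundlimit > limit && execsz_pages > roundlimit) ||
	    (roundlimit < limit && execsz_pages > limit));
}

int
main(void)
{
	/* A maximal limit wraps during rounding and triggers the fallback. */
	printf("%d\n", over_vmem_limit(1000, UINT64_MAX));	/* 0 */
	return (0);
}
#endif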
	bzero(up->u_auxv, sizeof (up->u_auxv));
	up->u_commpagep = args->commpage;

	/*
	 * Copy the aux vector to the user stack.
	 */
	error = execpoststack(args, bigwad->elfargs, postfixsize);

	/*
	 * Copy auxv to the process's user structure for use by /proc.
	 * If this is a branded process, the brand's exec routine will
	 * copy its private entries to the user structure later. It
	 * relies on the fact that the blank entries are at the end.
	 */
	num_auxv = postfixsize / sizeof (aux_entry_t);
	ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
	aux = bigwad->elfargs;
	for (i = 0; i < num_auxv; i++) {
		up->u_auxv[i].a_type = aux[i].a_type;
		up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;

	/*
	 * Pass back the starting address so we can set the program counter.
	 */
	args->entry = (uintptr_t)(ehdrp->e_entry + voffset);

	if (ehdrp->e_type == ET_DYN) {
		/*
		 * If we are executing a shared library which doesn't
		 * have an interpreter (probably ld.so.1) then
		 * we don't set the brkbase now. Instead we
		 * delay its setting until the first call
		 * via grow.c::brk(). This permits ld.so.1 to
		 * initialize brkbase to the tail of the executable it
		 * loads (which is where it needs to be).
		 */
		bigwad->exenv.ex_brkbase = (caddr_t)0;
		bigwad->exenv.ex_bssbase = (caddr_t)0;
		bigwad->exenv.ex_brksize = 0;

		bigwad->exenv.ex_brkbase = brkbase;
		bigwad->exenv.ex_bssbase = bssbase;
		bigwad->exenv.ex_brksize = brksize;

		bigwad->exenv.ex_magic = elfmagic;
		bigwad->exenv.ex_vp = vp;
		setexecenv(&bigwad->exenv);

	if (fd != -1)		/* did we open the a.out yet */
		(void) execclose(fd);

	psignal(p, SIGKILL);

	if (phdrbase != NULL)
		kmem_free(phdrbase, phdrsize);

	kmem_free(cap, capsize);
	kmem_free(bigwad, sizeof (struct bigwad));
/*
 * Compute the memory size requirement for the ELF file.
 */
elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
	Phdr *phdrp = (Phdr *)phdrbase;
	int hsize = ehdrp->e_phentsize;
	int dfirst = 1;	/* first data segment */
	uintptr_t loaddr = 0;
	uintptr_t hiaddr = 0;

	for (i = nphdrs; i > 0; i--) {
		if (phdrp->p_type == PT_LOAD) {
			lo = phdrp->p_vaddr;
			hi = lo + phdrp->p_memsz;

			/*
			 * save the address of the first data segment
			 * of an object - used for the AT_SUNW_LDDATA
			 */
			if ((lddata != NULL) && dfirst &&
			    (phdrp->p_flags & PF_W)) {

		phdrp = (Phdr *)((caddr_t)phdrp + hsize);

	len = hiaddr - (loaddr & PAGEMASK);
	len = roundup(len, PAGESIZE);
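
/*
 * Illustrative sketch (compiled out): the size computed above is the span
 * from the page-aligned lowest PT_LOAD p_vaddr to the highest
 * p_vaddr + p_memsz, rounded up to a page.  The structure, page size, and
 * min/max tracking below are simplified stand-ins for the loop above.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	FAKE_PAGESIZE	4096UL
#define	FAKE_PAGEMASK	(~(FAKE_PAGESIZE - 1))

typedef struct { uintptr_t p_vaddr; size_t p_memsz; } fake_load_t;

static size_t
fake_elfsize(const fake_load_t *ph, int n)
{
	uintptr_t loaddr = (uintptr_t)-1, hiaddr = 0;

	for (int i = 0; i < n; i++) {
		uintptr_t lo = ph[i].p_vaddr;
		uintptr_t hi = lo + ph[i].p_memsz;

		if (lo < loaddr)
			loaddr = lo;
		if (hi > hiaddr)
			hiaddr = hi;
	}
	size_t len = hiaddr - (loaddr & FAKE_PAGEMASK);
	return ((len + FAKE_PAGESIZE - 1) & FAKE_PAGEMASK);
}

int
main(void)
{
	fake_load_t ph[] = {
		{ 0x400000, 0x1234 },		/* text */
		{ 0x402000, 0x800 },		/* data + bss */
	};

	printf("0x%zx\n", fake_elfsize(ph, 2));	/* 0x3000 */
	return (0);
}
#endif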
/*
 * Read in the ELF header and program header table.
 *
 *	ENOEXEC	File format is not recognized
 *	EINVAL	Format recognized but execution not supported
 */
getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
	/*
	 * We got here by the first two bytes in ident,
	 * now read the entire ELF header.
	 */
	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
	    sizeof (Ehdr), 0, UIO_SYSSPACE, 0,
	    0, credp, &resid)) != 0)

	/*
	 * Since a separate version is compiled for handling 32-bit and
	 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
	 * doesn't need to be able to deal with 32-bit ELF files.
	 */
	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
	    ehdr->e_ident[EI_MAG3] != ELFMAG3)

	if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
#if defined(_ILP32) || defined(_ELF32_COMPAT)
	    ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
	    !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,

	*nshdrs = ehdr->e_shnum;
	*shstrndx = ehdr->e_shstrndx;
	*nphdrs = ehdr->e_phnum;
	/*
	 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
	 * to read in the section header at index zero to access the true
	 * values for those fields.
	 */
	if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
	    *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {

		if (ehdr->e_shoff == 0)

		if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
		    sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
		    0, credp, &resid)) != 0)

		*nshdrs = shdr.sh_size;
		if (*shstrndx == SHN_XINDEX)
			*shstrndx = shdr.sh_link;
		if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
			*nphdrs = shdr.sh_info;
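
/*
 * Illustrative sketch (compiled out): the fallback above follows the ELF
 * extended-numbering convention -- when e_phnum is PN_XNUM or e_shnum is 0
 * (with a nonzero e_shoff), the true counts live in section header zero.
 * Only the fields used by the recovery are modeled; the struct is a
 * simplification, not the real Shdr.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	FAKE_PN_XNUM		0xffff
#define	FAKE_SHN_XINDEX		0xffff

typedef struct {
	uint64_t sh_size;	/* real section count */
	uint32_t sh_link;	/* real shstrndx */
	uint32_t sh_info;	/* real phnum */
} fake_shdr0_t;

static void
recover_counts(unsigned int *nshdrs, unsigned int *shstrndx,
    unsigned int *nphdrs, const fake_shdr0_t *shdr0)
{
	if (*nshdrs == 0)
		*nshdrs = (unsigned int)shdr0->sh_size;
	if (*shstrndx == FAKE_SHN_XINDEX)
		*shstrndx = shdr0->sh_link;
	if (*nphdrs == FAKE_PN_XNUM && shdr0->sh_info != 0)
		*nphdrs = shdr0->sh_info;
}

int
main(void)
{
	fake_shdr0_t shdr0 = { 70000, 69999, 80000 };
	unsigned int nshdrs = 0, shstrndx = FAKE_SHN_XINDEX;
	unsigned int nphdrs = FAKE_PN_XNUM;

	recover_counts(&nshdrs, &shstrndx, &nphdrs, &shdr0);
	printf("%u %u %u\n", nshdrs, shstrndx, nphdrs);	/* 70000 69999 80000 */
	return (0);
}
#endif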
#ifdef _ELF32_COMPAT
extern size_t elf_nphdr_max;

size_t elf_nphdr_max = 1000;

getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
    caddr_t *phbasep, ssize_t *phsizep)
	ssize_t resid, minsize;

	/*
	 * Since we're going to be using e_phentsize to iterate down the
	 * array of program headers, it must be 8-byte aligned or else
	 * we might cause a misaligned access. We use all members through
	 * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
	 * e_phentsize must be at least large enough to include those
	 */
#if !defined(_LP64) || defined(_ELF32_COMPAT)
	minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);

	minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);

	if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
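
/*
 * Illustrative sketch (compiled out): the e_phentsize check above rejects
 * entry sizes too small to cover the members actually dereferenced, or not
 * suitably aligned for walking the array.  The Phdr layout below is the
 * standard ELF64 field order, used here as an assumption of the sketch.
 */
#if 0
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
	uint32_t p_type;
	uint32_t p_flags;
	uint64_t p_offset;
	uint64_t p_vaddr;
	uint64_t p_paddr;
	uint64_t p_filesz;
	uint64_t p_memsz;
	uint64_t p_align;
} fake_phdr64_t;

/* Reject entry sizes too small to cover p_memsz or not 4-byte aligned. */
static int
phentsize_ok(size_t e_phentsize)
{
	size_t minsize = offsetof(fake_phdr64_t, p_memsz) +
	    sizeof (((fake_phdr64_t *)0)->p_memsz);

	return (e_phentsize >= minsize && (e_phentsize & 3) == 0);
}

int
main(void)
{
	printf("%d %d %d\n", phentsize_ok(sizeof (fake_phdr64_t)),
	    phentsize_ok(16), phentsize_ok(57));	/* 1 0 0 */
	return (0);
}
#endif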
	*phsizep = nphdrs * ehdr->e_phentsize;

	if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
		if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)

		*phbasep = kmem_alloc(*phsizep, KM_SLEEP);

	if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
	    (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, 0,
	    credp, &resid)) != 0) {
		kmem_free(*phbasep, *phsizep);

#ifdef _ELF32_COMPAT
extern size_t elf_nshdr_max;
extern size_t elf_shstrtab_max;

size_t elf_nshdr_max = 10000;
size_t elf_shstrtab_max = 100 * 1024;

getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
    int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
    char **shstrbasep, ssize_t *shstrsizep)
	ssize_t resid, minsize;
	/*
	 * Since we're going to be using e_shentsize to iterate down the
	 * array of section headers, it must be 8-byte aligned or else
	 * we might cause a misaligned access. We use all members through
	 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
	 * must be at least large enough to include that member. The index
	 * of the string table section must also be valid.
	 */
	minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
	if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||

	*shsizep = nshdrs * ehdr->e_shentsize;

	if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
		if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)

		*shbasep = kmem_alloc(*shsizep, KM_SLEEP);

	if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
	    (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, 0,
	    credp, &resid)) != 0) {
		kmem_free(*shbasep, *shsizep);

	/*
	 * Pull the section string table out of the vnode; fail if the size
	 */
	shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
	if ((*shstrsizep = shdr->sh_size) == 0) {
		kmem_free(*shbasep, *shsizep);

	if (*shstrsizep > elf_shstrtab_max) {
		if ((*shstrbasep = kmem_alloc(*shstrsizep,
		    KM_NOSLEEP)) == NULL) {
			kmem_free(*shbasep, *shsizep);

		*shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);

	if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
	    (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, 0,
	    credp, &resid)) != 0) {
		kmem_free(*shbasep, *shsizep);
		kmem_free(*shstrbasep, *shstrsizep);

	/*
	 * Make sure the strtab is null-terminated to make sure we
	 * don't run off the end of the table.
	 */
	(*shstrbasep)[*shstrsizep - 1] = '\0';
	caddr_t addr = NULL;
	int hsize = ehdr->e_phentsize;
	caddr_t mintmp = (caddr_t)-1;
	extern int use_brk_lpg;

	if (ehdr->e_type == ET_DYN) {
		secflagset_t flags = 0;
		/*
		 * Obtain the virtual address of a hole in the
		 * address space to map the "interpreter".
		 */
		if (secflag_enabled(curproc, PROC_SEC_ASLR))
			flags |= _MAP_RANDOMIZE;

		flags |= _MAP_STARTLOW;

		map_addr(&addr, len, 0, 1, flags);

		*voffset = (intptr_t)addr;

		/*
		 * Calculate the minimum vaddr so it can be subtracted out.
		 * According to the ELF specification, since PT_LOAD sections
		 * must be sorted by increasing p_vaddr values, this is
		 * guaranteed to be the first PT_LOAD section.
		 */
		phdr = (Phdr *)phdrbase;
		for (i = nphdrs; i > 0; i--) {
			if (phdr->p_type == PT_LOAD) {
				*voffset -= (uintptr_t)phdr->p_vaddr;

			phdr = (Phdr *)((caddr_t)phdr + hsize);

	phdr = (Phdr *)phdrbase;
	for (i = nphdrs; i > 0; i--) {
		switch (phdr->p_type) {
			if ((*intphdr != NULL) && (*uphdr == NULL))

			if (phdr->p_flags & PF_R)
			if (phdr->p_flags & PF_W)
			if (phdr->p_flags & PF_X)

			addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);

			/*
			 * Keep track of the segment with the lowest starting
			 */

			zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;

			offset = phdr->p_offset;
			if (((uintptr_t)offset & PAGEOFFSET) ==
			    ((uintptr_t)addr & PAGEOFFSET) &&
			    (!(vp->v_flag & VNOMAP))) {

			/*
			 * Set the heap pagesize for OOB when the bss size
			 * is known and use_brk_lpg is not 0.
			 */
			if (brksize != NULL && use_brk_lpg &&
			    zfodsz != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				size_t tlen = P2NPHASE((uintptr_t)addr +
				    phdr->p_filesz, PAGESIZE);

				if (zfodsz > tlen) {
					curproc->p_brkpageszc =
					    page_szc(map_pgsz(MAPPGSZ_HEAP,
					    curproc, addr + phdr->p_filesz +
					    tlen, zfodsz - tlen, 0));

			if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				uint_t szc = curproc->p_brkpageszc;
				size_t pgsz = page_get_pagesize(szc);
				caddr_t ebss = addr + phdr->p_memsz;
				/*
				 * If we need extra space to keep the BSS an
				 * integral number of pages in size, some of
				 * that space may fall beyond p_brkbase, so we
				 * need to set p_brksize to account for it
				 * being (logically) part of the brk.
				 */
				size_t extra_zfodsz;

				ASSERT(pgsz > PAGESIZE);

				extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);

				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz + extra_zfodsz, phdr->p_offset,

				if (brksize != NULL)
					*brksize = extra_zfodsz;

				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz, phdr->p_offset, prot, page, 0))

			if (bssbase != NULL && addr >= *bssbase &&
			    phdr == dataphdrp) {
				*bssbase = addr + phdr->p_filesz;

			if (brkbase != NULL && addr >= *brkbase) {
				*brkbase = addr + phdr->p_memsz;

			*execsz += btopr(phdr->p_memsz);

		phdr = (Phdr *)((caddr_t)phdr + hsize);

	if (minaddr != NULL) {
		ASSERT(mintmp != (caddr_t)-1);
		*minaddr = (intptr_t)mintmp;

	if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
		uintptr_t base = (uintptr_t)*brkbase;
		uintptr_t oend = base + *brksize;

		ASSERT(ISP2(aslr_max_brk_skew));

		(void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
		base += P2PHASE(off, aslr_max_brk_skew);
		base = P2ROUNDUP(base, PAGESIZE);
		*brkbase = (caddr_t)base;
		/*
		 * Above, we set *brksize to account for the possibility we
		 * had to grow the 'brk' in padding out the BSS to a page
		 * boundary.
		 *
		 * We now need to adjust that based on where we now are
		 * actually putting the brk.
		 */
		*brksize = oend - base;
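
/*
 * Illustrative sketch (compiled out): the brk randomization above adds a
 * bounded pseudo-random offset to the brk base, rounds back up to a page,
 * and recomputes the size so base + size still reaches the original end.
 * The P2PHASE/P2ROUNDUP stand-ins below use the usual power-of-two
 * definitions, an assumption of the sketch.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	FAKE_PAGESIZE		4096UL
#define	FAKE_P2PHASE(x, align)	((x) & ((align) - 1))
#define	FAKE_P2ROUNDUP(x, align) (-(-(x) & -(align)))

static void
skew_brk(uintptr_t *basep, size_t *sizep, uint64_t rnd, uintptr_t max_skew)
{
	uintptr_t base = *basep;
	uintptr_t oend = base + *sizep;

	base += FAKE_P2PHASE((uintptr_t)rnd, max_skew);
	base = FAKE_P2ROUNDUP(base, FAKE_PAGESIZE);
	*basep = base;
	*sizep = oend - base;
}

int
main(void)
{
	uintptr_t base = 0x500000;
	size_t size = 0x20000;

	skew_brk(&base, &size, 0x1234567, 0x10000);	/* max skew 64K */
	printf("base=0x%lx size=0x%zx\n", (unsigned long)base, size);
	return (0);
}
#endif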
elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
    rlim_t rlimit, cred_t *credp)
	bzero(&note, sizeof (note));
	bcopy("CORE", note.name, 4);
	note.nhdr.n_type = type;

	/*
	 * The System V ABI states that n_namesz must be the length of the
	 * string that follows the Nhdr structure including the terminating
	 * null. The ABI also specifies that sufficient padding should be
	 * included so that the description that follows the name string
	 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
	 * respectively. However, since this change was not made correctly
	 * at the time of the 64-bit port, both 32- and 64-bit binaries
	 * descriptions are only guaranteed to begin on a 4-byte boundary.
	 */
	note.nhdr.n_namesz = 5;
	note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
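
/*
 * Illustrative sketch (compiled out): the note record built above carries a
 * fixed "CORE" name (n_namesz = 5, counting the NUL) and a descriptor
 * rounded up to a 4-byte Word, matching the two core_write() calls that
 * follow.  The struct layout -- including the 8-byte name field -- is an
 * assumption of this sketch, not the kernel's exact definition.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <string.h>

typedef struct {
	struct {
		uint32_t n_namesz;
		uint32_t n_descsz;
		uint32_t n_type;
	} nhdr;
	char name[8];
} fake_note_t;

static size_t
note_sizes(fake_note_t *note, int type, size_t descsz)
{
	(void) memset(note, 0, sizeof (*note));
	(void) memcpy(note->name, "CORE", 4);
	note->nhdr.n_type = type;
	note->nhdr.n_namesz = 5;		/* "CORE" + NUL */
	/* Round the descriptor to a 4-byte boundary, as the comment explains. */
	note->nhdr.n_descsz = (uint32_t)((descsz + 3) & ~(size_t)3);
	return (sizeof (*note) + note->nhdr.n_descsz);
}

int
main(void)
{
	fake_note_t note;

	/* 20-byte header+name plus a 13-byte descriptor padded to 16 = 36. */
	printf("%zu\n", note_sizes(&note, 1, 13));
	return (0);
}
#endif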
	if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
	    sizeof (note), rlimit, credp))

	*offsetp += sizeof (note);

	if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
	    note.nhdr.n_descsz, rlimit, credp))

	*offsetp += note.nhdr.n_descsz;

/*
 * Copy the section data from one vnode to the section of another vnode.
 */
copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
    void *buf, size_t size, cred_t *credp, rlim_t rlimit)
	size_t len, n = src->sh_size;

		if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
		    UIO_SYSSPACE, 0, 0, credp, &resid) != 0 ||
		    core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
		    buf, len - resid, rlimit, credp) != 0) {

		ASSERT(n >= len - resid);

	*doffset += src->sh_size;

#ifdef _ELF32_COMPAT
extern size_t elf_datasz_max;

size_t elf_datasz_max = 1 * 1024 * 1024;

/*
 * This function processes mappings that correspond to load objects to
 * examine their respective sections for elfcore(). It's called once with
 * v set to NULL to count the number of sections that we're going to need
 * and then again with v set to some allocated buffer that we fill in with
 * all the section data.
 */
process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
    Shdr *v, int nv, rlim_t rlimit, Off *doffsetp, int *nshdrsp)
	vnode_t *lastvp = NULL;
	shstrtab_t shstrtab;
	struct as *as = p->p_as;

	shstrtab_init(&shstrtab);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t saddr = seg->s_base;
		int nshdrs, shstrndx, nphdrs;

		/*
		 * Since we're just looking for text segments of load
		 * objects, we only care about the protection bits; we don't
		 * care about the actual size of the segment so we use the
		 * reserved size. If the segment's size is zero, there's
		 * something fishy going on so we ignore this segment.
		 */
		if (seg->s_ops != &segvn_ops ||
		    segop_getvp(seg, seg->s_base, &mvp) != 0 ||
		    mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
		    (segsize = pr_getsegsize(seg, 1)) == 0)

		eaddr = saddr + segsize;
		prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
		pr_getprot_done(&tmp);

		/*
		 * Skip this segment unless the protection bits look like
		 * what we'd expect for a text segment.
		 */
		if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)

		if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
		    getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
		    &shbase, &shsize, &shstrbase, &shstrsize) != 0)
		off = ehdr.e_shentsize;
		for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
			Shdr *symtab = NULL, *strtab;

			shdr = (Shdr *)(shbase + off);

			if (shdr->sh_name >= shstrsize)

			name = shstrbase + shdr->sh_name;

			if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
				if ((content & CC_CONTENT_CTF) == 0 ||

				if (shdr->sh_link > 0 &&
				    shdr->sh_link < nshdrs) {
					symtab = (Shdr *)(shbase +
					    shdr->sh_link * ehdr.e_shentsize);

				if (v != NULL && i < nv - 1) {
					if (shdr->sh_size > datasz &&
					    shdr->sh_size <= elf_datasz_max) {
						kmem_free(data, datasz);

						datasz = shdr->sh_size;
						data = kmem_alloc(datasz,

					v[i].sh_name = shstrtab_ndx(&shstrtab,
					v[i].sh_addr = (Addr)(uintptr_t)saddr;
					v[i].sh_type = SHT_PROGBITS;
					v[i].sh_addralign = 4;
					*doffsetp = roundup(*doffsetp,
					v[i].sh_offset = *doffsetp;
					v[i].sh_size = shdr->sh_size;
					if (symtab == NULL) {
					} else if (symtab->sh_type ==
						v[i].sh_link = i + 1;

					copy_scn(shdr, mvp, &v[i], vp,
					    doffsetp, data, datasz, credp,

				/*
				 * We've already dumped the symtab.
				 */
				if (symtab != NULL &&
				    symtab->sh_type == SHT_SYMTAB &&
			} else if (strcmp(name,
			    shstrtab_data[STR_SYMTAB]) == 0) {
				if ((content & CC_CONTENT_SYMTAB) == 0 ||

			if (symtab != NULL) {
				if ((symtab->sh_type != SHT_DYNSYM &&
				    symtab->sh_type != SHT_SYMTAB) ||
				    symtab->sh_link == 0 ||
				    symtab->sh_link >= nshdrs)

				strtab = (Shdr *)(shbase +
				    symtab->sh_link * ehdr.e_shentsize);

				if (strtab->sh_type != SHT_STRTAB)

				if (v != NULL && i < nv - 2) {
					sz = MAX(symtab->sh_size,
					    sz <= elf_datasz_max) {
						kmem_free(data, datasz);

						data = kmem_alloc(datasz,

					if (symtab->sh_type == SHT_DYNSYM) {
						v[i].sh_name = shstrtab_ndx(
						    &shstrtab, STR_DYNSYM);
						v[i + 1].sh_name = shstrtab_ndx(
						    &shstrtab, STR_DYNSTR);

						v[i].sh_name = shstrtab_ndx(
						    &shstrtab, STR_SYMTAB);
						v[i + 1].sh_name = shstrtab_ndx(
						    &shstrtab, STR_STRTAB);

					v[i].sh_type = symtab->sh_type;
					v[i].sh_addr = symtab->sh_addr;
					if (ehdr.e_type == ET_DYN ||
					    (Addr)(uintptr_t)saddr;
					    symtab->sh_addralign;
					*doffsetp = roundup(*doffsetp,
					v[i].sh_offset = *doffsetp;
					v[i].sh_size = symtab->sh_size;
					v[i].sh_link = i + 1;
					v[i].sh_entsize = symtab->sh_entsize;
					v[i].sh_info = symtab->sh_info;

					copy_scn(symtab, mvp, &v[i], vp,
					    doffsetp, data, datasz, credp,

					v[i + 1].sh_type = SHT_STRTAB;
					v[i + 1].sh_flags = SHF_STRINGS;
					v[i + 1].sh_addr = symtab->sh_addr;
					if (ehdr.e_type == ET_DYN ||
					    v[i + 1].sh_addr == 0)
						(Addr)(uintptr_t)saddr;
					v[i + 1].sh_addralign =
					    strtab->sh_addralign;
					*doffsetp = roundup(*doffsetp,
					    v[i + 1].sh_addralign);
					v[i + 1].sh_offset = *doffsetp;
					v[i + 1].sh_size = strtab->sh_size;

					copy_scn(strtab, mvp, &v[i + 1], vp,
					    doffsetp, data, datasz, credp,

				if (symtab->sh_type == SHT_SYMTAB)
		kmem_free(shstrbase, shstrsize);
		kmem_free(shbase, shsize);

		cmn_err(CE_WARN, "elfcore: core dump failed for "
		    "process %d; address space is changing", p->p_pid);

	v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
	v[i].sh_size = shstrtab_size(&shstrtab);
	v[i].sh_addralign = 1;
	*doffsetp = roundup(*doffsetp, v[i].sh_addralign);
	v[i].sh_offset = *doffsetp;
	v[i].sh_flags = SHF_STRINGS;
	v[i].sh_type = SHT_STRTAB;

	if (v[i].sh_size > datasz) {
		kmem_free(data, datasz);

		datasz = v[i].sh_size;
		data = kmem_alloc(datasz,

	shstrtab_dump(&shstrtab, data);

	if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
	    data, v[i].sh_size, rlimit, credp)) != 0)

	*doffsetp += v[i].sh_size;

	kmem_free(data, datasz);
elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim_t rlimit, int sig,
    core_content_t content)
	offset_t poffset, soffset;
	int error, i, nphdrs, nshdrs;
	struct as *as = p->p_as;
	size_t phdrsz, shdrsz;
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * Make sure we have everything we need (registers, etc.).
	 * All other lwps have already stopped and are in an orderly state.
	 */
	ASSERT(p == ttoproc(curthread));

	AS_LOCK_ENTER(as, RW_WRITER);
	nphdrs = prnsegs(as, 0) + 2;		/* two CORE note sections */

	/*
	 * Count the number of section headers we're going to need.
	 */
	if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
		(void) process_scns(content, p, credp, NULL, NULL, 0, 0,

	ASSERT(nshdrs == 0 || nshdrs > 1);
	/*
	 * The core file contents may require zero section headers, but if
	 * we overflow the 16 bits allotted to the program header count in
	 * the ELF header, we'll need that program header at index zero.
	 */
	if (nshdrs == 0 && nphdrs >= PN_XNUM)
	phdrsz = nphdrs * sizeof (Phdr);
	shdrsz = nshdrs * sizeof (Shdr);

	bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
	bigwad = kmem_alloc(bigsize, KM_SLEEP);

	ehdr = &bigwad->ehdr;
	bzero(ehdr, sizeof (*ehdr));

	ehdr->e_ident[EI_MAG0] = ELFMAG0;
	ehdr->e_ident[EI_MAG1] = ELFMAG1;
	ehdr->e_ident[EI_MAG2] = ELFMAG2;
	ehdr->e_ident[EI_MAG3] = ELFMAG3;
	ehdr->e_ident[EI_CLASS] = ELFCLASS;
	ehdr->e_type = ET_CORE;

#if !defined(_LP64) || defined(_ELF32_COMPAT)

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARC;
#elif defined(__i386) || defined(__i386_COMPAT)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_386;
#error "no recognized machine type is defined"

#else	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARCV9;
#elif defined(__amd64)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_AMD64;
#error "no recognized 64-bit machine type is defined"

#endif	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

	/*
	 * If the count of program headers or section headers or the index
	 * of the section string table can't fit in the mere 16 bits
	 * shortsightedly allotted to them in the ELF header, we use the
	 * extended formats and put the real values in the section header
	 */
	ehdr->e_version = EV_CURRENT;
	ehdr->e_ehsize = sizeof (Ehdr);

	if (nphdrs >= PN_XNUM)
		ehdr->e_phnum = PN_XNUM;

		ehdr->e_phnum = (unsigned short)nphdrs;

	ehdr->e_phoff = sizeof (Ehdr);
	ehdr->e_phentsize = sizeof (Phdr);

	if (nshdrs >= SHN_LORESERVE)

		ehdr->e_shnum = (unsigned short)nshdrs;

	if (nshdrs - 1 >= SHN_LORESERVE)
		ehdr->e_shstrndx = SHN_XINDEX;

		ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);

	ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
	ehdr->e_shentsize = sizeof (Shdr);
	if (error = core_write(vp, UIO_SYSSPACE, 0, ehdr,
	    sizeof (Ehdr), rlimit, credp))

	poffset = sizeof (Ehdr);
	soffset = sizeof (Ehdr) + phdrsz;
	doffset = sizeof (Ehdr) + phdrsz + shdrsz;

	v = &bigwad->phdr[0];

	setup_old_note_header(&v[0], p);
	v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[0].p_filesz;

	setup_note_header(&v[1], p);
	v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[1].p_filesz;

	mutex_enter(&p->p_lock);

	brkbase = p->p_brkbase;
	brksize = p->p_brksize;

	stkbase = p->p_usrstack - p->p_stksize;
	stksize = p->p_stksize;

	mutex_exit(&p->p_lock);

	AS_LOCK_ENTER(as, RW_WRITER);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		extern const struct seg_ops segspt_shmops;

		if ((seg->s_flags & S_HOLE) != 0) {

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
			if ((size = (size_t)(naddr - saddr)) == 0)

			v[i].p_type = PT_LOAD;
			v[i].p_vaddr = (Addr)(uintptr_t)saddr;
			v[i].p_memsz = size;
			if (prot & PROT_READ)
				v[i].p_flags |= PF_R;
			if (prot & PROT_WRITE)
				v[i].p_flags |= PF_W;
			if (prot & PROT_EXEC)
				v[i].p_flags |= PF_X;
			/*
			 * Figure out which mappings to include in the core.
			 */
			type = segop_gettype(seg, saddr);

			if (saddr == stkbase && size == stksize) {
				if (!(content & CC_CONTENT_STACK))

			} else if (saddr == brkbase && size == brksize) {
				if (!(content & CC_CONTENT_HEAP))

			} else if (seg->s_ops == &segspt_shmops) {
				if (type & MAP_NORESERVE) {
					if (!(content & CC_CONTENT_DISM))

					if (!(content & CC_CONTENT_ISM))

			} else if (seg->s_ops != &segvn_ops) {

			} else if (type & MAP_SHARED) {
				if (shmgetid(p, saddr) != SHMID_NONE) {
					if (!(content & CC_CONTENT_SHM))

				} else if (segop_getvp(seg, seg->s_base,
				    &mvp) != 0 || mvp == NULL ||
				    mvp->v_type != VREG) {
					if (!(content & CC_CONTENT_SHANON))

					if (!(content & CC_CONTENT_SHFILE))

			} else if (segop_getvp(seg, seg->s_base, &mvp) != 0 ||
			    mvp == NULL || mvp->v_type != VREG) {
				if (!(content & CC_CONTENT_ANON))

			} else if (prot == (PROT_READ | PROT_EXEC)) {
				if (!(content & CC_CONTENT_TEXT))

			} else if (prot == PROT_READ) {
				if (!(content & CC_CONTENT_RODATA))

				if (!(content & CC_CONTENT_DATA))

			doffset = roundup(doffset, sizeof (Word));
			v[i].p_offset = doffset;
			v[i].p_filesz = size;

		ASSERT(tmp == NULL);

	if (overflow || i != nphdrs) {
		if (ntries++ == 0) {
			kmem_free(bigwad, bigsize);

		cmn_err(CE_WARN, "elfcore: core dump failed for "
		    "process %d; address space is changing", p->p_pid);

	if ((error = core_write(vp, UIO_SYSSPACE, poffset,
	    v, phdrsz, rlimit, credp)) != 0)

	if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,

	if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
	    credp, content)) != 0)

	for (i = 2; i < nphdrs; i++) {
		prkillinfo_t killinfo;

		if (v[i].p_filesz == 0)
		/*
		 * If dumping out this segment fails, rather than failing
		 * the core dump entirely, we reset the size of the mapping
		 * to zero to indicate that the data is absent from the core
		 * file and OR in the PF_SUNW_FAILURE flag to differentiate
		 * this from mappings that were excluded due to the core file
		 */
		if ((error = core_seg(p, vp, v[i].p_offset,
		    (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
		    rlimit, credp)) == 0) {

		if ((sig = lwp->lwp_cursig) == 0) {
			/*
			 * We failed due to something other than a signal.
			 * Since the space reserved for the segment is now
			 * unused, we stash the errno in the first four
			 * bytes. This undocumented interface will let us
			 * understand the nature of the failure.
			 */
			(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
			    &error, sizeof (error), rlimit, credp);

			v[i].p_flags |= PF_SUNW_FAILURE;
			if ((error = core_write(vp, UIO_SYSSPACE,
			    poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
			    rlimit, credp)) != 0)
		/*
		 * We took a signal.  We want to abort the dump entirely, but
		 * we also want to indicate what failed and why.  We therefore
		 * use the space reserved for the first failing segment to
		 * write our error (which, for purposes of compatibility with
		 * older core dump readers, we set to EINTR) followed by any
		 * siginfo associated with the signal.
		 */
		bzero(&killinfo, sizeof (killinfo));
		killinfo.prk_error = EINTR;

		sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;

			bcopy(&sq->sq_info, &killinfo.prk_info,
			    sizeof (sq->sq_info));

			killinfo.prk_info.si_signo = lwp->lwp_cursig;
			killinfo.prk_info.si_code = SI_NOINFO;

#if (defined(_SYSCALL32_IMPL) || defined(_LP64))
		/*
		 * If this is a 32-bit process, we need to translate from the
		 * native siginfo to the 32-bit variant.  (Core readers must
		 * always have the same data model as their target or must
		 * be aware of -- and compensate for -- data model differences.)
		 */
		if (curproc->p_model == DATAMODEL_ILP32) {
			siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
			bcopy(&si32, &killinfo.prk_info, sizeof (si32));

		(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
		    &killinfo, sizeof (killinfo), rlimit, credp);

		/*
		 * For the segment on which we took the signal, indicate that
		 * its data now refers to a siginfo.
		 */
		v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |

		/*
		 * And for every other segment, indicate that its absence
		 * is due to a signal.
		 */
		for (j = i + 1; j < nphdrs; j++) {
			v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;

		/*
		 * Finally, write out our modified program headers.
		 */
		if ((error = core_write(vp, UIO_SYSSPACE,
		    poffset + sizeof (v[i]) * i, &v[i],
		    sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)

	bzero(&bigwad->shdr[0], shdrsz);

	if (nshdrs >= SHN_LORESERVE)
		bigwad->shdr[0].sh_size = nshdrs;

	if (nshdrs - 1 >= SHN_LORESERVE)
		bigwad->shdr[0].sh_link = nshdrs - 1;

	if (nphdrs >= PN_XNUM)
		bigwad->shdr[0].sh_info = nphdrs;

	AS_LOCK_ENTER(as, RW_WRITER);
	if ((error = process_scns(content, p, credp, vp,
	    &bigwad->shdr[0], nshdrs, rlimit, &doffset,

	if ((error = core_write(vp, UIO_SYSSPACE, soffset,
	    &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)

	kmem_free(bigwad, bigsize);
#ifndef _ELF32_COMPAT

static struct execsw esw = {

static struct modlexec modlexec = {
	&mod_execops, "exec module for elf", &esw

extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
    intpdata_t *idatap, int level, long *execsz,
    int setid, caddr_t exec_file, cred_t *cred,

extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
    rlim_t rlimit, int sig, core_content_t content);

static struct execsw esw32 = {

static struct modlexec modlexec32 = {
	&mod_execops, "32-bit exec module for elf", &esw32

static struct modlinkage modlinkage = {
	(void *)&modlexec32,

	return (mod_install(&modlinkage));

	return (mod_remove(&modlinkage));

_info(struct modinfo *modinfop)
	return (mod_info(&modlinkage, modinfop));

#endif	/* !_ELF32_COMPAT */