4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
37 #include <sys/sysmacros.h>
38 #include <sys/machelf.h>
41 #include "Psymtab_machelf.h"
45 * This file contains code for use by Psymtab.c that is compiled once
46 * for each supported ELFCLASS.
48 * When processing ELF files, it is common to encounter a situation where
49 * a program with one ELFCLASS (32 or 64-bit) is required to examine a
50 * file with a different ELFCLASS. For example, the 32-bit linker (ld) may
51 * be used to link a 64-bit program. The simplest solution to this problem
52 * is to duplicate each such piece of code, modifying only the data types,
53 * and to use if statements to select the code to run. The problem with
54 * doing it that way is that the resulting code is difficult to maintain.
55 * It is inevitable that the copies will not always get modified identically,
56 * and will drift apart. The only robust solution is to generate the
57 * multiple instances of code automatically from a single piece of code.
59 * The solution used within the Solaris linker is to write the code once,
60 * using the data types defined in sys/machelf.h, and then to compile that
61 * code twice, once with _ELF64 defined (to generate ELFCLASS64 code) and
62 * once without (to generate ELFCLASS32). We use the same approach here.
64 * Note that the _ELF64 definition does not refer to the ELFCLASS of
65 * the resulting code, but rather, to the ELFCLASS of the data it
66 * examines. By repeating the above double-compilation for both 32-bit
67 * and 64-bit builds, we end up with 4 instances, which collectively
68 * can handle any combination of program and ELF data class:
82 * Read data from the specified process and construct an in memory
83 * image of an ELF file that will let us use libelf for most of the
84 * work we need to do later (e.g. symbol table lookups). This is used
85 * in cases where no usable on-disk image for the process is available.
86 * We need sections for the dynsym, dynstr, and plt, and we need
87 * the program headers from the text section. The former is used in
88 * Pbuild_file_symtab(); the latter is used in several functions in
89 * Pcore.c to reconstruct the origin of each mapping from the load
90 * object that spawned it.
92 * Here are some useful pieces of elf trivia that will help
93 * to elucidate this code.
95 * All the information we need about the dynstr can be found in these
96 * two entries in the dynamic section:
98 * DT_STRTAB base of dynstr
99 * DT_STRSZ size of dynstr
101 * So deciphering the dynstr is pretty straightforward.
103 * The dynsym is a little trickier.
105 * DT_SYMTAB base of dynsym
106 * DT_SYMENT size of a dynsym entry (Elf{32,64}_Sym)
107 * DT_HASH base of hash table for dynamic lookups
109 * The DT_SYMTAB entry gives us an easy way of getting to the base
110 * of the dynsym, but getting the size involves rooting around in the
111 * dynamic lookup hash table. Here's the layout of the hash table:
113 * +-------------------+
114 * | nbucket | All values are 32-bit
115 * +-------------------+ (Elf32_Word or Elf64_Word)
117 * +-------------------+
120 * | bucket[nbucket-1] |
121 * +-------------------+
124 * | chain[nchain-1] |
125 * +-------------------+
126 * (figure 5-12 from the SYS V Generic ABI)
128 * Symbol names are hashed into a particular bucket which contains
129 * an index into the symbol table. Each entry in the symbol table
130 * has a corresponding entry in the chain table which tells the
131 * consumer where the next entry in the hash chain is. We can use
132 * the nchain field to find out the size of the dynsym.
134 * If there is a dynsym present, there may also be an optional
135 * section called the SUNW_ldynsym that augments the dynsym by
136 * providing local function symbols. When the Solaris linker lays
137 * out a file that has both of these sections, it makes sure that
138 * the data for the two sections is adjacent with the SUNW_ldynsym
139 * in front. This allows the runtime linker to treat these two
140 * symbol tables as being a single larger table. There are two
141 * items in the dynamic section for this:
143 * DT_SUNW_SYMTAB base of the SUNW_ldynsym
144 * DT_SUNW_SYMSZ total size of SUNW_ldynsym and dynsym
145 * added together. We can figure out the
146 * size of the SUNW_ldynsym section by
147 * subtracting the size of the dynsym
148 * (described above) from this value.
150 * We can figure out the size of the .plt section, but it takes some
151 * doing. We need to use the following information:
153 * DT_PLTGOT GOT PLT entry offset (on x86) or PLT offset (on sparc)
154 * DT_JMPREL base of the PLT's relocation section
155 * DT_PLTRELSZ size of the PLT's relocation section
156 * DT_PLTREL type of the PLT's relocation section
158 * We can use the number of relocation entries to calculate the size of
159 * the PLT. We get the address of the PLT by looking up the
160 * _PROCEDURE_LINKAGE_TABLE_ symbol.
162 * For more information, check out the System V Generic ABI.
167 * The fake_elfXX() function generated by this file uses the following
168 * string as the string table for the section names. Since it is critical
169 * to count correctly, and to improve readability, the SHSTR_NDX_ macros
170 * supply the proper offset for each name within the string.
/*
 * Section-name string table for the fabricated ELF image. The names are
 * packed back to back, each followed by a NUL; the last name is terminated
 * by the string literal's implicit NUL, so sizeof (shstr) covers the whole
 * table including that final terminator.
 *
 * The table is declared const: the code that uses it only takes its size
 * and copies it out with memcpy(), so nothing needs to write to it.
 */
static const char shstr[] =
	".shstrtab\0.dynsym\0.dynstr\0.dynamic\0.plt\0.SUNW_ldynsym";

/*
 * Offsets within shstr for each name. Each offset equals the previous
 * offset plus the length of the previous name plus one for its NUL, so
 * these values must be recomputed if any name in shstr is changed.
 */
#define	SHSTR_NDX_shstrtab	0
#define	SHSTR_NDX_dynsym	10
#define	SHSTR_NDX_dynstr	18
#define	SHSTR_NDX_dynamic	26
#define	SHSTR_NDX_plt		35
#define	SHSTR_NDX_SUNW_ldynsym	40
185 * Section header alignment for 32 and 64-bit ELF files differs
188 #define SH_ADDRALIGN 8
190 #define SH_ADDRALIGN 4
194 * This is the smallest number of PLT relocation entries allowed in a proper
197 #define PLTREL_MIN_ENTRIES 0
201 fake_elf64(struct ps_prochandle
*P
, file_info_t
*fptr
, uintptr_t addr
,
202 Ehdr
*ehdr
, uint_t phnum
, Phdr
*phdr
)
205 fake_elf32(struct ps_prochandle
*P
, file_info_t
*fptr
, uintptr_t addr
,
206 Ehdr
*ehdr
, uint_t phnum
, Phdr
*phdr
)
224 * Mask of dynamic options that must be present in a well
225 * formed dynamic section. We need all of these in order to
226 * put together a complete set of elf sections. They are
227 * mandatory in both executables and shared objects so if one
228 * of them is missing, we're in some trouble and should abort.
229 * The PLT items are expected, but we will let them slide if
230 * need be. The DI_SUNW_SYM* items are completely optional, so
231 * we use them if they are present and ignore them otherwise.
233 const int di_req_mask
= (1 << DI_SYMTAB
) |
234 (1 << DI_SYMENT
) | (1 << DI_STRTAB
) | (1 << DI_STRSZ
);
237 caddr_t elfdata
= NULL
;
239 size_t dynsym_size
= 0, ldynsym_size
;
244 Dyn
*d
[DI_NENT
] = { 0 };
247 size_t pltsz
= 0, pltentries
= 0;
248 uintptr_t hptr
= (uintptr_t)NULL
;
249 Word hnchains
= 0, hnbuckets
= 0;
251 if (ehdr
->e_type
== ET_DYN
)
252 phdr
->p_vaddr
+= addr
;
254 if (P
->rap
!= NULL
) {
255 if (rd_get_dyns(P
->rap
, addr
, (void **)&dp
, NULL
) != RD_OK
)
258 if ((dp
= malloc(phdr
->p_filesz
)) == NULL
)
260 if (Pread(P
, dp
, phdr
->p_filesz
, phdr
->p_vaddr
) !=
266 * Iterate over the items in the dynamic section, grabbing
267 * the address of items we want and saving them in dp[].
269 for (i
= 0; i
< phdr
->p_filesz
/ sizeof (Dyn
); i
++) {
270 switch (dp
[i
].d_tag
) {
271 /* For the .plt section */
273 d
[DI_PLTGOT
] = &dp
[i
];
276 d
[DI_JMPREL
] = &dp
[i
];
279 d
[DI_PLTRELSZ
] = &dp
[i
];
282 d
[DI_PLTREL
] = &dp
[i
];
285 /* For the .dynsym section */
287 d
[DI_SYMTAB
] = &dp
[i
];
288 di_mask
|= (1 << DI_SYMTAB
);
292 di_mask
|= (1 << DI_HASH
);
295 d
[DI_SYMENT
] = &dp
[i
];
296 di_mask
|= (1 << DI_SYMENT
);
299 d
[DI_SUNW_SYMTAB
] = &dp
[i
];
302 d
[DI_SUNW_SYMSZ
] = &dp
[i
];
305 /* For the .dynstr section */
307 d
[DI_STRTAB
] = &dp
[i
];
308 di_mask
|= (1 << DI_STRTAB
);
311 d
[DI_STRSZ
] = &dp
[i
];
312 di_mask
|= (1 << DI_STRSZ
);
317 /* Ensure all required entries were collected */
318 if ((di_mask
& di_req_mask
) != di_req_mask
) {
319 dprintf("text section missing required dynamic entries: "
320 "required 0x%x, found 0x%x\n", di_req_mask
, di_mask
);
324 /* SUNW_ldynsym must be adjacent to dynsym. Ignore if not */
325 if ((d
[DI_SUNW_SYMTAB
] != NULL
) && (d
[DI_SUNW_SYMSZ
] != NULL
) &&
326 ((d
[DI_SYMTAB
]->d_un
.d_ptr
<= d
[DI_SUNW_SYMTAB
]->d_un
.d_ptr
) ||
327 (d
[DI_SYMTAB
]->d_un
.d_ptr
>= (d
[DI_SUNW_SYMTAB
]->d_un
.d_ptr
+
328 d
[DI_SUNW_SYMSZ
]->d_un
.d_val
)))) {
329 d
[DI_SUNW_SYMTAB
] = NULL
;
330 d
[DI_SUNW_SYMSZ
] = NULL
;
334 size
= sizeof (Ehdr
);
336 /* program headers from in-core elf fragment */
337 size
+= phnum
* ehdr
->e_phentsize
;
339 /* unused shdr, and .shstrtab section */
340 size
+= sizeof (Shdr
);
341 size
+= sizeof (Shdr
);
342 size
+= roundup(sizeof (shstr
), SH_ADDRALIGN
);
344 if (d
[DI_HASH
] != NULL
) {
347 hptr
= d
[DI_HASH
]->d_un
.d_ptr
;
348 if (ehdr
->e_type
== ET_DYN
)
351 if (Pread(P
, hash
, sizeof (hash
), hptr
) != sizeof (hash
)) {
352 dprintf("Pread of .hash at %lx failed\n",
362 * .dynsym and .SUNW_ldynsym sections.
364 * The string table section used for the symbol table and
365 * dynamic sections lies immediately after the dynsym, so the
366 * presence of SUNW_ldynsym changes the dynstr section index.
368 if (d
[DI_SUNW_SYMTAB
] != NULL
) {
369 size
+= sizeof (Shdr
); /* SUNW_ldynsym shdr */
370 ldynsym_size
= (size_t)d
[DI_SUNW_SYMSZ
]->d_un
.d_val
;
371 dynsym_size
= ldynsym_size
- (d
[DI_SYMTAB
]->d_un
.d_ptr
372 - d
[DI_SUNW_SYMTAB
]->d_un
.d_ptr
);
373 ldynsym_size
-= dynsym_size
;
376 dynsym_size
= sizeof (Sym
) * hnchains
;
380 size
+= sizeof (Shdr
) + ldynsym_size
+ dynsym_size
;
382 /* .dynstr section */
383 size
+= sizeof (Shdr
);
384 size
+= roundup(d
[DI_STRSZ
]->d_un
.d_val
, SH_ADDRALIGN
);
386 /* .dynamic section */
387 size
+= sizeof (Shdr
);
388 size
+= roundup(phdr
->p_filesz
, SH_ADDRALIGN
);
391 if (d
[DI_PLTGOT
] != NULL
&& d
[DI_JMPREL
] != NULL
&&
392 d
[DI_PLTRELSZ
] != NULL
&& d
[DI_PLTREL
] != NULL
) {
393 size_t pltrelsz
= d
[DI_PLTRELSZ
]->d_un
.d_val
;
395 if (d
[DI_PLTREL
]->d_un
.d_val
== DT_RELA
) {
396 pltentries
= pltrelsz
/ sizeof (Rela
);
397 } else if (d
[DI_PLTREL
]->d_un
.d_val
== DT_REL
) {
398 pltentries
= pltrelsz
/ sizeof (Rel
);
400 /* fall back to the platform default */
401 #if ((defined(__i386) || defined(__amd64)) && !defined(_ELF64))
402 pltentries
= pltrelsz
/ sizeof (Rel
);
403 dprintf("DI_PLTREL not found, defaulting to Rel");
404 #else /* (!(__i386 || __amd64)) || _ELF64 */
405 pltentries
= pltrelsz
/ sizeof (Rela
);
406 dprintf("DI_PLTREL not found, defaulting to Rela");
407 #endif /* (!(__i386 || __amd64) || _ELF64 */
410 if (pltentries
< PLTREL_MIN_ENTRIES
) {
411 dprintf("too few PLT relocation entries "
412 "(found %lu, expected at least %d)\n",
413 (long)pltentries
, PLTREL_MIN_ENTRIES
);
416 if (pltentries
< PLTREL_MIN_ENTRIES
+ 2)
420 * Now that we know the number of plt relocation entries
421 * we can calculate the size of the plt.
423 pltsz
= (pltentries
+ M_PLT_XNumber
) * M_PLT_ENTSIZE
;
425 size
+= sizeof (Shdr
);
426 size
+= roundup(pltsz
, SH_ADDRALIGN
);
430 if ((elfdata
= calloc(1, size
)) == NULL
) {
431 dprintf("failed to allocate size %ld\n", (long)size
);
435 /* LINTED - alignment */
436 ep
= (Ehdr
*)elfdata
;
437 (void) memcpy(ep
, ehdr
, offsetof(Ehdr
, e_phoff
));
439 ep
->e_ehsize
= sizeof (Ehdr
);
440 ep
->e_phoff
= sizeof (Ehdr
);
441 ep
->e_phentsize
= ehdr
->e_phentsize
;
443 ep
->e_shoff
= ep
->e_phoff
+ phnum
* ep
->e_phentsize
;
444 ep
->e_shentsize
= sizeof (Shdr
);
446 * Plt and SUNW_ldynsym sections are optional. C logical
447 * binary operators return a 0 or 1 value, so the following
448 * adds 1 for each optional section present.
450 ep
->e_shnum
= 5 + (pltsz
!= 0) + (d
[DI_SUNW_SYMTAB
] != NULL
);
453 /* LINTED - alignment */
454 sp
= (Shdr
*)(elfdata
+ ep
->e_shoff
);
455 off
= ep
->e_shoff
+ ep
->e_shentsize
* ep
->e_shnum
;
458 * Copying the program headers directly from the process's
459 * address space is a little suspect, but since we only
460 * use them for their address and size values, this is fine.
462 if (Pread(P
, &elfdata
[ep
->e_phoff
], phnum
* ep
->e_phentsize
,
463 addr
+ ehdr
->e_phoff
) != phnum
* ep
->e_phentsize
) {
464 dprintf("failed to read program headers\n");
469 * The first elf section is always skipped.
474 * Section Header: .shstrtab
476 sp
->sh_name
= SHSTR_NDX_shstrtab
;
477 sp
->sh_type
= SHT_STRTAB
;
478 sp
->sh_flags
= SHF_STRINGS
;
481 sp
->sh_size
= sizeof (shstr
);
484 sp
->sh_addralign
= 1;
487 (void) memcpy(&elfdata
[off
], shstr
, sizeof (shstr
));
488 off
+= roundup(sp
->sh_size
, SH_ADDRALIGN
);
492 * Section Header: .SUNW_ldynsym
494 if (d
[DI_SUNW_SYMTAB
] != NULL
) {
495 sp
->sh_name
= SHSTR_NDX_SUNW_ldynsym
;
496 sp
->sh_type
= SHT_SUNW_LDYNSYM
;
497 sp
->sh_flags
= SHF_ALLOC
;
498 sp
->sh_addr
= d
[DI_SUNW_SYMTAB
]->d_un
.d_ptr
;
499 if (ehdr
->e_type
== ET_DYN
)
502 sp
->sh_size
= ldynsym_size
;
503 sp
->sh_link
= dynstr_shndx
;
504 /* Index of 1st global in table that has none == # items */
505 sp
->sh_info
= sp
->sh_size
/ sizeof (Sym
);
506 sp
->sh_addralign
= SH_ADDRALIGN
;
507 sp
->sh_entsize
= sizeof (Sym
);
509 if (Pread(P
, &elfdata
[off
], sp
->sh_size
,
510 sp
->sh_addr
) != sp
->sh_size
) {
511 dprintf("failed to read .SUNW_ldynsym at %lx\n",
516 /* No need to round up ldynsym data. Dynsym data is same type */
521 * Section Header: .dynsym
523 sp
->sh_name
= SHSTR_NDX_dynsym
;
524 sp
->sh_type
= SHT_DYNSYM
;
525 sp
->sh_flags
= SHF_ALLOC
;
526 sp
->sh_addr
= d
[DI_SYMTAB
]->d_un
.d_ptr
;
527 if (ehdr
->e_type
== ET_DYN
)
530 sp
->sh_size
= dynsym_size
;
531 sp
->sh_link
= dynstr_shndx
;
532 sp
->sh_info
= 1; /* Index of 1st global in table */
533 sp
->sh_addralign
= SH_ADDRALIGN
;
534 sp
->sh_entsize
= sizeof (Sym
);
536 if (Pread(P
, &elfdata
[off
], sp
->sh_size
,
537 sp
->sh_addr
) != sp
->sh_size
) {
538 dprintf("failed to read .dynsym at %lx\n",
543 off
+= roundup(sp
->sh_size
, SH_ADDRALIGN
);
547 * Section Header: .dynstr
549 sp
->sh_name
= SHSTR_NDX_dynstr
;
550 sp
->sh_type
= SHT_STRTAB
;
551 sp
->sh_flags
= SHF_ALLOC
| SHF_STRINGS
;
552 sp
->sh_addr
= d
[DI_STRTAB
]->d_un
.d_ptr
;
553 if (ehdr
->e_type
== ET_DYN
)
556 sp
->sh_size
= d
[DI_STRSZ
]->d_un
.d_val
;
559 sp
->sh_addralign
= 1;
562 if (Pread(P
, &elfdata
[off
], sp
->sh_size
,
563 sp
->sh_addr
) != sp
->sh_size
) {
564 dprintf("failed to read .dynstr\n");
567 off
+= roundup(sp
->sh_size
, SH_ADDRALIGN
);
571 * Section Header: .dynamic
573 sp
->sh_name
= SHSTR_NDX_dynamic
;
574 sp
->sh_type
= SHT_DYNAMIC
;
575 sp
->sh_flags
= SHF_WRITE
| SHF_ALLOC
;
576 sp
->sh_addr
= phdr
->p_vaddr
;
577 if (ehdr
->e_type
== ET_DYN
)
580 sp
->sh_size
= phdr
->p_filesz
;
581 sp
->sh_link
= dynstr_shndx
;
583 sp
->sh_addralign
= SH_ADDRALIGN
;
584 sp
->sh_entsize
= sizeof (Dyn
);
586 (void) memcpy(&elfdata
[off
], dp
, sp
->sh_size
);
587 off
+= roundup(sp
->sh_size
, SH_ADDRALIGN
);
591 * Section Header: .plt
596 uintptr_t strtabptr
, strtabname
;
599 char strbuf
[sizeof ("_PROCEDURE_LINKAGE_TABLE_")];
602 * Now we need to find the address of the plt by looking
603 * up the "_PROCEDURE_LINKAGE_TABLE_" symbol.
606 /* get the address of the symtab and strtab sections */
607 strtabptr
= d
[DI_STRTAB
]->d_un
.d_ptr
;
608 symtabptr
= (Sym
*)(uintptr_t)d
[DI_SYMTAB
]->d_un
.d_ptr
;
609 if (ehdr
->e_type
== ET_DYN
) {
611 symtabptr
= (Sym
*)((uintptr_t)symtabptr
+ addr
);
614 if ((hptr
== 0) || (hnbuckets
== 0) || (hnchains
== 0)) {
615 dprintf("empty or missing .hash\n");
619 /* find the .hash bucket address for this symbol */
620 plt_symhash
= elf_hash("_PROCEDURE_LINKAGE_TABLE_");
621 htmp
= plt_symhash
% hnbuckets
;
622 hash
= &((uint_t
*)hptr
)[2 + htmp
];
624 /* read the elf hash bucket index */
625 if (Pread(P
, &ndx
, sizeof (ndx
), (uintptr_t)hash
) !=
627 dprintf("Pread of .hash at %lx failed\n", (long)hash
);
632 if (Pread(P
, &sym
, sizeof (sym
),
633 (uintptr_t)&symtabptr
[ndx
]) != sizeof (sym
)) {
634 dprintf("Pread of .symtab at %lx failed\n",
635 (long)&symtabptr
[ndx
]);
639 strtabname
= strtabptr
+ sym
.st_name
;
640 if (Pread_string(P
, strbuf
, sizeof (strbuf
),
642 dprintf("Pread of .strtab at %lx failed\n",
647 if (strcmp("_PROCEDURE_LINKAGE_TABLE_", strbuf
) == 0)
650 hash
= &((uint_t
*)hptr
)[2 + hnbuckets
+ ndx
];
651 if (Pread(P
, &ndx
, sizeof (ndx
), (uintptr_t)hash
) !=
653 dprintf("Pread of .hash at %lx failed\n",
662 "Failed to find \"_PROCEDURE_LINKAGE_TABLE_\"\n");
666 sp
->sh_name
= SHSTR_NDX_plt
;
667 sp
->sh_type
= SHT_PROGBITS
;
668 sp
->sh_flags
= SHF_WRITE
| SHF_ALLOC
| SHF_EXECINSTR
;
669 sp
->sh_addr
= sym
.st_value
;
670 if (ehdr
->e_type
== ET_DYN
)
676 sp
->sh_addralign
= SH_ADDRALIGN
;
677 sp
->sh_entsize
= M_PLT_ENTSIZE
;
679 if (Pread(P
, &elfdata
[off
], sp
->sh_size
, sp
->sh_addr
) !=
681 dprintf("failed to read .plt at %lx\n",
685 off
+= roundup(sp
->sh_size
, SH_ADDRALIGN
);
690 /* make sure we didn't write past the end of allocated memory */
692 assert(((uintptr_t)(sp
) - 1) < ((uintptr_t)elfdata
+ size
));
695 if ((elf
= elf_memory(elfdata
, size
)) == NULL
) {
696 dprintf("failed to create ELF object "
697 "in memory for size %ld\n", (long)size
);
702 fptr
->file_elfmem
= elfdata
;