8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / dis / dis_main.c
blob2886f412c9a024e24f511aca32dc7d095fcbae08
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright 2011 Jason King. All rights reserved.
27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
31 #include <ctype.h>
32 #include <getopt.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <sys/sysmacros.h>
37 #include <sys/elf_SPARC.h>
39 #include <libdisasm.h>
41 #include "dis_target.h"
42 #include "dis_util.h"
43 #include "dis_list.h"
45 int g_demangle; /* Demangle C++ names */
46 int g_quiet; /* Quiet mode */
47 int g_numeric; /* Numeric mode */
48 int g_flags; /* libdisasm language flags */
49 int g_doall; /* true if no functions or sections were given */
51 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */
52 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */
55 * Section options for -d, -D, and -s
57 #define DIS_DATA_RELATIVE 1
58 #define DIS_DATA_ABSOLUTE 2
59 #define DIS_TEXT 3
62 * libdisasm callback data. Keeps track of current data (function or section)
63 * and offset within that data.
65 typedef struct dis_buffer {
66 dis_tgt_t *db_tgt; /* current dis target */
67 void *db_data; /* function or section data */
68 uint64_t db_addr; /* address of function start */
69 size_t db_size; /* size of data */
70 uint64_t db_nextaddr; /* next address to be read */
71 } dis_buffer_t;
73 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */
76 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
77 * formatted symbol, based on the offset and current setttings.
79 void
80 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
81 size_t buflen)
83 if (symbol == NULL || g_numeric) {
84 if (g_flags & DIS_OCTAL)
85 (void) snprintf(buf, buflen, "0%llo", addr);
86 else
87 (void) snprintf(buf, buflen, "0x%llx", addr);
88 } else {
89 if (g_demangle)
90 symbol = dis_demangle(symbol);
92 if (offset == 0)
93 (void) snprintf(buf, buflen, "%s", symbol);
94 else if (g_flags & DIS_OCTAL)
95 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
96 else
97 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
102 * Determine if we are on an architecture with fixed-size instructions,
103 * and if so, what size they are.
105 static int
106 insn_size(dis_handle_t *dhp)
108 int min = dis_min_instrlen(dhp);
109 int max = dis_max_instrlen(dhp);
111 if (min == max)
112 return (min);
114 return (0);
118 * The main disassembly routine. Given a fixed-sized buffer and starting
119 * address, disassemble the data using the supplied target and libdisasm handle.
121 void
122 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
123 size_t datalen)
125 dis_buffer_t db = { 0 };
126 char buf[BUFSIZE];
127 char symbuf[BUFSIZE];
128 const char *symbol;
129 const char *last_symbol;
130 off_t symoffset;
131 int i;
132 int bytesperline;
133 size_t symsize;
134 int isfunc;
135 size_t symwidth = 0;
136 int ret;
137 int insz = insn_size(dhp);
139 db.db_tgt = tgt;
140 db.db_data = data;
141 db.db_addr = addr;
142 db.db_size = datalen;
144 dis_set_data(dhp, &db);
146 if ((bytesperline = dis_max_instrlen(dhp)) > 6)
147 bytesperline = 6;
149 symbol = NULL;
151 while (addr < db.db_addr + db.db_size) {
153 ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
154 if (ret != 0 && insz > 0) {
156 * Since we know instructions are fixed size, we
157 * always know the address of the next instruction
159 (void) snprintf(buf, sizeof (buf),
160 "*** invalid opcode ***");
161 db.db_nextaddr = addr + insz;
163 } else if (ret != 0) {
164 off_t next;
166 (void) snprintf(buf, sizeof (buf),
167 "*** invalid opcode ***");
170 * On architectures with variable sized instructions
171 * we have no way to figure out where the next
172 * instruction starts if we encounter an invalid
173 * instruction. Instead we print the rest of the
174 * instruction stream as hex until we reach the
175 * next valid symbol in the section.
177 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
178 db.db_nextaddr = db.db_addr + db.db_size;
179 } else {
180 if (next > db.db_size)
181 db.db_nextaddr = db.db_addr +
182 db.db_size;
183 else
184 db.db_nextaddr = addr + next;
189 * Print out the line as:
191 * address: bytes text
193 * If there are more than 6 bytes in any given instruction,
194 * spread the bytes across two lines. We try to get symbolic
195 * information for the address, but if that fails we print out
196 * the numeric address instead.
198 * We try to keep the address portion of the text aligned at
199 * MINSYMWIDTH characters. If we are disassembling a function
200 * with a long name, this can be annoying. So we pick a width
201 * based on the maximum width that the current symbol can be.
202 * This at least produces text aligned within each function.
204 last_symbol = symbol;
205 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
206 &isfunc);
207 if (symbol == NULL) {
208 symbol = dis_find_section(tgt, addr, &symoffset);
209 symsize = symoffset;
212 if (symbol != last_symbol)
213 getsymname(addr, symbol, symsize, symbuf,
214 sizeof (symbuf));
216 symwidth = MAX(symwidth, strlen(symbuf));
217 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
220 * If we've crossed a new function boundary, print out the
221 * function name on a blank line.
223 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
224 (void) printf("%s()\n", symbol);
226 (void) printf(" %s:%*s ", symbuf,
227 symwidth - strlen(symbuf), "");
229 /* print bytes */
230 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
231 i++) {
232 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
233 if (g_flags & DIS_OCTAL)
234 (void) printf("%03o ", byte);
235 else
236 (void) printf("%02x ", byte);
239 /* trailing spaces for missing bytes */
240 for (; i < bytesperline; i++) {
241 if (g_flags & DIS_OCTAL)
242 (void) printf(" ");
243 else
244 (void) printf(" ");
247 /* contents of disassembly */
248 (void) printf(" %s", buf);
250 /* excess bytes that spill over onto subsequent lines */
251 for (; i < db.db_nextaddr - addr; i++) {
252 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
253 if (i % bytesperline == 0)
254 (void) printf("\n %*s ", symwidth, "");
255 if (g_flags & DIS_OCTAL)
256 (void) printf("%03o ", byte);
257 else
258 (void) printf("%02x ", byte);
261 (void) printf("\n");
263 addr = db.db_nextaddr;
268 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
269 * function, and convert the result using getsymname().
272 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
273 size_t *symlen)
275 dis_buffer_t *db = data;
276 const char *symbol;
277 off_t offset;
278 size_t size;
281 * If NULL symbol is returned, getsymname takes care of
282 * printing appropriate address in buf instead of symbol.
284 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
286 if (buf != NULL)
287 getsymname(addr, symbol, offset, buf, buflen);
289 if (start != NULL)
290 *start = addr - offset;
291 if (symlen != NULL)
292 *symlen = size;
294 if (symbol == NULL)
295 return (-1);
297 return (0);
301 * libdisasm wrapper around target reading. libdisasm will always read data
302 * in order, so update our current offset within the buffer appropriately.
303 * We only support reading from within the current object; libdisasm should
304 * never ask us to do otherwise.
307 do_read(void *data, uint64_t addr, void *buf, size_t len)
309 dis_buffer_t *db = data;
310 size_t offset;
312 if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
313 return (-1);
315 offset = addr - db->db_addr;
316 len = MIN(len, db->db_size - offset);
318 (void) memcpy(buf, (char *)db->db_data + offset, len);
320 db->db_nextaddr = addr + len;
322 return (len);
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of data
 *	data	bytes to dump
 *	datalen	number of bytes in data
 *
 * Each output line covers a 16-byte aligned window.  Positions in the window
 * that fall before addr or at/after addr + datalen are shown as blanks.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	const uint8_t *bytes = data;
	const uint64_t end = addr + datalen;
	/* Keep the cursor 64 bits wide so large addresses are not truncated */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	int i;
	int width;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte (8 hex digit) width.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces (two, the width of one hex byte).
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end)
				(void) printf("  ");
			else
				(void) printf("%02x",
				    bytes[curaddr + i - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[curaddr + i - addr];

				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
394 * Disassemble a section implicitly specified as part of a file. This function
395 * is called for all sections when no other flags are specified. We ignore any
396 * data sections, and print out only those sections containing text.
398 void
399 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
401 dis_handle_t *dhp = data;
403 /* ignore data sections */
404 if (!dis_section_istext(scn))
405 return;
407 if (!g_quiet)
408 (void) printf("\nsection %s\n", dis_section_name(scn));
410 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
411 dis_section_size(scn));
415 * Structure passed to dis_named_{section,function} which keeps track of both
416 * the target and the libdisasm handle.
418 typedef struct callback_arg {
419 dis_tgt_t *ca_tgt;
420 dis_handle_t *ca_handle;
421 } callback_arg_t;
424 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
425 * argument contains the type of argument given. Pass the data onto the
426 * appropriate helper routine.
428 void
429 dis_named_section(dis_scn_t *scn, int type, void *data)
431 callback_arg_t *ca = data;
433 if (!g_quiet)
434 (void) printf("\nsection %s\n", dis_section_name(scn));
436 switch (type) {
437 case DIS_DATA_RELATIVE:
438 dump_data(0, dis_section_data(scn), dis_section_size(scn));
439 break;
440 case DIS_DATA_ABSOLUTE:
441 dump_data(dis_section_addr(scn), dis_section_data(scn),
442 dis_section_size(scn));
443 break;
444 case DIS_TEXT:
445 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
446 dis_section_data(scn), dis_section_size(scn));
447 break;
452 * Disassemble a function explicitly specified with '-F'. The 'type' argument
453 * is unused.
455 /* ARGSUSED */
456 void
457 dis_named_function(dis_func_t *func, int type, void *data)
459 callback_arg_t *ca = data;
461 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
462 dis_function_data(func), dis_function_size(func));
466 * Disassemble a complete file. First, we determine the type of the file based
467 * on the ELF machine type, and instantiate a version of the disassembler
468 * appropriate for the file. We then resolve any named sections or functions
469 * against the file, and iterate over the results (or all sections if no flags
470 * were specified).
472 void
473 dis_file(const char *filename)
475 dis_tgt_t *tgt, *current;
476 dis_scnlist_t *sections;
477 dis_funclist_t *functions;
478 dis_handle_t *dhp;
479 GElf_Ehdr ehdr;
482 * First, initialize the target
484 if ((tgt = dis_tgt_create(filename)) == NULL)
485 return;
487 if (!g_quiet)
488 (void) printf("disassembly for %s\n\n", filename);
491 * A given file may contain multiple targets (if it is an archive, for
492 * example). We iterate over all possible targets if this is the case.
494 for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
495 dis_tgt_ehdr(current, &ehdr);
498 * Eventually, this should probably live within libdisasm, and
499 * we should be able to disassemble targets from different
500 * architectures. For now, we only support objects as the
501 * native machine type.
503 switch (ehdr.e_machine) {
504 case EM_SPARC:
505 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
506 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
507 warn("invalid E_IDENT field for SPARC object");
508 return;
510 g_flags |= DIS_SPARC_V8;
511 break;
513 case EM_SPARC32PLUS:
515 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
517 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
518 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
519 warn("invalid E_IDENT field for SPARC object");
520 return;
523 if (flags != 0 &&
524 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
525 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
526 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
527 else
528 g_flags |= DIS_SPARC_V9;
529 break;
532 case EM_SPARCV9:
533 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
534 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
535 warn("invalid E_IDENT field for SPARC object");
536 return;
539 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
540 break;
542 case EM_386:
543 g_flags |= DIS_X86_SIZE32;
544 break;
546 case EM_AMD64:
547 g_flags |= DIS_X86_SIZE64;
548 break;
550 case EM_S370:
551 g_flags |= DIS_S370;
553 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
554 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
555 warn("invalid E_IDENT field for S370 object");
556 return;
558 break;
560 case EM_S390:
562 * Both 390 and z/Architecture use EM_S390, the only
563 * differences is the class: ELFCLASS32 for plain
564 * old s390 and ELFCLASS64 for z/Architecture (aka.
565 * s390x).
567 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
568 g_flags |= DIS_S390_31;
569 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
570 g_flags |= DIS_S390_64;
571 } else {
572 warn("invalid E_IDENT field for S390 object");
573 return;
576 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
577 warn("invalid E_IDENT field for S390 object");
578 return;
580 break;
582 default:
583 die("%s: unsupported ELF machine 0x%x", filename,
584 ehdr.e_machine);
588 * If ET_REL (.o), printing immediate symbols is likely to
589 * result in garbage, as symbol lookups on unrelocated
590 * immediates find false and useless matches.
593 if (ehdr.e_type == ET_REL)
594 g_flags |= DIS_NOIMMSYM;
596 if (!g_quiet && dis_tgt_member(current) != NULL)
597 (void) printf("\narchive member %s\n",
598 dis_tgt_member(current));
601 * Instantiate a libdisasm handle based on the file type.
603 if ((dhp = dis_handle_create(g_flags, current, do_lookup,
604 do_read)) == NULL)
605 die("%s: failed to initialize disassembler: %s",
606 filename, dis_strerror(dis_errno()));
608 if (g_doall) {
610 * With no arguments, iterate over all sections and
611 * disassemble only those that contain text.
613 dis_tgt_section_iter(current, dis_text_section, dhp);
614 } else {
615 callback_arg_t ca;
617 ca.ca_tgt = current;
618 ca.ca_handle = dhp;
621 * If sections or functions were explicitly specified,
622 * resolve those names against the object, and iterate
623 * over just the resulting data.
625 sections = dis_namelist_resolve_sections(g_seclist,
626 current);
627 functions = dis_namelist_resolve_functions(g_funclist,
628 current);
630 dis_scnlist_iter(sections, dis_named_section, &ca);
631 dis_funclist_iter(functions, dis_named_function, &ca);
633 dis_scnlist_destroy(sections);
634 dis_funclist_destroy(functions);
637 dis_handle_destroy(dhp);
640 dis_tgt_destroy(tgt);
/*
 * Print the command synopsis on stderr and terminate with exit status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n"
	    "\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
/*
 * Singly-linked list node recording one archive path built from a '-l'
 * option in main().
 */
typedef struct lib_node {
	/* heap-allocated path of the archive */
	char		*path;
	/* next node, or NULL at the end of the list */
	struct lib_node	*next;
} lib_node_t;
657 main(int argc, char **argv)
659 int optchar;
660 int i;
661 lib_node_t *libs = NULL;
663 g_funclist = dis_namelist_create();
664 g_seclist = dis_namelist_create();
666 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
667 switch (optchar) {
668 case 'C':
669 g_demangle = 1;
670 break;
671 case 'd':
672 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
673 break;
674 case 'D':
675 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
676 break;
677 case 'F':
678 dis_namelist_add(g_funclist, optarg, 0);
679 break;
680 case 'l': {
682 * The '-l foo' option historically would attempt to
683 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
684 * environment variable has never been supported or
685 * documented for our linker. However, until this
686 * option is formally EOLed, we have to support it.
688 char *dir;
689 lib_node_t *node;
690 size_t len;
692 if ((dir = getenv("LIBDIR")) == NULL ||
693 dir[0] == '\0')
694 dir = "/usr/lib";
695 node = safe_malloc(sizeof (lib_node_t));
696 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
697 node->path = safe_malloc(len);
699 (void) snprintf(node->path, len, "%s/lib%s.a", dir,
700 optarg);
701 node->next = libs;
702 libs = node;
703 break;
705 case 'L':
707 * The '-L' option historically would attempt to read
708 * the .debug section of the target to determine source
709 * line information in order to annotate the output.
710 * No compiler has emitted these sections in many years,
711 * and the option has never done what it purported to
712 * do. We silently consume the option for
713 * compatibility.
715 break;
716 case 'n':
717 g_numeric = 1;
718 break;
719 case 'o':
720 g_flags |= DIS_OCTAL;
721 break;
722 case 'q':
723 g_quiet = 1;
724 break;
725 case 't':
726 dis_namelist_add(g_seclist, optarg, DIS_TEXT);
727 break;
728 case 'V':
729 (void) printf("Solaris disassembler version 1.0\n");
730 return (0);
731 default:
732 usage();
733 break;
737 argc -= optind;
738 argv += optind;
740 if (argc == 0 && libs == NULL) {
741 warn("no objects specified");
742 usage();
745 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
746 g_doall = 1;
749 * See comment for 'l' option, above.
751 while (libs != NULL) {
752 lib_node_t *node = libs->next;
754 dis_file(libs->path);
755 free(libs->path);
756 free(libs);
757 libs = node;
760 for (i = 0; i < argc; i++)
761 dis_file(argv[i]);
763 dis_namelist_destroy(g_funclist);
764 dis_namelist_destroy(g_seclist);
766 return (g_error);