BR 2822263: don't re-invoke ofmt->symdef() in pass2 unless "special"
[nasm/avx512.git] / output / outmacho64.c
blobcad399de29cec3ff46e4d33fc0d7f720daa4f8af
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * outmacho64.c output routines for the Netwide Assembler to produce
36 * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
39 /* Most of this file is, like Mach-O itself, based on a.out. For more
40 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
59 /* Mach-O in-file header structure sizes */
60 #define MACHO_HEADER64_SIZE (32)
61 #define MACHO_SEGCMD64_SIZE (72)
62 #define MACHO_SECTCMD64_SIZE (80)
63 #define MACHO_SYMCMD_SIZE (24)
64 #define MACHO_NLIST64_SIZE (16)
65 #define MACHO_RELINFO64_SIZE (8)
67 /* Mach-O file header values */
68 #define MH_MAGIC_64 (0xfeedfacf)
69 #define CPU_TYPE_X86_64 (0x01000007) /* x86-64 platform */
70 #define CPU_SUBTYPE_I386_ALL (3) /* all-x86 compatible */
71 #define MH_OBJECT (0x1) /* object file */
73 #define LC_SEGMENT_64 (0x19) /* segment load command */
74 #define LC_SYMTAB (0x2) /* symbol table load command */
76 #define VM_PROT_NONE (0x00)
77 #define VM_PROT_READ (0x01)
78 #define VM_PROT_WRITE (0x02)
79 #define VM_PROT_EXECUTE (0x04)
81 #define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
82 #define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/*
 * One output section.  Sections are chained through `next'; the second
 * group of fields mirrors what is written into the section header.
 */
struct section {
    /* --- nasm internal bookkeeping --- */
    struct section *next;       /* next section in the list */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* nasm segment index of this section */
    struct reloc *relocs;       /* relocation list for this section */
    int align;                  /* log2 of alignment; -1 = not yet specified */

    /* --- values emitted into the object file --- */
    char sectname[16];          /* what this section is called */
    char segname[16];           /* segment this section will be in */
    uint64_t addr;              /* in-memory address (subject to alignment) */
    uint64_t size;              /* in-memory and -file size */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
    uint32_t extreloc;          /* external relocations */
};
102 #define SECTION_TYPE 0x000000ff /* section type mask */
104 #define S_REGULAR (0x0) /* standard section */
105 #define S_ZEROFILL (0x1) /* zerofill, in-memory only */
107 #define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */
108 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
109 machine instructions */
110 #define S_ATTR_EXT_RELOC 0x00000200 /* section has external
111 relocation entries */
112 #define S_ATTR_LOC_RELOC 0x00000100 /* section has local
113 relocation entries */
114 #define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure
115 machine instructions */
117 static struct sectmap {
118 const char *nasmsect;
119 const char *segname;
120 const char *sectname;
121 const int32_t flags;
122 } sectmap[] = {
123 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
124 {".data", "__DATA", "__data", S_REGULAR},
125 {".rodata", "__DATA", "__const", S_REGULAR},
126 {".bss", "__DATA", "__bss", S_ZEROFILL},
127 {NULL, NULL, NULL, 0}
/*
 * One relocation entry, kept on a per-section singly linked list.
 */
struct reloc {
    /* --- nasm internal bookkeeping --- */
    struct reloc *next;         /* next relocation of the same section */

    /* --- values emitted into the object file --- */
    int32_t addr;               /* op's offset in section */
    uint32_t snum:24;           /* symbol index if ext, otherwise the
                                 * in-file section number */
    uint32_t pcrel:1;           /* relative relocation */
    uint32_t length:2;          /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
    uint32_t ext:1;             /* external symbol referenced */
    uint32_t type:4;            /* reloc type */
};
145 #define R_ABS 0 /* absolute relocation */
146 #define R_SCATTERED 0x80000000 /* reloc entry is scattered if
147 ** highest bit == 1 */
/*
 * One symbol table entry, kept on a singly linked list.
 */
struct symbol {
    /* --- nasm internal bookkeeping --- */
    struct symbol *next;        /* next symbol in the list */
    char *name;                 /* name of this symbol */
    int32_t initial_snum;       /* symbol number used above in reloc */
    int32_t snum;               /* true snum for reloc */

    /* --- values emitted into the object file --- */
    uint32_t strx;              /* string table index */
    uint8_t type;               /* symbol type */
    uint8_t sect;               /* NO_SECT or section number */
    uint16_t desc;              /* for stab debugging, 0 for us */
    uint64_t value;             /* offset of symbol in section */
};
165 /* symbol type bits */
166 #define N_EXT 0x01 /* global or external symbol */
168 #define N_UNDF 0x0 /* undefined symbol | n_sect == */
169 #define N_ABS 0x2 /* absolute symbol | NO_SECT */
170 #define N_SECT 0xe /* defined symbol, n_sect holds
171 ** section number */
173 #define N_TYPE 0x0e /* type bit mask */
175 #define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */
177 /* special section number values */
178 #define NO_SECT 0 /* no section, invalid */
179 #define MAX_SECT 255 /* maximum number of sections */
181 static struct section *sects, **sectstail;
182 static struct symbol *syms, **symstail;
183 static uint32_t nsyms;
/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in the order the dynamic linker
   expects.  They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
199 static uint32_t ilocalsym = 0;
200 static uint32_t iextdefsym = 0;
201 static uint32_t iundefsym = 0;
202 static uint32_t nlocalsym;
203 static uint32_t nextdefsym;
204 static uint32_t nundefsym;
205 static struct symbol **extdefsyms = NULL;
206 static struct symbol **undefsyms = NULL;
208 static struct RAA *extsyms;
209 static struct SAA *strs;
210 static uint32_t strslen;
212 static FILE *machofp;
213 static efunc error;
214 static evalfunc evaluate;
216 extern struct ofmt of_macho64;
218 /* Global file information. This should be cleaned up into either
219 a structure or as function arguments. */
220 uint32_t head_ncmds64 = 0;
221 uint32_t head_sizeofcmds64 = 0;
222 uint64_t seg_filesize64 = 0;
223 uint64_t seg_vmsize64 = 0;
224 uint32_t seg_nsects64 = 0;
225 uint64_t rel_padcnt64 = 0;
/*
 * Copy the string xsrc into the character ARRAY xdst (sizeof is applied
 * to xdst, so it must not be a pointer).  The whole buffer is zeroed
 * first and the result is always NUL-terminated, truncating if needed.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                            \
    do {                                                                \
        memset((xdst), '\0', sizeof(xdst)); /* zero out whole buffer */ \
        strncpy((xdst), (xsrc), sizeof(xdst)); /* copy over string */   \
        (xdst)[sizeof(xdst) - 1] = '\0'; /* proper null-termination */  \
    } while (0)
/* Round x up to the next multiple of y; y must be a power of two. */
#define align(x, y) (((x) + (y) - 1) & ~((y) - 1))

/* Round x up to an int32_t boundary. */
#define alignint32_t(x) align(x, sizeof(int32_t))

/* Round x up to an int64_t boundary. */
#define alignint64_t(x) align(x, sizeof(int64_t))
242 static void debug_reloc (struct reloc *);
243 static void debug_section_relocs (struct section *) _unused;
/*
 * Return log2(align) when align is a power of two, 0 when align is 0,
 * and -1 when align is not a power of two.
 */
static int exact_log2 (uint32_t align)
{
    if (align == 0)
        return 0;

    if ((align & (align - 1)) != 0)
        return -1;              /* Not a power of 2 */

#ifdef HAVE_GNUC_4
    return __builtin_ctzl (align);
#else
    {
        /* Exactly one bit is set: each mask contributes one bit of
           that bit's position. */
        static const struct {
            uint32_t mask;
            uint32_t weight;
        } steps[] = {
            { 0xffff0000, 16 },
            { 0xff00ff00,  8 },
            { 0xf0f0f0f0,  4 },
            { 0xcccccccc,  2 },
            { 0xaaaaaaaa,  1 }
        };
        uint32_t position = 0;
        unsigned int k;

        for (k = 0; k < sizeof(steps) / sizeof(steps[0]); k++) {
            if (align & steps[k].mask)
                position |= steps[k].weight;
        }

        return position;
    }
#endif
}
274 static struct section *get_section_by_name(const char *segname,
275 const char *sectname)
277 struct section *s;
279 for (s = sects; s != NULL; s = s->next)
280 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
281 break;
283 return s;
286 static struct section *get_section_by_index(const int32_t index)
288 struct section *s;
290 for (s = sects; s != NULL; s = s->next)
291 if (index == s->index)
292 break;
294 return s;
297 static int32_t get_section_index_by_name(const char *segname,
298 const char *sectname)
300 struct section *s;
302 for (s = sects; s != NULL; s = s->next)
303 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
304 return s->index;
306 return -1;
309 static char *get_section_name_by_index(const int32_t index)
311 struct section *s;
313 for (s = sects; s != NULL; s = s->next)
314 if (index == s->index)
315 return s->sectname;
317 return NULL;
320 static uint8_t get_section_fileindex_by_index(const int32_t index)
322 struct section *s;
323 uint8_t i = 1;
325 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
326 if (index == s->index)
327 return i;
329 if (i == MAX_SECT)
330 error(ERR_WARNING,
331 "too many sections (>255) - clipped by fileindex");
333 return NO_SECT;
336 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
338 struct symbol *sym;
340 for (sym = syms; sym != NULL; sym = sym->next) {
341 if ((sym->sect != NO_SECT) &&
342 (sym->sect == fileindex) &&
343 ((int64_t)sym->value >= offset))
344 return sym;
347 return NULL;
352 * Special section numbers which are used to define Mach-O special
353 * symbols, which can be used with WRT to provide PIC relocation
354 * types.
356 static int32_t macho_gotpcrel_sect;
358 static void macho_init(FILE * fp, efunc errfunc, ldfunc ldef,
359 evalfunc eval)
361 char zero = 0;
363 maxbits = 64;
364 machofp = fp;
365 error = errfunc;
366 evaluate = eval;
368 (void)ldef; /* placate optimizers */
370 sects = NULL;
371 sectstail = &sects;
373 syms = NULL;
374 symstail = &syms;
375 nsyms = 0;
376 nlocalsym = 0;
377 nextdefsym = 0;
378 nundefsym = 0;
380 extsyms = raa_init();
381 strs = saa_init(1L);
383 /* string table starts with a zero byte - don't ask why */
384 saa_wbytes(strs, &zero, sizeof(char));
385 strslen = 1;
387 /* add special symbol for ..gotpcrel */
388 macho_gotpcrel_sect = seg_alloc();
389 macho_gotpcrel_sect ++;
390 ldef("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false, &of_macho64, error);
393 static int macho_setinfo(enum geninfo type, char **val)
395 (void)type;
396 (void)val;
397 return 0;
400 static void sect_write(struct section *sect,
401 const uint8_t *data, uint32_t len)
403 saa_wbytes(sect->data, data, len);
404 sect->size += len;
407 static int32_t add_reloc(struct section *sect, int32_t section,
408 int pcrel, int bytes, int64_t reloff)
410 struct reloc *r;
411 struct symbol *sym;
412 int32_t fi;
413 int32_t adjustment = 0;
415 /* NeXT as puts relocs in reversed order (address-wise) into the
416 ** files, so we do the same, doesn't seem to make much of a
417 ** difference either way */
418 r = nasm_malloc(sizeof(struct reloc));
419 r->next = sect->relocs;
420 sect->relocs = r;
422 /* the current end of the section will be the symbol's address for
423 ** now, might have to be fixed by macho_fixup_relocs() later on. make
424 ** sure we don't make the symbol scattered by setting the highest
425 ** bit by accident */
426 r->addr = sect->size & ~R_SCATTERED;
427 r->ext = 1;
428 r->pcrel = (pcrel ? 1 : 0);
430 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
431 switch(bytes){
432 case 1:
433 r->length = 0;
434 break;
435 case 2:
436 r->length = 1;
437 break;
438 case 4:
439 r->length = 2;
440 break;
441 case 8:
442 r->length = 3;
443 break;
444 default:
445 break;
448 /* set default relocation values */
449 r->type = 0; // X86_64_RELOC_UNSIGNED
450 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
452 /* absolute relocation */
453 if (pcrel == 0) {
455 /* intra-section */
456 if (section == NO_SEG) {
457 // r->snum = R_ABS; // Set above
459 /* inter-section */
460 } else {
461 fi = get_section_fileindex_by_index(section);
463 /* external */
464 if (fi == NO_SECT) {
465 r->snum = raa_read(extsyms, section);
467 /* local */
468 } else {
469 sym = get_closest_section_symbol_by_offset(fi, reloff);
470 r->snum = sym->initial_snum;
471 adjustment = sym->value;
475 /* relative relocation */
476 } else if (pcrel == 1) {
478 /* intra-section */
479 if (section == NO_SEG) {
480 r->type = 1; // X86_64_RELOC_SIGNED
482 /* inter-section */
483 } else {
484 r->type = 2; // X86_64_RELOC_BRANCH
485 fi = get_section_fileindex_by_index(section);
487 /* external */
488 if (fi == NO_SECT) {
489 sect->extreloc = 1;
490 r->snum = raa_read(extsyms, section);
492 /* local */
493 } else {
494 sym = get_closest_section_symbol_by_offset(fi, reloff);
495 r->snum = sym->initial_snum;
496 adjustment = sym->value;
500 /* subtractor */
501 } else if (pcrel == 2) {
502 r->pcrel = 0;
503 r->type = 5; // X86_64_RELOC_SUBTRACTOR
505 /* gotpcrel */
506 } else if (pcrel == 3) {
507 r->type = 4; // X86_64_RELOC_GOT
508 r->snum = macho_gotpcrel_sect;
510 /* gotpcrel MOVQ load */
511 } else if (pcrel == 4) {
512 r->type = 3; // X86_64_RELOC_GOT_LOAD
513 r->snum = macho_gotpcrel_sect;
516 ++sect->nreloc;
518 return adjustment;
521 static void macho_output(int32_t secto, const void *data,
522 enum out_type type, uint64_t size,
523 int32_t section, int32_t wrt)
525 struct section *s, *sbss;
526 int64_t addr;
527 uint8_t mydata[16], *p, gotload;
529 if (secto == NO_SEG) {
530 if (type != OUT_RESERVE)
531 error(ERR_NONFATAL, "attempt to assemble code in "
532 "[ABSOLUTE] space");
534 return;
537 s = get_section_by_index(secto);
539 if (s == NULL) {
540 error(ERR_WARNING, "attempt to assemble code in"
541 " section %d: defaulting to `.text'", secto);
542 s = get_section_by_name("__TEXT", "__text");
544 /* should never happen */
545 if (s == NULL)
546 error(ERR_PANIC, "text section not found");
549 sbss = get_section_by_name("__DATA", "__bss");
551 if (s == sbss && type != OUT_RESERVE) {
552 error(ERR_WARNING, "attempt to initialize memory in the"
553 " BSS section: ignored");
554 s->size += realsize(type, size);
555 return;
558 switch (type) {
559 case OUT_RESERVE:
560 if (s != sbss) {
561 error(ERR_WARNING, "uninitialized space declared in"
562 " %s section: zeroing",
563 get_section_name_by_index(secto));
565 sect_write(s, NULL, size);
566 } else
567 s->size += size;
569 break;
571 case OUT_RAWDATA:
572 if (section != NO_SEG)
573 error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
575 sect_write(s, data, size);
576 break;
578 case OUT_ADDRESS:
579 addr = *(int64_t *)data;
580 if (section != NO_SEG) {
581 if (section % 2) {
582 error(ERR_NONFATAL, "Mach-O format does not support"
583 " section base references");
584 } else {
585 if (wrt == NO_SEG) {
586 if (size < 8) {
587 error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
588 " 32-bit absolute addresses");
590 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
591 pre-determined knowledge of where the image base would be,
592 making it impractical for use in intermediate object files
594 } else {
595 addr -= add_reloc(s, section, 0, size, addr); // X86_64_RELOC_UNSIGNED
597 } else {
598 error(ERR_NONFATAL, "Mach-O format does not support"
599 " this use of WRT");
604 p = mydata;
605 WRITEADDR(p, addr, size);
606 sect_write(s, mydata, size);
607 break;
609 case OUT_REL2ADR:
610 p = mydata;
611 WRITESHORT(p, *(int64_t *)data);
613 if (section == secto)
614 error(ERR_PANIC, "intra-section OUT_REL2ADR");
616 if (section == NO_SEG) {
617 /* Do nothing */
618 } else if (section % 2) {
619 error(ERR_NONFATAL, "Mach-O format does not support"
620 " section base references");
621 } else {
622 error(ERR_NONFATAL, "Unsupported non-32-bit"
623 " Macho-O relocation [2]");
626 sect_write(s, mydata, 2L);
627 break;
629 case OUT_REL4ADR:
630 p = mydata;
631 WRITELONG(p, *(int64_t *)data);
633 if (section == secto)
634 error(ERR_PANIC, "intra-section OUT_REL4ADR");
636 if (section != NO_SEG && section % 2) {
637 error(ERR_NONFATAL, "Mach-O format does not support"
638 " section base references");
639 } else {
640 if (wrt == NO_SEG) {
641 *mydata -= add_reloc(s, section, 1, 4, (int64_t)*mydata); // X86_64_RELOC_SIGNED/BRANCH
642 } else if (wrt == macho_gotpcrel_sect) {
643 if (s->data->datalen > 1) {
644 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
645 } else {
646 gotload = 0;
648 if (gotload == 0x8B) { // Check for MOVQ Opcode
649 *mydata -= add_reloc(s, section, 4, 4, (int64_t)*mydata); // X86_64_GOT_LOAD (MOVQ load)
650 } else {
651 *mydata -= add_reloc(s, section, 3, 4, (int64_t)*mydata); // X86_64_GOT
653 } else {
654 error(ERR_NONFATAL, "Mach-O format does not support"
655 " this use of WRT");
656 wrt = NO_SEG; /* we can at least _try_ to continue */
660 sect_write(s, mydata, 4L);
661 break;
663 default:
664 error(ERR_PANIC, "unknown output type?");
665 break;
669 static int32_t macho_section(char *name, int pass, int *bits)
671 int32_t index, originalIndex;
672 char *sectionAttributes;
673 struct sectmap *sm;
674 struct section *s;
676 (void)pass;
678 /* Default to 64 bits. */
679 if (!name) {
680 *bits = 64;
681 name = ".text";
682 sectionAttributes = NULL;
683 } else {
684 sectionAttributes = name;
685 name = nasm_strsep(&sectionAttributes, " \t");
688 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
689 /* make lookup into section name translation table */
690 if (!strcmp(name, sm->nasmsect)) {
691 char *currentAttribute;
693 /* try to find section with that name */
694 originalIndex = index = get_section_index_by_name(sm->segname,
695 sm->sectname);
697 /* create it if it doesn't exist yet */
698 if (index == -1) {
699 s = *sectstail = nasm_malloc(sizeof(struct section));
700 s->next = NULL;
701 sectstail = &s->next;
703 s->data = saa_init(1L);
704 s->index = seg_alloc();
705 s->relocs = NULL;
706 s->align = -1;
708 xstrncpy(s->segname, sm->segname);
709 xstrncpy(s->sectname, sm->sectname);
710 s->size = 0;
711 s->nreloc = 0;
712 s->flags = sm->flags;
714 index = s->index;
715 } else {
716 s = get_section_by_index(index);
719 while ((NULL != sectionAttributes)
720 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
721 if (0 != *currentAttribute) {
722 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
723 char *end;
724 int newAlignment, value;
726 value = strtoul(currentAttribute + 6, (char**)&end, 0);
727 newAlignment = exact_log2(value);
729 if (0 != *end) {
730 error(ERR_PANIC,
731 "unknown or missing alignment value \"%s\" "
732 "specified for section \"%s\"",
733 currentAttribute + 6,
734 name);
735 return NO_SEG;
736 } else if (0 > newAlignment) {
737 error(ERR_PANIC,
738 "alignment of %d (for section \"%s\") is not "
739 "a power of two",
740 value,
741 name);
742 return NO_SEG;
745 if ((-1 != originalIndex)
746 && (s->align != newAlignment)
747 && (s->align != -1)) {
748 error(ERR_PANIC,
749 "section \"%s\" has already been specified "
750 "with alignment %d, conflicts with new "
751 "alignment of %d",
752 name,
753 (1 << s->align),
754 value);
755 return NO_SEG;
758 s->align = newAlignment;
759 } else if (!nasm_stricmp("data", currentAttribute)) {
760 /* Do nothing; 'data' is implicit */
761 } else {
762 error(ERR_PANIC,
763 "unknown section attribute %s for section %s",
764 currentAttribute,
765 name);
766 return NO_SEG;
771 return index;
775 error(ERR_PANIC, "invalid section name %s", name);
776 return NO_SEG;
779 static void macho_symdef(char *name, int32_t section, int64_t offset,
780 int is_global, char *special)
782 struct symbol *sym;
784 if (special) {
785 error(ERR_NONFATAL, "The Mach-O output format does "
786 "not support any special symbol types");
787 return;
790 if (is_global == 3) {
791 error(ERR_NONFATAL, "The Mach-O format does not "
792 "(yet) support forward reference fixups.");
793 return;
796 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
798 * This is a NASM special symbol. We never allow it into
799 * the Macho-O symbol table, even if it's a valid one. If it
800 * _isn't_ a valid one, we should barf immediately.
802 if (strcmp(name, "..gotpcrel"))
803 error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
804 return;
807 sym = *symstail = nasm_malloc(sizeof(struct symbol));
808 sym->next = NULL;
809 symstail = &sym->next;
811 sym->name = name;
812 sym->strx = strslen;
813 sym->type = 0;
814 sym->desc = 0;
815 sym->value = offset;
816 sym->initial_snum = -1;
818 /* external and common symbols get N_EXT */
819 if (is_global != 0) {
820 sym->type |= N_EXT;
823 if (section == NO_SEG) {
824 /* symbols in no section get absolute */
825 sym->type |= N_ABS;
826 sym->sect = NO_SECT;
827 } else {
828 sym->type |= N_SECT;
830 /* get the in-file index of the section the symbol was defined in */
831 sym->sect = get_section_fileindex_by_index(section);
833 /* track the initially allocated symbol number for use in future fix-ups */
834 sym->initial_snum = nsyms;
836 if (sym->sect == NO_SECT) {
838 /* remember symbol number of references to external
839 ** symbols, this works because every external symbol gets
840 ** its own section number allocated internally by nasm and
841 ** can so be used as a key */
842 extsyms = raa_write(extsyms, section, nsyms);
844 switch (is_global) {
845 case 1:
846 case 2:
847 /* there isn't actually a difference between global
848 ** and common symbols, both even have their size in
849 ** sym->value */
850 sym->type = N_EXT;
851 break;
853 default:
854 /* give an error on unfound section if it's not an
855 ** external or common symbol (assemble_file() does a
856 ** seg_alloc() on every call for them) */
857 error(ERR_PANIC, "in-file index for section %d not found",
858 section);
862 ++nsyms;
/* Segment-base hook: returns the section index it was given, unchanged. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/*
 * Format-specific directive hook.  All arguments are ignored and 0 is
 * always returned.
 */
static int macho_directive(char *directive, char *value, int pass)
{
    (void)pass;
    (void)value;
    (void)directive;

    return 0;
}
878 static void macho_filename(char *inname, char *outname, efunc error)
880 standard_extension(inname, outname, ".o", error);
883 extern macros_t macho_stdmac[];
885 /* Comparison function for qsort symbol layout. */
886 static int layout_compare (const struct symbol **s1,
887 const struct symbol **s2)
889 return (strcmp ((*s1)->name, (*s2)->name));
892 /* The native assembler does a few things in a similar function
894 * Remove temporary labels
895 * Sort symbols according to local, external, undefined (by name)
896 * Order the string table
898 We do not remove temporary labels right now.
900 numsyms is the total number of symbols we have. strtabsize is the
901 number entries in the string table. */
903 static void macho_layout_symbols (uint32_t *numsyms,
904 uint32_t *strtabsize)
906 struct symbol *sym, **symp;
907 uint32_t i,j;
909 *numsyms = 0;
910 *strtabsize = sizeof (char);
912 symp = &syms;
914 while ((sym = *symp)) {
915 /* Undefined symbols are now external. */
916 if (sym->type == N_UNDF)
917 sym->type |= N_EXT;
919 if ((sym->type & N_EXT) == 0) {
920 sym->snum = *numsyms;
921 *numsyms = *numsyms + 1;
922 nlocalsym++;
924 else {
925 if ((sym->type & N_TYPE) != N_UNDF) {
926 nextdefsym++;
927 } else {
928 nundefsym++;
931 /* If we handle debug info we'll want
932 to check for it here instead of just
933 adding the symbol to the string table. */
934 sym->strx = *strtabsize;
935 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
936 *strtabsize += strlen(sym->name) + 1;
938 symp = &(sym->next);
941 /* Next, sort the symbols. Most of this code is a direct translation from
942 the Apple cctools symbol layout. We need to keep compatibility with that. */
943 /* Set the indexes for symbol groups into the symbol table */
944 ilocalsym = 0;
945 iextdefsym = nlocalsym;
946 iundefsym = nlocalsym + nextdefsym;
948 /* allocate arrays for sorting externals by name */
949 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
950 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
952 i = 0;
953 j = 0;
955 symp = &syms;
957 while ((sym = *symp)) {
959 if((sym->type & N_EXT) == 0) {
960 sym->strx = *strtabsize;
961 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
962 *strtabsize += strlen(sym->name) + 1;
964 else {
965 if((sym->type & N_TYPE) != N_UNDF) {
966 extdefsyms[i++] = sym;
967 } else {
968 undefsyms[j++] = sym;
971 symp = &(sym->next);
974 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
975 (int (*)(const void *, const void *))layout_compare);
976 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
977 (int (*)(const void *, const void *))layout_compare);
979 for(i = 0; i < nextdefsym; i++) {
980 extdefsyms[i]->snum = *numsyms;
981 *numsyms += 1;
983 for(j = 0; j < nundefsym; j++) {
984 undefsyms[j]->snum = *numsyms;
985 *numsyms += 1;
989 /* Calculate some values we'll need for writing later. */
991 static void macho_calculate_sizes (void)
993 struct section *s;
995 /* count sections and calculate in-memory and in-file offsets */
996 for (s = sects; s != NULL; s = s->next) {
997 uint64_t pad = 0;
999 /* zerofill sections aren't actually written to the file */
1000 if ((s->flags & SECTION_TYPE) != S_ZEROFILL)
1001 seg_filesize64 += s->size;
1003 /* recalculate segment address based on alignment and vm size */
1004 s->addr = seg_vmsize64;
1005 /* we need section alignment to calculate final section address */
1006 if (s->align == -1)
1007 s->align = DEFAULT_SECTION_ALIGNMENT;
1008 if(s->align) {
1009 uint64_t newaddr = align(s->addr, 1 << s->align);
1010 pad = newaddr - s->addr;
1011 s->addr = newaddr;
1014 seg_vmsize64 += s->size + pad;
1015 ++seg_nsects64;
1018 /* calculate size of all headers, load commands and sections to
1019 ** get a pointer to the start of all the raw data */
1020 if (seg_nsects64 > 0) {
1021 ++head_ncmds64;
1022 head_sizeofcmds64 +=
1023 MACHO_SEGCMD64_SIZE + seg_nsects64 * MACHO_SECTCMD64_SIZE;
1026 if (nsyms > 0) {
1027 ++head_ncmds64;
1028 head_sizeofcmds64 += MACHO_SYMCMD_SIZE;
1032 /* Write out the header information for the file. */
1034 static void macho_write_header (void)
1036 fwriteint32_t(MH_MAGIC_64, machofp); /* magic */
1037 fwriteint32_t(CPU_TYPE_X86_64, machofp); /* CPU type */
1038 fwriteint32_t(CPU_SUBTYPE_I386_ALL, machofp); /* CPU subtype */
1039 fwriteint32_t(MH_OBJECT, machofp); /* Mach-O file type */
1040 fwriteint32_t(head_ncmds64, machofp); /* number of load commands */
1041 fwriteint32_t(head_sizeofcmds64, machofp); /* size of load commands */
1042 fwriteint32_t(0, machofp); /* no flags */
1043 fwriteint32_t(0, machofp); /* reserved for future use */
1046 /* Write out the segment load command at offset. */
1048 static uint32_t macho_write_segment (uint64_t offset)
1050 uint64_t rel_base = alignint64_t (offset + seg_filesize64);
1051 uint32_t s_reloff = 0;
1052 struct section *s;
1054 fwriteint32_t(LC_SEGMENT_64, machofp); /* cmd == LC_SEGMENT_64 */
1056 /* size of load command including section load commands */
1057 fwriteint32_t(MACHO_SEGCMD64_SIZE + seg_nsects64 *
1058 MACHO_SECTCMD64_SIZE, machofp);
1060 /* in an MH_OBJECT file all sections are in one unnamed (name
1061 ** all zeros) segment */
1062 fwritezero(16, machofp);
1063 fwriteint64_t(0, machofp); /* in-memory offset */
1064 fwriteint64_t(seg_vmsize64, machofp); /* in-memory size */
1065 fwriteint64_t(offset, machofp); /* in-file offset to data */
1066 fwriteint64_t(seg_filesize64, machofp); /* in-file size */
1067 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* maximum vm protection */
1068 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* initial vm protection */
1069 fwriteint32_t(seg_nsects64, machofp); /* number of sections */
1070 fwriteint32_t(0, machofp); /* no flags */
1072 /* emit section headers */
1073 for (s = sects; s != NULL; s = s->next) {
1074 fwrite(s->sectname, sizeof(s->sectname), 1, machofp);
1075 fwrite(s->segname, sizeof(s->segname), 1, machofp);
1076 fwriteint64_t(s->addr, machofp);
1077 fwriteint64_t(s->size, machofp);
1079 /* dummy data for zerofill sections or proper values */
1080 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1081 fwriteint32_t(offset, machofp);
1082 /* Write out section alignment, as a power of two.
1083 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1084 if (s->align == -1)
1085 s->align = DEFAULT_SECTION_ALIGNMENT;
1086 fwriteint32_t(s->align, machofp);
1087 /* To be compatible with cctools as we emit
1088 a zero reloff if we have no relocations. */
1089 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, machofp);
1090 fwriteint32_t(s->nreloc, machofp);
1092 offset += s->size;
1093 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1094 } else {
1095 fwriteint32_t(0, machofp);
1096 fwriteint32_t(0, machofp);
1097 fwriteint32_t(0, machofp);
1098 fwriteint32_t(0, machofp);
1101 if (s->nreloc) {
1102 s->flags |= S_ATTR_LOC_RELOC;
1103 if (s->extreloc)
1104 s->flags |= S_ATTR_EXT_RELOC;
1107 fwriteint32_t(s->flags, machofp); /* flags */
1108 fwriteint32_t(0, machofp); /* reserved */
1109 fwriteint32_t(0, machofp); /* reserved */
1111 fwriteint32_t(0, machofp); /* align */
1114 rel_padcnt64 = rel_base - offset;
1115 offset = rel_base + s_reloff;
1117 return offset;
1120 /* For a given chain of relocs r, write out the entire relocation
1121 chain to the object file. */
1123 static void macho_write_relocs (struct reloc *r)
1125 while (r) {
1126 uint32_t word2;
1128 fwriteint32_t(r->addr, machofp); /* reloc offset */
1130 word2 = r->snum;
1131 word2 |= r->pcrel << 24;
1132 word2 |= r->length << 25;
1133 word2 |= r->ext << 27;
1134 word2 |= r->type << 28;
1135 fwriteint32_t(word2, machofp); /* reloc data */
1136 r = r->next;
1140 /* Write out the section data. */
1141 static void macho_write_section (void)
1143 struct section *s, *s2;
1144 struct reloc *r;
1145 uint8_t fi, *p, *q, blk[8];
1146 int32_t len;
1147 int64_t l;
1149 for (s = sects; s != NULL; s = s->next) {
1150 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1151 continue;
1153 /* no padding needs to be done to the sections */
1155 /* Like a.out Mach-O references things in the data or bss
1156 * sections by addresses which are actually relative to the
1157 * start of the _text_ section, in the _file_. See outaout.c
1158 * for more information. */
1159 saa_rewind(s->data);
1160 for (r = s->relocs; r != NULL; r = r->next) {
1161 len = (int32_t)r->length << 1;
1162 if(len > 4) len = 8;
1163 saa_fread(s->data, r->addr, blk, len);
1164 p = q = blk;
1165 l = *p++;
1167 /* get offset based on relocation type */
1168 if (r->length > 0) {
1169 l += ((int64_t)*p++) << 8;
1171 if (r->length > 1) {
1172 l += ((int64_t)*p++) << 16;
1173 l += ((int64_t)*p++) << 24;
1176 if (r->length > 2) {
1177 l += ((int64_t)*p++) << 32;
1178 l += ((int64_t)*p++) << 40;
1179 l += ((int64_t)*p++) << 48;
1180 l += ((int64_t)*p++) << 56;
1186 /* If the relocation is internal add to the current section
1187 offset. Otherwise the only value we need is the symbol
1188 offset which we already have. The linker takes care
1189 of the rest of the address. */
1190 if (!r->ext) {
1191 /* generate final address by section address and offset */
1192 for (s2 = sects, fi = 1;
1193 s2 != NULL; s2 = s2->next, fi++) {
1194 if (fi == r->snum) {
1195 l += s2->addr;
1196 break;
1201 /* write new offset back */
1202 if (r->length == 3)
1203 WRITEDLONG(q, l);
1204 else if (r->length == 2)
1205 WRITELONG(q, l);
1206 else if (r->length == 1)
1207 WRITESHORT(q, l);
1208 else
1209 *q++ = l & 0xFF;
1211 saa_fwrite(s->data, r->addr, blk, len);
1214 /* dump the section data to file */
1215 saa_fpwrite(s->data, machofp);
1218 /* pad last section up to reloc entries on int64_t boundary */
1219 fwritezero(rel_padcnt64, machofp);
1221 /* emit relocation entries */
1222 for (s = sects; s != NULL; s = s->next)
1223 macho_write_relocs (s->relocs);
1226 /* Write out the symbol table. We should already have sorted this
1227 before now. */
1228 static void macho_write_symtab (void)
1230 struct symbol *sym;
1231 struct section *s;
1232 int64_t fi;
1233 uint64_t i;
1235 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1237 for (sym = syms; sym != NULL; sym = sym->next) {
1238 if ((sym->type & N_EXT) == 0) {
1239 fwriteint32_t(sym->strx, machofp); /* string table entry number */
1240 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1241 fwrite(&sym->sect, 1, 1, machofp); /* section */
1242 fwriteint16_t(sym->desc, machofp); /* description */
1244 /* Fix up the symbol value now that we know the final section
1245 sizes. */
1246 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1247 for (s = sects, fi = 1;
1248 s != NULL && fi < sym->sect; s = s->next, ++fi)
1249 sym->value += s->size;
1252 fwriteint64_t(sym->value, machofp); /* value (i.e. offset) */
1256 for (i = 0; i < nextdefsym; i++) {
1257 sym = extdefsyms[i];
1258 fwriteint32_t(sym->strx, machofp);
1259 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1260 fwrite(&sym->sect, 1, 1, machofp); /* section */
1261 fwriteint16_t(sym->desc, machofp); /* description */
1263 /* Fix up the symbol value now that we know the final section
1264 sizes. */
1265 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1266 for (s = sects, fi = 1;
1267 s != NULL && fi < sym->sect; s = s->next, ++fi)
1268 sym->value += s->size;
1271 fwriteint64_t(sym->value, machofp); /* value (i.e. offset) */
1274 for (i = 0; i < nundefsym; i++) {
1275 sym = undefsyms[i];
1276 fwriteint32_t(sym->strx, machofp);
1277 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1278 fwrite(&sym->sect, 1, 1, machofp); /* section */
1279 fwriteint16_t(sym->desc, machofp); /* description */
1281 // Fix up the symbol value now that we know the final section sizes.
1282 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1283 for (s = sects, fi = 1;
1284 s != NULL && fi < sym->sect; s = s->next, ++fi)
1285 sym->value += s->size;
1288 fwriteint64_t(sym->value, machofp); // value (i.e. offset)
1293 /* Fixup the snum in the relocation entries, we should be
1294 doing this only for externally referenced symbols. */
1295 static void macho_fixup_relocs (struct reloc *r)
1297 struct symbol *sym;
1299 while (r != NULL) {
1300 if (r->ext) {
1301 for (sym = syms; sym != NULL; sym = sym->next) {
1302 if (sym->initial_snum == r->snum) {
1303 r->snum = sym->snum;
1304 break;
1308 r = r->next;
1312 /* Write out the object file. */
1314 static void macho_write (void)
1316 uint64_t offset = 0;
1318 /* mach-o object file structure:
1320 ** mach header
1321 ** uint32_t magic
1322 ** int cpu type
1323 ** int cpu subtype
1324 ** uint32_t mach file type
1325 ** uint32_t number of load commands
1326 ** uint32_t size of all load commands
1327 ** (includes section struct size of segment command)
1328 ** uint32_t flags
1330 ** segment command
1331 ** uint32_t command type == LC_SEGMENT_64
1332 ** uint32_t size of load command
1333 ** (including section load commands)
1334 ** char[16] segment name
1335 ** uint64_t in-memory offset
1336 ** uint64_t in-memory size
1337 ** uint64_t in-file offset to data area
1338 ** uint64_t in-file size
1339 ** (in-memory size excluding zerofill sections)
1340 ** int maximum vm protection
1341 ** int initial vm protection
1342 ** uint32_t number of sections
1343 ** uint32_t flags
1345 ** section commands
1346 ** char[16] section name
1347 ** char[16] segment name
1348 ** uint64_t in-memory offset
1349 ** uint64_t in-memory size
1350 ** uint32_t in-file offset
1351 ** uint32_t alignment
1352 ** (irrelevant in MH_OBJECT)
1353 ** uint32_t in-file offset of relocation entires
1354 ** uint32_t number of relocations
1355 ** uint32_t flags
1356 ** uint32_t reserved
1357 ** uint32_t reserved
1359 ** symbol table command
1360 ** uint32_t command type == LC_SYMTAB
1361 ** uint32_t size of load command
1362 ** uint32_t symbol table offset
1363 ** uint32_t number of symbol table entries
1364 ** uint32_t string table offset
1365 ** uint32_t string table size
1367 ** raw section data
1369 ** padding to int64_t boundary
1371 ** relocation data (struct reloc)
1372 ** int32_t offset
1373 ** uint data (symbolnum, pcrel, length, extern, type)
1375 ** symbol table data (struct nlist)
1376 ** int32_t string table entry number
1377 ** uint8_t type
1378 ** (extern, absolute, defined in section)
1379 ** uint8_t section
1380 ** (0 for global symbols, section number of definition (>= 1, <=
1381 ** 254) for local symbols, size of variable for common symbols
1382 ** [type == extern])
1383 ** int16_t description
1384 ** (for stab debugging format)
1385 ** uint64_t value (i.e. file offset) of symbol or stab offset
1387 ** string table data
1388 ** list of null-terminated strings
1391 /* Emit the Mach-O header. */
1392 macho_write_header();
1394 offset = MACHO_HEADER64_SIZE + head_sizeofcmds64;
1396 /* emit the segment load command */
1397 if (seg_nsects64 > 0)
1398 offset = macho_write_segment (offset);
1399 else
1400 error(ERR_WARNING, "no sections?");
1402 if (nsyms > 0) {
1403 /* write out symbol command */
1404 fwriteint32_t(LC_SYMTAB, machofp); /* cmd == LC_SYMTAB */
1405 fwriteint32_t(MACHO_SYMCMD_SIZE, machofp); /* size of load command */
1406 fwriteint32_t(offset, machofp); /* symbol table offset */
1407 fwriteint32_t(nsyms, machofp); /* number of symbol
1408 ** table entries */
1410 offset += nsyms * MACHO_NLIST64_SIZE;
1411 fwriteint32_t(offset, machofp); /* string table offset */
1412 fwriteint32_t(strslen, machofp); /* string table size */
1415 /* emit section data */
1416 if (seg_nsects64 > 0)
1417 macho_write_section ();
1419 /* emit symbol table if we have symbols */
1420 if (nsyms > 0)
1421 macho_write_symtab ();
1423 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1425 /* emit string table */
1426 saa_fpwrite(strs, machofp);
1428 /* We do quite a bit here, starting with finalizing all of the data
1429 for the object file, writing, and then freeing all of the data from
1430 the file. */
1432 static void macho_cleanup(int debuginfo)
1434 struct section *s;
1435 struct reloc *r;
1436 struct symbol *sym;
1438 (void)debuginfo;
1440 /* Sort all symbols. */
1441 macho_layout_symbols (&nsyms, &strslen);
1443 /* Fixup relocation entries */
1444 for (s = sects; s != NULL; s = s->next) {
1445 macho_fixup_relocs (s->relocs);
1448 /* First calculate and finalize needed values. */
1449 macho_calculate_sizes();
1450 macho_write();
1452 /* done - yay! */
1453 fclose(machofp);
1455 /* free up everything */
1456 while (sects->next) {
1457 s = sects;
1458 sects = sects->next;
1460 saa_free(s->data);
1461 while (s->relocs != NULL) {
1462 r = s->relocs;
1463 s->relocs = s->relocs->next;
1464 nasm_free(r);
1467 nasm_free(s);
1470 saa_free(strs);
1471 raa_free(extsyms);
1473 if (syms) {
1474 while (syms->next) {
1475 sym = syms;
1476 syms = syms->next;
1478 nasm_free (sym);
1483 /* Debugging routines. */
1484 static void debug_reloc (struct reloc *r)
1486 fprintf (stdout, "reloc:\n");
1487 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1488 fprintf (stdout, "\tsnum: %d\n", r->snum);
1489 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1490 fprintf (stdout, "\tlength: %d\n", r->length);
1491 fprintf (stdout, "\text: %d\n", r->ext);
1492 fprintf (stdout, "\ttype: %d\n", r->type);
1495 static void debug_section_relocs (struct section *s)
1497 struct reloc *r = s->relocs;
1499 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1501 while (r != NULL) {
1502 debug_reloc (r);
1503 r = r->next;
/* Output-format descriptor for the "macho64" backend: a descriptive
 * name, the short name "macho64", the null debug-format entries, the
 * standard macro set, and the macho_* routines defined in this file.
 * NOTE(review): the meaning of each slot is fixed by the declaration
 * of struct ofmt, which is not in this file -- confirm the order
 * against that declaration before adding or removing an entry. */
1507 struct ofmt of_macho64 = {
1508 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1509 "macho64",
1511 null_debug_arr,
1512 &null_debug_form,
1513 macho_stdmac,
1514 macho_init,
1515 macho_setinfo,
1516 macho_output,
1517 macho_symdef,
1518 macho_section,
1519 macho_segbase,
1520 macho_directive,
1521 macho_filename,
1522 macho_cleanup
1525 #endif
1528 * Local Variables:
1529 * mode:c
1530 * c-basic-offset:4
1531 * End:
1533 * end of file */