Initial commit
[nasm/github.git] / output / outmac64.c
blobc07dcbc9a4fa9ffc407f46c689bd66ee4b0a2e44
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2013 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * outmac64.c   output routines for the Netwide Assembler to produce
 *              NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
 */

/* Most of this file is, like Mach-O itself, based on a.out. For more
 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
/* Sizes, in bytes, of the Mach-O structures as they appear in the file */
#define MACHO_HEADER64_SIZE     (32)
#define MACHO_SEGCMD64_SIZE     (72)
#define MACHO_SECTCMD64_SIZE    (80)
#define MACHO_SYMCMD_SIZE       (24)
#define MACHO_NLIST64_SIZE      (16)
#define MACHO_RELINFO64_SIZE    (8)

/* Values stored in the Mach-O file header */
#define MH_MAGIC_64             (0xfeedfacf)
#define CPU_TYPE_X86_64         (0x01000007)    /* x86-64 platform */
#define CPU_SUBTYPE_I386_ALL    (3)             /* all-x86 compatible */
#define MH_OBJECT               (0x1)           /* object file */

/* Load command identifiers */
#define LC_SEGMENT_64           (0x19)          /* segment load command */
#define LC_SYMTAB               (0x2)           /* symbol table load command */

/* Virtual memory protection bits */
#define VM_PROT_NONE            (0x00)
#define VM_PROT_READ            (0x01)
#define VM_PROT_WRITE           (0x02)
#define VM_PROT_EXECUTE         (0x04)

/* Both aliases currently expand to read + write + execute */
#define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
#define VM_PROT_ALL     (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/*
 * One output section.  The first group of members is bookkeeping used
 * only inside the assembler; the second group mirrors what is emitted
 * into the section header of the object file.
 */
struct section {
    /* nasm internal data */
    struct section *next;       /* next section in the list */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* nasm segment index of this section */
    struct reloc *relocs;       /* head of this section's relocation list */
    int align;                  /* alignment as a power of two, -1 = unset */

    /* data that goes into the file */
    char sectname[16];          /* what this section is called */
    char segname[16];           /* segment this section will be in */
    uint64_t addr;              /* in-memory address (subject to alignment) */
    uint64_t size;              /* in-memory and -file size */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
    uint32_t extreloc;          /* external relocations */
};
/* Mask selecting the section type from the section flags word */
#define SECTION_TYPE            0x000000ff

/* Section types */
#define S_REGULAR               (0x0)   /* standard section */
#define S_ZEROFILL              (0x1)   /* zerofill, in-memory only */

/* Section attributes */
#define SECTION_ATTRIBUTES_SYS   0x00ffff00 /* system setable attributes */
#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
                                               machine instructions */
#define S_ATTR_EXT_RELOC         0x00000200 /* section has external
                                               relocation entries */
#define S_ATTR_LOC_RELOC         0x00000100 /* section has local
                                               relocation entries */
#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure
                                               machine instructions */
117 static struct sectmap {
118 const char *nasmsect;
119 const char *segname;
120 const char *sectname;
121 const int32_t flags;
122 } sectmap[] = {
123 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
124 {".data", "__DATA", "__data", S_REGULAR},
125 {".rodata", "__DATA", "__const", S_REGULAR},
126 {".bss", "__DATA", "__bss", S_ZEROFILL},
127 {NULL, NULL, NULL, 0}
/*
 * One relocation entry.  Entries are chained per section; the members
 * after `next' hold the values written to the object file.
 */
struct reloc {
    /* nasm internal data */
    struct reloc *next;         /* next relocation of the same section */

    /* data that goes into the file */
    int32_t addr;               /* op's offset in section */
    uint32_t snum:24,           /* contains symbol index if
                                ** ext otherwise in-file
                                ** section number */
             pcrel:1,           /* relative relocation */
             length:2,          /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
             ext:1,             /* external symbol referenced */
             type:4;            /* reloc type */
};
#define R_ABS           0               /* absolute relocation */
#define R_SCATTERED     0x80000000      /* reloc entry is scattered if
                                        ** highest bit == 1 */
/*
 * One symbol table entry.  The leading members are assembler-side
 * bookkeeping; the trailing members are the values emitted into the
 * object file's symbol table.
 */
struct symbol {
    /* nasm internal data */
    struct symbol *next;        /* next symbol in the list */
    char *name;                 /* name of this symbol */
    int32_t initial_snum;       /* symbol number used above in
                                   reloc */
    int32_t snum;               /* true snum for reloc */

    /* data that goes into the file */
    uint32_t strx;              /* string table index */
    uint8_t type;               /* symbol type */
    uint8_t sect;               /* NO_SECT or section number */
    uint16_t desc;              /* for stab debugging, 0 for us */
    uint64_t value;             /* offset of symbol in section */
};
/* symbol type bits */
#define N_EXT   0x01            /* global or external symbol */

#define N_UNDF  0x0             /* undefined symbol | n_sect == */
#define N_ABS   0x2             /* absolute symbol  |  NO_SECT */
#define N_SECT  0xe             /* defined symbol, n_sect holds
                                ** section number */

#define N_TYPE  0x0e            /* type bit mask */

#define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */

/* special section number values */
#define NO_SECT         0       /* no section, invalid */
#define MAX_SECT        255     /* maximum number of sections */
/* Section and symbol lists, each with a tail pointer used for appending */
static struct section *sects, **sectstail;
static struct symbol *syms, **symstail;
static uint32_t nsyms;

/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in order the dynamic linker
   expects. They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
static uint32_t ilocalsym = 0;
static uint32_t iextdefsym = 0;
static uint32_t iundefsym = 0;
static uint32_t nlocalsym;
static uint32_t nextdefsym;
static uint32_t nundefsym;
static struct symbol **extdefsyms = NULL;
static struct symbol **undefsyms = NULL;

static struct RAA *extsyms;
static struct SAA *strs;
static uint32_t strslen;

extern struct ofmt of_macho64;

/* Global file information. This should be cleaned up into either
   a structure or as function arguments.  */
uint32_t head_ncmds64 = 0;
uint32_t head_sizeofcmds64 = 0;
uint64_t seg_filesize64 = 0;
uint64_t seg_vmsize64 = 0;
uint32_t seg_nsects64 = 0;
uint64_t rel_padcnt64 = 0;
/*
 * Copy the string xsrc into the char ARRAY xdst (sizeof is applied to
 * xdst, so it must not be a pointer).  The destination is fully zero
 * padded and always NUL-terminated; an over-long source is truncated.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                               \
    do {                                                                   \
        memset((xdst), '\0', sizeof(xdst));  /* zero out whole buffer */   \
        strncpy((xdst), (xsrc), sizeof(xdst)); /* copy over string */      \
        (xdst)[sizeof(xdst) - 1] = '\0';     /* proper null-termination */ \
    } while (0)
/* Round x up to a multiple of sizeof(int32_t), using ALIGN() */
#define alignint32_t(x) ALIGN(x, sizeof(int32_t))

/* Round x up to a multiple of sizeof(int64_t), using ALIGN() */
#define alignint64_t(x) ALIGN(x, sizeof(int64_t))
235 static void debug_reloc (struct reloc *);
236 static void debug_section_relocs (struct section *) _unused;
238 static struct section *get_section_by_name(const char *segname,
239 const char *sectname)
241 struct section *s;
243 for (s = sects; s != NULL; s = s->next)
244 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
245 break;
247 return s;
250 static struct section *get_section_by_index(const int32_t index)
252 struct section *s;
254 for (s = sects; s != NULL; s = s->next)
255 if (index == s->index)
256 break;
258 return s;
261 static int32_t get_section_index_by_name(const char *segname,
262 const char *sectname)
264 struct section *s;
266 for (s = sects; s != NULL; s = s->next)
267 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
268 return s->index;
270 return -1;
273 static char *get_section_name_by_index(const int32_t index)
275 struct section *s;
277 for (s = sects; s != NULL; s = s->next)
278 if (index == s->index)
279 return s->sectname;
281 return NULL;
284 static uint8_t get_section_fileindex_by_index(const int32_t index)
286 struct section *s;
287 uint8_t i = 1;
289 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
290 if (index == s->index)
291 return i;
293 if (i == MAX_SECT)
294 nasm_error(ERR_WARNING,
295 "too many sections (>255) - clipped by fileindex");
297 return NO_SECT;
300 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
302 struct symbol *nearest = NULL;
303 struct symbol *sym;
305 for (sym = syms; sym; sym = sym->next) {
306 if ((sym->sect != NO_SECT) && (sym->sect == fileindex)) {
307 if ((int64_t)sym->value >= offset)
308 break;
309 nearest = sym;
313 if (!nearest)
314 nasm_error(ERR_FATAL, "No section for index %x offset %llx found\n",
315 fileindex, (long long)offset);
317 return nearest;
/*
 * Special section numbers which are used to define Mach-O special
 * symbols, which can be used with WRT to provide PIC relocation
 * types.
 */
static int32_t macho_gotpcrel_sect;
327 static void macho_init(void)
329 char zero = 0;
331 maxbits = 64;
333 sects = NULL;
334 sectstail = &sects;
336 syms = NULL;
337 symstail = &syms;
338 nsyms = 0;
339 nlocalsym = 0;
340 nextdefsym = 0;
341 nundefsym = 0;
343 extsyms = raa_init();
344 strs = saa_init(1L);
346 /* string table starts with a zero byte - don't ask why */
347 saa_wbytes(strs, &zero, sizeof(char));
348 strslen = 1;
350 /* add special symbol for ..gotpcrel */
351 macho_gotpcrel_sect = seg_alloc();
352 macho_gotpcrel_sect++;
353 define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
356 static void sect_write(struct section *sect,
357 const uint8_t *data, uint32_t len)
359 saa_wbytes(sect->data, data, len);
360 sect->size += len;
363 static int32_t add_reloc(struct section *sect, int32_t section,
364 int pcrel, int bytes, int64_t reloff)
366 struct reloc *r;
367 struct symbol *sym;
368 int32_t fi;
369 int32_t adjustment = 0;
371 /* NeXT as puts relocs in reversed order (address-wise) into the
372 ** files, so we do the same, doesn't seem to make much of a
373 ** difference either way */
374 r = nasm_malloc(sizeof(struct reloc));
375 r->next = sect->relocs;
376 sect->relocs = r;
378 /* the current end of the section will be the symbol's address for
379 ** now, might have to be fixed by macho_fixup_relocs() later on. make
380 ** sure we don't make the symbol scattered by setting the highest
381 ** bit by accident */
382 r->addr = sect->size & ~R_SCATTERED;
383 r->ext = 1;
384 r->pcrel = (pcrel ? 1 : 0);
386 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
387 switch(bytes){
388 case 1:
389 r->length = 0;
390 break;
391 case 2:
392 r->length = 1;
393 break;
394 case 4:
395 r->length = 2;
396 break;
397 case 8:
398 r->length = 3;
399 break;
400 default:
401 break;
404 /* set default relocation values */
405 r->type = 0; // X86_64_RELOC_UNSIGNED
406 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
408 /* absolute relocation */
409 if (pcrel == 0) {
411 /* intra-section */
412 if (section == NO_SEG) {
413 // r->snum = R_ABS; // Set above
415 /* inter-section */
416 } else {
417 fi = get_section_fileindex_by_index(section);
419 /* external */
420 if (fi == NO_SECT) {
421 r->snum = raa_read(extsyms, section);
423 /* local */
424 } else {
425 sym = get_closest_section_symbol_by_offset(fi, reloff);
426 r->snum = sym->initial_snum;
427 adjustment = sym->value;
431 /* relative relocation */
432 } else if (pcrel == 1) {
434 /* intra-section */
435 if (section == NO_SEG) {
436 r->type = 1; // X86_64_RELOC_SIGNED
438 /* inter-section */
439 } else {
440 r->type = 2; // X86_64_RELOC_BRANCH
441 fi = get_section_fileindex_by_index(section);
443 /* external */
444 if (fi == NO_SECT) {
445 sect->extreloc = 1;
446 r->snum = raa_read(extsyms, section);
448 /* local */
449 } else {
450 sym = get_closest_section_symbol_by_offset(fi, reloff);
451 r->snum = sym->initial_snum;
452 adjustment = sym->value;
456 /* subtractor */
457 } else if (pcrel == 2) {
458 r->pcrel = 0;
459 r->type = 5; // X86_64_RELOC_SUBTRACTOR
461 /* gotpcrel */
462 } else if (pcrel == 3) {
463 r->type = 4; // X86_64_RELOC_GOT
464 r->snum = macho_gotpcrel_sect;
466 /* gotpcrel MOVQ load */
467 } else if (pcrel == 4) {
468 r->type = 3; // X86_64_RELOC_GOT_LOAD
469 r->snum = macho_gotpcrel_sect;
472 ++sect->nreloc;
474 return adjustment;
477 static void macho_output(int32_t secto, const void *data,
478 enum out_type type, uint64_t size,
479 int32_t section, int32_t wrt)
481 struct section *s, *sbss;
482 int64_t addr;
483 uint8_t mydata[16], *p, gotload;
485 if (secto == NO_SEG) {
486 if (type != OUT_RESERVE)
487 nasm_error(ERR_NONFATAL, "attempt to assemble code in "
488 "[ABSOLUTE] space");
490 return;
493 s = get_section_by_index(secto);
495 if (s == NULL) {
496 nasm_error(ERR_WARNING, "attempt to assemble code in"
497 " section %d: defaulting to `.text'", secto);
498 s = get_section_by_name("__TEXT", "__text");
500 /* should never happen */
501 if (s == NULL)
502 nasm_error(ERR_PANIC, "text section not found");
505 sbss = get_section_by_name("__DATA", "__bss");
507 if (s == sbss && type != OUT_RESERVE) {
508 nasm_error(ERR_WARNING, "attempt to initialize memory in the"
509 " BSS section: ignored");
510 s->size += realsize(type, size);
511 return;
514 switch (type) {
515 case OUT_RESERVE:
516 if (s != sbss) {
517 nasm_error(ERR_WARNING, "uninitialized space declared in"
518 " %s section: zeroing",
519 get_section_name_by_index(secto));
521 sect_write(s, NULL, size);
522 } else
523 s->size += size;
525 break;
527 case OUT_RAWDATA:
528 if (section != NO_SEG)
529 nasm_error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
531 sect_write(s, data, size);
532 break;
534 case OUT_ADDRESS:
536 int asize = abs(size);
538 addr = *(int64_t *)data;
539 if (section != NO_SEG) {
540 if (section % 2) {
541 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
542 " section base references");
543 } else {
544 if (wrt == NO_SEG) {
545 if (asize < 8) {
546 nasm_error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
547 " 32-bit absolute addresses");
549 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
550 pre-determined knowledge of where the image base would be,
551 making it impractical for use in intermediate object files
553 } else {
554 addr -= add_reloc(s, section, 0, asize, addr); // X86_64_RELOC_UNSIGNED
556 } else {
557 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
558 " this use of WRT");
563 p = mydata;
564 WRITEADDR(p, addr, asize);
565 sect_write(s, mydata, asize);
566 break;
569 case OUT_REL2ADR:
570 p = mydata;
571 WRITESHORT(p, *(int64_t *)data);
573 if (section == secto)
574 nasm_error(ERR_PANIC, "intra-section OUT_REL2ADR");
576 if (section == NO_SEG) {
577 /* Do nothing */
578 } else if (section % 2) {
579 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
580 " section base references");
581 } else {
582 nasm_error(ERR_NONFATAL, "Unsupported non-32-bit"
583 " Macho-O relocation [2]");
586 sect_write(s, mydata, 2L);
587 break;
589 case OUT_REL4ADR:
590 p = mydata;
591 WRITELONG(p, *(int64_t *)data + 4 - size);
593 if (section == secto)
594 nasm_error(ERR_PANIC, "intra-section OUT_REL4ADR");
596 if (section != NO_SEG && section % 2) {
597 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
598 " section base references");
599 } else {
600 if (wrt == NO_SEG) {
601 *mydata -= add_reloc(s, section, 1, 4, *(int64_t *)mydata); // X86_64_RELOC_SIGNED/BRANCH
602 } else if (wrt == macho_gotpcrel_sect) {
603 if (s->data->datalen > 1) {
604 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
605 } else {
606 gotload = 0;
608 if (gotload == 0x8B) { // Check for MOVQ Opcode
609 *mydata -= add_reloc(s, section, 4, 4, *(int64_t *)mydata); // X86_64_GOT_LOAD (MOVQ load)
610 } else {
611 *mydata -= add_reloc(s, section, 3, 4, *(int64_t *)mydata); // X86_64_GOT
613 } else {
614 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
615 " this use of WRT");
616 wrt = NO_SEG; /* we can at least _try_ to continue */
620 sect_write(s, mydata, 4L);
621 break;
623 default:
624 nasm_error(ERR_PANIC, "unknown output type?");
625 break;
629 static int32_t macho_section(char *name, int pass, int *bits)
631 int32_t index, originalIndex;
632 char *sectionAttributes;
633 struct sectmap *sm;
634 struct section *s;
636 (void)pass;
638 /* Default to 64 bits. */
639 if (!name) {
640 *bits = 64;
641 name = ".text";
642 sectionAttributes = NULL;
643 } else {
644 sectionAttributes = name;
645 name = nasm_strsep(&sectionAttributes, " \t");
648 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
649 /* make lookup into section name translation table */
650 if (!strcmp(name, sm->nasmsect)) {
651 char *currentAttribute;
653 /* try to find section with that name */
654 originalIndex = index = get_section_index_by_name(sm->segname,
655 sm->sectname);
657 /* create it if it doesn't exist yet */
658 if (index == -1) {
659 s = *sectstail = nasm_malloc(sizeof(struct section));
660 s->next = NULL;
661 sectstail = &s->next;
663 s->data = saa_init(1L);
664 s->index = seg_alloc();
665 s->relocs = NULL;
666 s->align = -1;
668 xstrncpy(s->segname, sm->segname);
669 xstrncpy(s->sectname, sm->sectname);
670 s->size = 0;
671 s->nreloc = 0;
672 s->flags = sm->flags;
674 index = s->index;
675 } else {
676 s = get_section_by_index(index);
679 while ((NULL != sectionAttributes)
680 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
681 if (0 != *currentAttribute) {
682 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
683 char *end;
684 int newAlignment, value;
686 value = strtoul(currentAttribute + 6, (char**)&end, 0);
687 newAlignment = alignlog2_32(value);
689 if (0 != *end) {
690 nasm_error(ERR_PANIC,
691 "unknown or missing alignment value \"%s\" "
692 "specified for section \"%s\"",
693 currentAttribute + 6,
694 name);
695 return NO_SEG;
696 } else if (0 > newAlignment) {
697 nasm_error(ERR_PANIC,
698 "alignment of %d (for section \"%s\") is not "
699 "a power of two",
700 value,
701 name);
702 return NO_SEG;
705 if ((-1 != originalIndex)
706 && (s->align != newAlignment)
707 && (s->align != -1)) {
708 nasm_error(ERR_PANIC,
709 "section \"%s\" has already been specified "
710 "with alignment %d, conflicts with new "
711 "alignment of %d",
712 name,
713 (1 << s->align),
714 value);
715 return NO_SEG;
718 s->align = newAlignment;
719 } else if (!nasm_stricmp("data", currentAttribute)) {
720 /* Do nothing; 'data' is implicit */
721 } else {
722 nasm_error(ERR_PANIC,
723 "unknown section attribute %s for section %s",
724 currentAttribute,
725 name);
726 return NO_SEG;
731 return index;
735 nasm_error(ERR_PANIC, "invalid section name %s", name);
736 return NO_SEG;
739 static void macho_symdef(char *name, int32_t section, int64_t offset,
740 int is_global, char *special)
742 struct symbol *sym;
744 if (special) {
745 nasm_error(ERR_NONFATAL, "The Mach-O output format does "
746 "not support any special symbol types");
747 return;
750 if (is_global == 3) {
751 nasm_error(ERR_NONFATAL, "The Mach-O format does not "
752 "(yet) support forward reference fixups.");
753 return;
756 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
758 * This is a NASM special symbol. We never allow it into
759 * the Macho-O symbol table, even if it's a valid one. If it
760 * _isn't_ a valid one, we should barf immediately.
762 if (strcmp(name, "..gotpcrel"))
763 nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
764 return;
767 sym = *symstail = nasm_malloc(sizeof(struct symbol));
768 sym->next = NULL;
769 symstail = &sym->next;
771 sym->name = name;
772 sym->strx = strslen;
773 sym->type = 0;
774 sym->desc = 0;
775 sym->value = offset;
776 sym->initial_snum = -1;
778 /* external and common symbols get N_EXT */
779 if (is_global != 0) {
780 sym->type |= N_EXT;
783 if (section == NO_SEG) {
784 /* symbols in no section get absolute */
785 sym->type |= N_ABS;
786 sym->sect = NO_SECT;
787 } else {
788 sym->type |= N_SECT;
790 /* get the in-file index of the section the symbol was defined in */
791 sym->sect = get_section_fileindex_by_index(section);
793 /* track the initially allocated symbol number for use in future fix-ups */
794 sym->initial_snum = nsyms;
796 if (sym->sect == NO_SECT) {
798 /* remember symbol number of references to external
799 ** symbols, this works because every external symbol gets
800 ** its own section number allocated internally by nasm and
801 ** can so be used as a key */
802 extsyms = raa_write(extsyms, section, nsyms);
804 switch (is_global) {
805 case 1:
806 case 2:
807 /* there isn't actually a difference between global
808 ** and common symbols, both even have their size in
809 ** sym->value */
810 sym->type = N_EXT;
811 break;
813 default:
814 /* give an error on unfound section if it's not an
815 ** external or common symbol (assemble_file() does a
816 ** seg_alloc() on every call for them) */
817 nasm_error(ERR_PANIC, "in-file index for section %d not found",
818 section);
822 ++nsyms;
825 static void macho_sectalign(int32_t seg, unsigned int value)
827 struct section *s;
829 list_for_each(s, sects) {
830 if (s->index == seg)
831 break;
834 if (!s || !is_power2(value))
835 return;
837 value = alignlog2_32(value);
838 if (s->align < (int)value)
839 s->align = value;
/* Segment base lookup: this format returns the section index unchanged. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/* Derive the output file name from the input name via standard_extension() and ".o". */
static void macho_filename(char *inname, char *outname)
{
    static const char object_ext[] = ".o";

    standard_extension(inname, outname, object_ext);
}
852 extern macros_t macho_stdmac[];
854 /* Comparison function for qsort symbol layout. */
855 static int layout_compare (const struct symbol **s1,
856 const struct symbol **s2)
858 return (strcmp ((*s1)->name, (*s2)->name));
861 /* The native assembler does a few things in a similar function
863 * Remove temporary labels
864 * Sort symbols according to local, external, undefined (by name)
865 * Order the string table
867 We do not remove temporary labels right now.
869 numsyms is the total number of symbols we have. strtabsize is the
870 number entries in the string table. */
872 static void macho_layout_symbols (uint32_t *numsyms,
873 uint32_t *strtabsize)
875 struct symbol *sym, **symp;
876 uint32_t i,j;
878 *numsyms = 0;
879 *strtabsize = sizeof (char);
881 symp = &syms;
883 while ((sym = *symp)) {
884 /* Undefined symbols are now external. */
885 if (sym->type == N_UNDF)
886 sym->type |= N_EXT;
888 if ((sym->type & N_EXT) == 0) {
889 sym->snum = *numsyms;
890 *numsyms = *numsyms + 1;
891 nlocalsym++;
893 else {
894 if ((sym->type & N_TYPE) != N_UNDF) {
895 nextdefsym++;
896 } else {
897 nundefsym++;
900 /* If we handle debug info we'll want
901 to check for it here instead of just
902 adding the symbol to the string table. */
903 sym->strx = *strtabsize;
904 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
905 *strtabsize += strlen(sym->name) + 1;
907 symp = &(sym->next);
910 /* Next, sort the symbols. Most of this code is a direct translation from
911 the Apple cctools symbol layout. We need to keep compatibility with that. */
912 /* Set the indexes for symbol groups into the symbol table */
913 ilocalsym = 0;
914 iextdefsym = nlocalsym;
915 iundefsym = nlocalsym + nextdefsym;
917 /* allocate arrays for sorting externals by name */
918 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
919 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
921 i = 0;
922 j = 0;
924 symp = &syms;
926 while ((sym = *symp)) {
928 if((sym->type & N_EXT) == 0) {
929 sym->strx = *strtabsize;
930 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
931 *strtabsize += strlen(sym->name) + 1;
933 else {
934 if((sym->type & N_TYPE) != N_UNDF) {
935 extdefsyms[i++] = sym;
936 } else {
937 undefsyms[j++] = sym;
940 symp = &(sym->next);
943 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
944 (int (*)(const void *, const void *))layout_compare);
945 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
946 (int (*)(const void *, const void *))layout_compare);
948 for(i = 0; i < nextdefsym; i++) {
949 extdefsyms[i]->snum = *numsyms;
950 *numsyms += 1;
952 for(j = 0; j < nundefsym; j++) {
953 undefsyms[j]->snum = *numsyms;
954 *numsyms += 1;
958 /* Calculate some values we'll need for writing later. */
960 static void macho_calculate_sizes (void)
962 struct section *s;
964 /* count sections and calculate in-memory and in-file offsets */
965 for (s = sects; s != NULL; s = s->next) {
966 uint64_t pad = 0;
968 /* zerofill sections aren't actually written to the file */
969 if ((s->flags & SECTION_TYPE) != S_ZEROFILL)
970 seg_filesize64 += s->size;
972 /* recalculate segment address based on alignment and vm size */
973 s->addr = seg_vmsize64;
974 /* we need section alignment to calculate final section address */
975 if (s->align == -1)
976 s->align = DEFAULT_SECTION_ALIGNMENT;
977 if(s->align) {
978 uint64_t newaddr = ALIGN(s->addr, 1 << s->align);
979 pad = newaddr - s->addr;
980 s->addr = newaddr;
983 seg_vmsize64 += s->size + pad;
984 ++seg_nsects64;
987 /* calculate size of all headers, load commands and sections to
988 ** get a pointer to the start of all the raw data */
989 if (seg_nsects64 > 0) {
990 ++head_ncmds64;
991 head_sizeofcmds64 +=
992 MACHO_SEGCMD64_SIZE + seg_nsects64 * MACHO_SECTCMD64_SIZE;
995 if (nsyms > 0) {
996 ++head_ncmds64;
997 head_sizeofcmds64 += MACHO_SYMCMD_SIZE;
1001 /* Write out the header information for the file. */
1003 static void macho_write_header (void)
1005 fwriteint32_t(MH_MAGIC_64, ofile); /* magic */
1006 fwriteint32_t(CPU_TYPE_X86_64, ofile); /* CPU type */
1007 fwriteint32_t(CPU_SUBTYPE_I386_ALL, ofile); /* CPU subtype */
1008 fwriteint32_t(MH_OBJECT, ofile); /* Mach-O file type */
1009 fwriteint32_t(head_ncmds64, ofile); /* number of load commands */
1010 fwriteint32_t(head_sizeofcmds64, ofile); /* size of load commands */
1011 fwriteint32_t(0, ofile); /* no flags */
1012 fwriteint32_t(0, ofile); /* reserved for future use */
1015 /* Write out the segment load command at offset. */
1017 static uint32_t macho_write_segment (uint64_t offset)
1019 uint64_t rel_base = alignint64_t (offset + seg_filesize64);
1020 uint32_t s_reloff = 0;
1021 struct section *s;
1023 fwriteint32_t(LC_SEGMENT_64, ofile); /* cmd == LC_SEGMENT_64 */
1025 /* size of load command including section load commands */
1026 fwriteint32_t(MACHO_SEGCMD64_SIZE + seg_nsects64 *
1027 MACHO_SECTCMD64_SIZE, ofile);
1029 /* in an MH_OBJECT file all sections are in one unnamed (name
1030 ** all zeros) segment */
1031 fwritezero(16, ofile);
1032 fwriteint64_t(0, ofile); /* in-memory offset */
1033 fwriteint64_t(seg_vmsize64, ofile); /* in-memory size */
1034 fwriteint64_t(offset, ofile); /* in-file offset to data */
1035 fwriteint64_t(seg_filesize64, ofile); /* in-file size */
1036 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* maximum vm protection */
1037 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* initial vm protection */
1038 fwriteint32_t(seg_nsects64, ofile); /* number of sections */
1039 fwriteint32_t(0, ofile); /* no flags */
1041 /* emit section headers */
1042 for (s = sects; s != NULL; s = s->next) {
1043 nasm_write(s->sectname, sizeof(s->sectname), ofile);
1044 nasm_write(s->segname, sizeof(s->segname), ofile);
1045 fwriteint64_t(s->addr, ofile);
1046 fwriteint64_t(s->size, ofile);
1048 /* dummy data for zerofill sections or proper values */
1049 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1050 fwriteint32_t(offset, ofile);
1051 /* Write out section alignment, as a power of two.
1052 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1053 if (s->align == -1)
1054 s->align = DEFAULT_SECTION_ALIGNMENT;
1055 fwriteint32_t(s->align, ofile);
1056 /* To be compatible with cctools as we emit
1057 a zero reloff if we have no relocations. */
1058 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, ofile);
1059 fwriteint32_t(s->nreloc, ofile);
1061 offset += s->size;
1062 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1063 } else {
1064 fwriteint32_t(0, ofile);
1065 fwriteint32_t(0, ofile);
1066 fwriteint32_t(0, ofile);
1067 fwriteint32_t(0, ofile);
1070 if (s->nreloc) {
1071 s->flags |= S_ATTR_LOC_RELOC;
1072 if (s->extreloc)
1073 s->flags |= S_ATTR_EXT_RELOC;
1076 fwriteint32_t(s->flags, ofile); /* flags */
1077 fwriteint32_t(0, ofile); /* reserved */
1078 fwriteint32_t(0, ofile); /* reserved */
1080 fwriteint32_t(0, ofile); /* align */
1083 rel_padcnt64 = rel_base - offset;
1084 offset = rel_base + s_reloff;
1086 return offset;
1089 /* For a given chain of relocs r, write out the entire relocation
1090 chain to the object file. */
1092 static void macho_write_relocs (struct reloc *r)
1094 while (r) {
1095 uint32_t word2;
1097 fwriteint32_t(r->addr, ofile); /* reloc offset */
1099 word2 = r->snum;
1100 word2 |= r->pcrel << 24;
1101 word2 |= r->length << 25;
1102 word2 |= r->ext << 27;
1103 word2 |= r->type << 28;
1104 fwriteint32_t(word2, ofile); /* reloc data */
1105 r = r->next;
1109 /* Write out the section data. */
1110 static void macho_write_section (void)
1112 struct section *s, *s2;
1113 struct reloc *r;
1114 uint8_t fi, *p, *q, blk[8];
1115 int32_t len;
1116 int64_t l;
1118 for (s = sects; s != NULL; s = s->next) {
1119 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1120 continue;
1122 /* no padding needs to be done to the sections */
1124 /* Like a.out Mach-O references things in the data or bss
1125 * sections by addresses which are actually relative to the
1126 * start of the _text_ section, in the _file_. See outaout.c
1127 * for more information. */
1128 saa_rewind(s->data);
1129 for (r = s->relocs; r != NULL; r = r->next) {
1130 len = (int32_t)r->length << 1;
1131 if(len > 4) len = 8;
1132 saa_fread(s->data, r->addr, blk, len);
1133 p = q = blk;
1134 l = *p++;
1136 /* get offset based on relocation type */
1137 if (r->length > 0) {
1138 l += ((int64_t)*p++) << 8;
1140 if (r->length > 1) {
1141 l += ((int64_t)*p++) << 16;
1142 l += ((int64_t)*p++) << 24;
1145 if (r->length > 2) {
1146 l += ((int64_t)*p++) << 32;
1147 l += ((int64_t)*p++) << 40;
1148 l += ((int64_t)*p++) << 48;
1149 l += ((int64_t)*p++) << 56;
1155 /* If the relocation is internal add to the current section
1156 offset. Otherwise the only value we need is the symbol
1157 offset which we already have. The linker takes care
1158 of the rest of the address. */
1159 if (!r->ext) {
1160 /* generate final address by section address and offset */
1161 for (s2 = sects, fi = 1;
1162 s2 != NULL; s2 = s2->next, fi++) {
1163 if (fi == r->snum) {
1164 l += s2->addr;
1165 break;
1170 /* write new offset back */
1171 if (r->length == 3)
1172 WRITEDLONG(q, l);
1173 else if (r->length == 2)
1174 WRITELONG(q, l);
1175 else if (r->length == 1)
1176 WRITESHORT(q, l);
1177 else
1178 *q++ = l & 0xFF;
1180 saa_fwrite(s->data, r->addr, blk, len);
1183 /* dump the section data to file */
1184 saa_fpwrite(s->data, ofile);
1187 /* pad last section up to reloc entries on int64_t boundary */
1188 fwritezero(rel_padcnt64, ofile);
1190 /* emit relocation entries */
1191 for (s = sects; s != NULL; s = s->next)
1192 macho_write_relocs (s->relocs);
1195 /* Write out the symbol table. We should already have sorted this
1196 before now. */
1197 static void macho_write_symtab (void)
1199 struct symbol *sym;
1200 struct section *s;
1201 int64_t fi;
1202 uint64_t i;
1204 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1206 for (sym = syms; sym != NULL; sym = sym->next) {
1207 if ((sym->type & N_EXT) == 0) {
1208 fwriteint32_t(sym->strx, ofile); /* string table entry number */
1209 nasm_write(&sym->type, 1, ofile); /* symbol type */
1210 nasm_write(&sym->sect, 1, ofile); /* section */
1211 fwriteint16_t(sym->desc, ofile); /* description */
1213 /* Fix up the symbol value now that we know the final section
1214 sizes. */
1215 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1216 for (s = sects, fi = 1; s != NULL; s = s->next, fi++) {
1217 if (fi == sym->sect) {
1218 sym->value += s->addr;
1219 break;
1224 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1228 for (i = 0; i < nextdefsym; i++) {
1229 sym = extdefsyms[i];
1230 fwriteint32_t(sym->strx, ofile);
1231 nasm_write(&sym->type, 1, ofile); /* symbol type */
1232 nasm_write(&sym->sect, 1, ofile); /* section */
1233 fwriteint16_t(sym->desc, ofile); /* description */
1235 /* Fix up the symbol value now that we know the final section
1236 sizes. */
1237 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1238 for (s = sects, fi = 1;
1239 s != NULL && fi < sym->sect; s = s->next, ++fi)
1240 sym->value += s->size;
1243 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1246 for (i = 0; i < nundefsym; i++) {
1247 sym = undefsyms[i];
1248 fwriteint32_t(sym->strx, ofile);
1249 nasm_write(&sym->type, 1, ofile); /* symbol type */
1250 nasm_write(&sym->sect, 1, ofile); /* section */
1251 fwriteint16_t(sym->desc, ofile); /* description */
1253 // Fix up the symbol value now that we know the final section sizes.
1254 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1255 for (s = sects, fi = 1;
1256 s != NULL && fi < sym->sect; s = s->next, ++fi)
1257 sym->value += s->size;
1260 fwriteint64_t(sym->value, ofile); // value (i.e. offset)
1265 /* Fixup the snum in the relocation entries, we should be
1266 doing this only for externally referenced symbols. */
1267 static void macho_fixup_relocs (struct reloc *r)
1269 struct symbol *sym;
1271 while (r != NULL) {
1272 if (r->ext) {
1273 for (sym = syms; sym != NULL; sym = sym->next) {
1274 if (sym->initial_snum == r->snum) {
1275 r->snum = sym->snum;
1276 break;
1280 r = r->next;
1284 /* Write out the object file. */
1286 static void macho_write (void)
1288 uint64_t offset = 0;
1290 /* mach-o object file structure:
1292 ** mach header
1293 ** uint32_t magic
1294 ** int cpu type
1295 ** int cpu subtype
1296 ** uint32_t mach file type
1297 ** uint32_t number of load commands
1298 ** uint32_t size of all load commands
1299 ** (includes section struct size of segment command)
1300 ** uint32_t flags
1302 ** segment command
1303 ** uint32_t command type == LC_SEGMENT_64
1304 ** uint32_t size of load command
1305 ** (including section load commands)
1306 ** char[16] segment name
1307 ** uint64_t in-memory offset
1308 ** uint64_t in-memory size
1309 ** uint64_t in-file offset to data area
1310 ** uint64_t in-file size
1311 ** (in-memory size excluding zerofill sections)
1312 ** int maximum vm protection
1313 ** int initial vm protection
1314 ** uint32_t number of sections
1315 ** uint32_t flags
1317 ** section commands
1318 ** char[16] section name
1319 ** char[16] segment name
1320 ** uint64_t in-memory offset
1321 ** uint64_t in-memory size
1322 ** uint32_t in-file offset
1323 ** uint32_t alignment
1324 ** (irrelevant in MH_OBJECT)
1325 ** uint32_t in-file offset of relocation entires
1326 ** uint32_t number of relocations
1327 ** uint32_t flags
1328 ** uint32_t reserved
1329 ** uint32_t reserved
1331 ** symbol table command
1332 ** uint32_t command type == LC_SYMTAB
1333 ** uint32_t size of load command
1334 ** uint32_t symbol table offset
1335 ** uint32_t number of symbol table entries
1336 ** uint32_t string table offset
1337 ** uint32_t string table size
1339 ** raw section data
1341 ** padding to int64_t boundary
1343 ** relocation data (struct reloc)
1344 ** int32_t offset
1345 ** uint data (symbolnum, pcrel, length, extern, type)
1347 ** symbol table data (struct nlist)
1348 ** int32_t string table entry number
1349 ** uint8_t type
1350 ** (extern, absolute, defined in section)
1351 ** uint8_t section
1352 ** (0 for global symbols, section number of definition (>= 1, <=
1353 ** 254) for local symbols, size of variable for common symbols
1354 ** [type == extern])
1355 ** int16_t description
1356 ** (for stab debugging format)
1357 ** uint64_t value (i.e. file offset) of symbol or stab offset
1359 ** string table data
1360 ** list of null-terminated strings
1363 /* Emit the Mach-O header. */
1364 macho_write_header();
1366 offset = MACHO_HEADER64_SIZE + head_sizeofcmds64;
1368 /* emit the segment load command */
1369 if (seg_nsects64 > 0)
1370 offset = macho_write_segment (offset);
1371 else
1372 nasm_error(ERR_WARNING, "no sections?");
1374 if (nsyms > 0) {
1375 /* write out symbol command */
1376 fwriteint32_t(LC_SYMTAB, ofile); /* cmd == LC_SYMTAB */
1377 fwriteint32_t(MACHO_SYMCMD_SIZE, ofile); /* size of load command */
1378 fwriteint32_t(offset, ofile); /* symbol table offset */
1379 fwriteint32_t(nsyms, ofile); /* number of symbol
1380 ** table entries */
1382 offset += nsyms * MACHO_NLIST64_SIZE;
1383 fwriteint32_t(offset, ofile); /* string table offset */
1384 fwriteint32_t(strslen, ofile); /* string table size */
1387 /* emit section data */
1388 if (seg_nsects64 > 0)
1389 macho_write_section ();
1391 /* emit symbol table if we have symbols */
1392 if (nsyms > 0)
1393 macho_write_symtab ();
1395 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1397 /* emit string table */
1398 saa_fpwrite(strs, ofile);
1400 /* We do quite a bit here, starting with finalizing all of the data
1401 for the object file, writing, and then freeing all of the data from
1402 the file. */
1404 static void macho_cleanup(int debuginfo)
1406 struct section *s;
1407 struct reloc *r;
1408 struct symbol *sym;
1410 (void)debuginfo;
1412 /* Sort all symbols. */
1413 macho_layout_symbols (&nsyms, &strslen);
1415 /* Fixup relocation entries */
1416 for (s = sects; s != NULL; s = s->next) {
1417 macho_fixup_relocs (s->relocs);
1420 /* First calculate and finalize needed values. */
1421 macho_calculate_sizes();
1422 macho_write();
1424 /* free up everything */
1425 while (sects->next) {
1426 s = sects;
1427 sects = sects->next;
1429 saa_free(s->data);
1430 while (s->relocs != NULL) {
1431 r = s->relocs;
1432 s->relocs = s->relocs->next;
1433 nasm_free(r);
1436 nasm_free(s);
1439 saa_free(strs);
1440 raa_free(extsyms);
1442 if (syms) {
1443 while (syms->next) {
1444 sym = syms;
1445 syms = syms->next;
1447 nasm_free (sym);
1452 /* Debugging routines. */
1453 static void debug_reloc (struct reloc *r)
1455 fprintf (stdout, "reloc:\n");
1456 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1457 fprintf (stdout, "\tsnum: %d\n", r->snum);
1458 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1459 fprintf (stdout, "\tlength: %d\n", r->length);
1460 fprintf (stdout, "\text: %d\n", r->ext);
1461 fprintf (stdout, "\ttype: %d\n", r->type);
1464 static void debug_section_relocs (struct section *s)
1466 struct reloc *r = s->relocs;
1468 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1470 while (r != NULL) {
1471 debug_reloc (r);
1472 r = r->next;
/*
 * Output-format descriptor for the "macho64" format.  The initializer is
 * positional, so the order of the entries below must follow the member
 * order of struct ofmt (declared outside this part of the file).  The
 * last entry is macho_cleanup, which finalizes, writes and frees the
 * object data.
 * NOTE(review): the embedded listing numbers jump from 1478 to 1480, so
 * one initializer line may be absent from this extract -- confirm against
 * the upstream file before relying on the field order shown here.
 */
1476 struct ofmt of_macho64 = {
1477 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1478 "macho64",
1480 null_debug_arr,
1481 &null_debug_form,
1482 macho_stdmac,
1483 macho_init,
1484 null_setinfo,
1485 macho_output,
1486 macho_symdef,
1487 macho_section,
1488 macho_sectalign,
1489 macho_segbase,
1490 null_directive,
1491 macho_filename,
1492 macho_cleanup
1495 #endif
1498 * Local Variables:
1499 * mode:c
1500 * c-basic-offset:4
1501 * End:
1503 * end of file */