2 * $Id: codegen.c,v 1.10 2007/08/12 18:58:12 khansen Exp $
4 * Revision 1.10 2007/08/12 18:58:12 khansen
5 * ability to generate pure 6502 binary (--pure-binary switch)
7 * Revision 1.9 2007/08/09 20:33:48 khansen
10 * Revision 1.8 2007/08/07 22:42:17 khansen
11 * make sure a CMD_FILE command is output at the start of each segment
13 * Revision 1.7 2007/08/07 21:23:24 khansen
16 * Revision 1.6 2007/07/22 13:33:26 khansen
17 * convert tabs to whitespaces
19 * Revision 1.5 2004/12/19 19:58:37 kenth
22 * Revision 1.4 2004/12/18 16:58:52 kenth
23 * outputs file and line info if --debug
24 * CMD_DSW, CMD_DSD gone
26 * Revision 1.3 2004/12/14 01:49:14 kenth
29 * Revision 1.2 2004/12/06 04:52:36 kenth
32 * Revision 1.1 2004/06/30 07:55:38 kenth
38 * (C) 2004 Kent Hansen
40 * The XORcyst is free software; you can redistribute it and/or modify
41 * it under the terms of the GNU General Public License as published by
42 * the Free Software Foundation; either version 2 of the License, or
43 * (at your option) any later version.
45 * The XORcyst is distributed in the hope that it will be useful,
46 * but WITHOUT ANY WARRANTY; without even the implied warranty of
47 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48 * GNU General Public License for more details.
50 * You should have received a copy of the GNU General Public License
51 * along with The XORcyst; if not, write to the Free Software
52 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56 * This file contains functions for generating object code from an abstract
58 * The output of the assembler is not pure 6502 binary code. It is a file in
59 * a custom object format meant to be fed to the XORcyst linker. The object
60 * file contains information about the symbols it exports (that others may
61 * access), the symbols it imports (the linker will look for these in other
62 * units and resolve their values), and of course the 6502 instructions.
63 * Labels, data and instructions are encoded in a special bytecode language.
64 * All operands to instructions and directives which aren't constants are
65 * stored as expressions. (It is the linker's job to parse the expressions,
66 * compute their values and produce the actual 6502 binary.)
68 * The format of resulting file is:
69 * 1 word: magic number
71 * 1 word: number of exported constants
72 * ? bytes: exported constants descriptors
73 * 1 byte: number of units explicitly imported from
74 * ? bytes: names of units explicitly imported from
75 * 1 word: number of imported symbols (externals)
76 * ? bytes: imported symbols descriptors
77 * 24-bit word: size of data segment
78 * ? bytes: data segment bytecodes
79 * 24-bit word: size of code segment
80 * ? bytes: code segment bytecodes
81 * 1 word: number of expressions
82 * ? bytes: expression descriptors
95 /*---------------------------------------------------------------------------*/
97 /* Convenient macros to write integers in big-endian form. */
98 #define put_1(f, b) { fputc((unsigned char)(b), f); }
99 #define put_2(f, w) { put_1(f, (w) >> 8); put_1(f, w); }
100 #define put_3(f, t) { put_2(f, (t) >> 8); put_1(f, t); }
101 #define put_4(f, q) { put_2(f, (q) >> 16); put_2(f, q); }
103 /* Convenient macros to write length-prepended strings. */
104 #define put_str_8(f, s) { put_1(f, strlen(s)-1); fprintf(f, "%s", s); }
105 #define put_str_16(f, s) { put_2(f, strlen(s)-1); fprintf(f, "%s", s); }
106 #define put_str(f, s) { if (strlen(s) <= 0x100) { put_str_8(f, s); } else { put_str_16(f, s); } }
109 * Writes a sequence of bytes to file.
110 * @param f File handle
111 * @param bytes Array of bytes
112 * @param count Number of bytes
114 static void put_bytes(FILE *f
, const unsigned char *bytes
, int count
)
117 /* Write the bytes */
118 for (i
=0; i
<count
; i
++) {
123 /*---------------------------------------------------------------------------*/
125 #define BUF_MAX_SIZE 0x10000
126 /** Buffer used to collect binary data generated from processing consecutive code statements. */
127 static unsigned char buf
[BUF_MAX_SIZE
];
128 /** Current position in buffer */
129 static int buf_pos
= 0;
131 static FILE *buf_file
= NULL
;
133 #define buf_reset() { buf_pos = 0; }
136 * Flushes contents of <code>buf</code> to file.
137 * @param f File handle
139 static void buf_flush(FILE *f
)
142 if (buf_pos
<= 0x100) {
143 /* Use 8-bit form. */
144 put_1(f
, XASM_CMD_BIN8
);
145 put_1(f
, buf_pos
- 1);
146 put_bytes(f
, buf
, buf_pos
);
148 else if (buf_pos
<= 0x10000) {
149 /* Use 16-bit form. */
150 put_1(f
, XASM_CMD_BIN16
);
151 put_2(f
, buf_pos
- 1);
152 put_bytes(f
, buf
, buf_pos
);
155 /* Error, buffer overflow. */
156 fprintf(stderr
, "buf_flush: buffer overflow\n");
162 /** Appends single byte to buffer. */
163 #define buf_append(b) { if (buf_pos >= BUF_MAX_SIZE) buf_flush(buf_file); buf[buf_pos++] = (unsigned char)(b); }
165 /** Appends n bytes to buffer. */
166 static void buf_append_n(unsigned char *data
, int size
)
169 for (i
=0; i
<size
; i
++) {
174 /*---------------------------------------------------------------------------*/
177 * Converts an arithmetic operator (from ARITHMETIC_NODE) into a byte value.
178 * The bytecode language uses a different operator encoding which is why
179 * we need this function.
180 * @param op Operator to convert
181 * @return Bytecode-equivalent operator
183 static unsigned char operator_to_bytecode(arithmetic_operator op
)
186 case PLUS_OPERATOR
: return XASM_OP_PLUS
;
187 case MINUS_OPERATOR
: return XASM_OP_MINUS
;
188 case MUL_OPERATOR
: return XASM_OP_MUL
;
189 case DIV_OPERATOR
: return XASM_OP_DIV
;
190 case MOD_OPERATOR
: return XASM_OP_MOD
;
191 case AND_OPERATOR
: return XASM_OP_AND
;
192 case OR_OPERATOR
: return XASM_OP_OR
;
193 case XOR_OPERATOR
: return XASM_OP_XOR
;
194 case SHL_OPERATOR
: return XASM_OP_SHL
;
195 case SHR_OPERATOR
: return XASM_OP_SHR
;
196 case LT_OPERATOR
: return XASM_OP_LT
;
197 case GT_OPERATOR
: return XASM_OP_GT
;
198 case EQ_OPERATOR
: return XASM_OP_EQ
;
199 case NE_OPERATOR
: return XASM_OP_NE
;
200 case LE_OPERATOR
: return XASM_OP_LE
;
201 case GE_OPERATOR
: return XASM_OP_GE
;
202 case NEG_OPERATOR
: return XASM_OP_NEG
;
203 case NOT_OPERATOR
: return XASM_OP_NOT
;
204 case LO_OPERATOR
: return XASM_OP_LO
;
205 case HI_OPERATOR
: return XASM_OP_HI
;
206 case UMINUS_OPERATOR
: return XASM_OP_UMINUS
;
207 case BANK_OPERATOR
: return XASM_OP_BANK
;
210 fprintf(stderr
, "operator_to_bytecode(): bad operator\n");
215 /*---------------------------------------------------------------------------*/
217 /* Maximum number of expressions that the code generator can handle. */
218 #define MAX_EXPRS 16384
220 /* List of expressions involved in code statements. */
221 static astnode
*expr_list
[MAX_EXPRS
];
222 /* Number of expressions generated so far. */
223 static int expr_count
= 0;
226 * Registers an expression with the code generator.
227 * Unique 16-bit ID of the expression is returned, which may then be used to refer
228 * to the expression in bytecodes.
229 * @param expr Expression to register
230 * @return Unique ID of expression
232 static int register_expression(astnode
*expr
)
235 /* See if an equivalent expression already is registered */
236 for (i
=0; i
<expr_count
; i
++) {
237 if (astnode_equal(expr
, expr_list
[i
])) {
238 /* Return ID of equivalent expression. */
242 /* This is a new expression, add it to end of list if possible. */
243 if (expr_count
== MAX_EXPRS
) {
244 fprintf(stderr
, "register_expression(): buffer overflow\n");
248 expr_list
[expr_count
++] = expr
;
254 * Serializes an expression recursively.
255 * The result of serializing a leaf node is always (type, value).
256 * The result of serializing an operator node is always (operator, lhs, rhs).
257 * @param fp File handle
258 * @param expr Expression to serialize
260 static void put_expr_recursive(FILE *fp
, const astnode
*expr
)
265 if (expr
== NULL
) { return; }
266 // printf("put_expr_recursive(%s)\n", astnode_type_to_string(expr->type) );
267 switch (astnode_get_type(expr
)) {
269 v
= (unsigned long)expr
->integer
;
271 /* Write type */ put_1(fp
, XASM_INT_8
);
272 /* Write value */ put_1(fp
, v
);
274 else if (v
< 0x10000) {
275 /* Write type */ put_1(fp
, XASM_INT_16
);
276 /* Write value */ put_2(fp
, v
);
278 else if (v
< 0x1000000) {
279 /* Write type */ put_1(fp
, XASM_INT_24
);
280 /* Write value */ put_3(fp
, v
);
283 /* Write type */ put_1(fp
, XASM_INT_32
);
284 /* Write value */ put_4(fp
, v
);
290 if (strlen(s
) <= 0x100) {
291 /* Write type */ put_1(fp
, XASM_STR_8
);
292 /* Write string */ put_str_8(fp
, s
);
295 /* Write type */ put_1(fp
, XASM_STR_16
);
296 /* Write string */ put_str_16(fp
, s
);
300 case IDENTIFIER_NODE
:
302 e
= symtab_lookup(expr
->ident
);
304 put_1(fp
, (e
->flags
& EXTRN_FLAG
) ? XASM_EXTRN
: XASM_LOCAL
);
309 case ARITHMETIC_NODE
:
310 /* The operator goes first */
311 put_1(fp
, operator_to_bytecode(expr
->oper
) );
312 /* Then left-hand side */
313 put_expr_recursive(fp
, LHS(expr
));
314 /* Then right-hand side */
315 put_expr_recursive(fp
, RHS(expr
));
318 case CURRENT_PC_NODE
:
326 "put_expr_recursive(): unknown expression type (%s)\n",
327 astnode_type_to_string(astnode_get_type(expr
))
334 * Serializes an expression.
335 * The expression is written in postfix form so that it can be easily evaluated
336 * even as pure binary.
337 * @param fp File handle
338 * @param expr Expression to serialize
340 static void put_expr(FILE *fp
, const astnode
*expr
)
342 put_expr_recursive(fp
, expr
);
346 * Writes all expressions registered during code generation to file.
347 * @param fp File handle
349 static void put_expressions(FILE *fp
)
352 /* Write expression count */
353 put_2(fp
, expr_count
);
354 /* Write serialized expressions */
355 for (i
=0; i
<expr_count
; i
++) {
356 put_expr(fp
, expr_list
[i
]);
360 /*---------------------------------------------------------------------------*/
363 * Tests if two file location descriptors are equal.
365 static int locations_are_equal(const location
*l1
, const location
*l2
)
367 return ((l1
->first_line
== l2
->first_line
) && (strcmp(l1
->file
, l2
->file
) == 0));
370 /*---------------------------------------------------------------------------*/
373 * Writes a statement to file.
374 * @param fp File handle
377 static void put_statement(FILE *fp
, const astnode
*n
, location
*loc
)
386 static int tag
= 0; /* Used to give labels unique IDs */
388 /* See if we should embed file+line information */
389 if (xasm_args
.debug
&& !locations_are_equal(loc
, &n
->loc
) ) {
391 if (strcmp(loc
->file
, n
->loc
.file
) != 0) {
392 put_1(fp
, XASM_CMD_FILE
);
393 put_str_8(fp
, n
->loc
.file
);
395 if (loc
->first_line
!= n
->loc
.first_line
) {
396 line
= n
->loc
.first_line
;
397 if (line
== loc
->first_line
+ 1) {
398 put_1(fp
, XASM_CMD_LINE_INC
);
401 put_1(fp
, XASM_CMD_LINE8
);
404 else if (line
< 65536) {
405 put_1(fp
, XASM_CMD_LINE16
);
409 put_1(fp
, XASM_CMD_LINE24
);
417 switch (astnode_get_type(n
)) {
422 put_1(fp
, XASM_CMD_LABEL
);
423 /* Look it up in symbol table */
424 e
= symtab_lookup(n
->label
);
426 /* IMPORTANT: Tag label uniquely so we can refer to it in expressions */
428 /* Write flag byte */
430 flags
|= (e
->flags
& PUBLIC_FLAG
) ? XASM_LABEL_FLAG_EXPORT
: 0;
431 flags
|= (e
->flags
& ZEROPAGE_FLAG
) ? XASM_LABEL_FLAG_ZEROPAGE
: 0;
432 flags
|= (e
->flags
& ALIGN_FLAG
) ? XASM_LABEL_FLAG_ALIGN
: 0;
433 flags
|= (e
->flags
& ADDR_FLAG
) ? XASM_LABEL_FLAG_ADDR
: 0;
435 /* If exported, write name also */
436 if (e
->flags
& PUBLIC_FLAG
) {
437 put_str_8(fp
, e
->id
);
439 /* if alignment, write that too */
440 if (e
->flags
& ALIGN_FLAG
) {
443 /* if address, write that too */
444 if (e
->flags
& ADDR_FLAG
) {
445 put_2(fp
, e
->address
);
449 case INSTRUCTION_NODE
:
450 /* Get # of bytes such a 6502 instruction occupies */
451 len
= opcode_length(n
->instr
.opcode
);
453 /* Instruction has no operand, so append it to binary buffer. */
454 buf_append(n
->instr
.opcode
);
457 /* Instruction has an operand.
458 It may be a constant or something unresolved. */
459 expr
= astnode_get_child(n
, 0);
460 if (astnode_get_type(expr
) == INTEGER_NODE
) {
461 /* Instruction has constant operand, so append it to binary buffer. */
462 buf_append(n
->instr
.opcode
);
464 buf_append(expr
->integer
);
467 buf_append(expr
->integer
>> 8);
471 /* Instruction has unresolved operand. */
472 /* Flush binary buffer to file */
474 /* Output 4-byte sequence: CMD_INSTR [opcode] [expr-id] */
475 put_1(fp
, XASM_CMD_INSTR
);
476 put_1(fp
, n
->instr
.opcode
);
477 put_2(fp
, register_expression(expr
));
483 /* Append the binary to the buffer */
484 buf_append_n(n
->binary
.data
, n
->binary
.size
);
490 /* Go through all the elements of the data array */
491 for (expr
= RHS(n
); expr
!= NULL
; expr
= astnode_get_next_sibling(expr
) ) {
492 if (astnode_get_type(expr
) == INTEGER_NODE
) {
493 /* Constant, output as binary */
494 switch (type
->datatype
) {
496 buf_append(expr
->integer
);
500 /* Note: little-endian here (6502) */
501 buf_append(expr
->integer
);
502 buf_append(expr
->integer
>> 8);
506 /* Note: little-endian here (6502) */
507 buf_append(expr
->integer
);
508 buf_append(expr
->integer
>> 8);
509 buf_append(expr
->integer
>> 16);
510 buf_append(expr
->integer
>> 24);
519 /* Unresolved. Linker will handle it. */
520 /* Flush binary buffer to file */
522 /* Output 3-byte sequence: [type-cmd] [expr-id] */
523 switch (type
->datatype
) {
524 case BYTE_DATATYPE
: put_1(fp
, XASM_CMD_DB
); break;
525 case WORD_DATATYPE
: put_1(fp
, XASM_CMD_DW
); break;
526 case DWORD_DATATYPE
: put_1(fp
, XASM_CMD_DD
); break;
531 put_2(fp
, register_expression(expr
));
539 assert(type
->datatype
== BYTE_DATATYPE
);
540 /* Get expression which is the count */
543 if (astnode_get_type(expr
) == INTEGER_NODE
) {
544 /* Use the immediate form. */
545 /* Calculate the # of bytes. */
547 /* Select bytecode depending on whether count fits in 8 bits or not */
550 put_1(fp
, XASM_CMD_DSI8
);
556 put_1(fp
, XASM_CMD_DSI16
);
562 /* Use the unresolved form. */
563 put_1(fp
, XASM_CMD_DSB
);
564 /* Write expression ID */
565 put_2(fp
, register_expression(expr
));
572 case STRUC_DECL_NODE
:
573 case UNION_DECL_NODE
:
575 case RECORD_DECL_NODE
:
581 fprintf(stderr
, "put_statement(%s): unsupported type\n", astnode_type_to_string(astnode_get_type(n
)));
586 /*---------------------------------------------------------------------------*/
589 * Writes the public constants to file.
590 * @param fp File handle
592 static void put_public_constants(FILE *fp
)
594 symbol_ident_list list
;
602 /* 16-bit count followed by (name, type, value) triplets */
604 symtab_list_type(CONSTANT_SYMBOL
, &list
);
605 /* Make one iteration to look them up and count them */
607 for (i
=0; i
<list
.size
; i
++) {
608 e
= symtab_lookup(list
.idents
[i
]);
609 if (e
->flags
& PUBLIC_FLAG
) {
616 for (i
=0; i
<list
.size
; i
++) {
617 e
= symtab_lookup(list
.idents
[i
]);
618 if (e
->flags
& PUBLIC_FLAG
) {
620 put_str_8(fp
, e
->id
);
621 /* Get the expression which represents the value */
622 expr
= (astnode
*)e
->def
;
623 /* Write the type and value */
624 switch (astnode_get_type(expr
)) {
626 v
= (unsigned long)expr
->integer
;
628 /* Write type */ put_1(fp
, XASM_INT_8
);
629 /* Write value */ put_1(fp
, v
);
631 else if (v
< 0x10000) {
632 /* Write type */ put_1(fp
, XASM_INT_16
);
633 /* Write value */ put_2(fp
, v
);
635 else if (v
< 0x1000000) {
636 /* Write type */ put_1(fp
, XASM_INT_24
);
637 /* Write value */ put_3(fp
, v
);
640 /* Write type */ put_1(fp
, XASM_INT_32
);
641 /* Write value */ put_4(fp
, v
);
647 if (strlen(s
) <= 0x100) {
648 /* Write type */ put_1(fp
, XASM_STR_8
);
649 /* Write value */ put_str_8(fp
, s
);
651 else if (strlen(s
) <= 0x10000) {
652 /* Write type */ put_1(fp
, XASM_STR_16
);
653 /* Write value */ put_str_16(fp
, s
);
656 fprintf(stderr
, "put_public_constants(): string constant too long\n");
661 fprintf(stderr
, "put_public_constants(): expression isn't a constant after all\n");
666 symtab_list_finalize(&list
);
670 * Writes the external symbol specifiers to file.
671 * @param fp File handle
673 static void put_externals(FILE *fp
)
675 symbol_ident_list list
;
680 /* number of units explicitly imported from */
681 put_1(fp
, 0x00); /* TODO */
682 /* List of unit names (TODO) */
684 /* 16-bit count followed by name list */
686 symtab_list_type(ANY_SYMBOL
, &list
);
687 /* One iteration to count them */
689 for (i
=0; i
<list
.size
; i
++) {
690 e
= symtab_lookup(list
.idents
[i
]);
691 if ((e
->flags
& EXTRN_FLAG
) && (e
->ref_count
!= 0)) {
696 /* Another iteration to output them */
698 for (i
=0; i
<list
.size
; i
++) {
700 e
= symtab_lookup(list
.idents
[i
]);
701 if ((e
->flags
& EXTRN_FLAG
) && (e
->ref_count
!= 0)) {
702 /* IMPORTANT: Set unique tag so we can refer to it in expressions */
703 /* (This probably shouldn't be done here though...???) */
705 /* Write unit (TODO) */
706 put_1(fp
, 0x00); /* 0 = Any unit */
708 put_str_8(fp
, list
.idents
[i
]);
711 symtab_list_finalize(&list
);
715 * Writes a segment to file.
716 * @param fp File handle
717 * @param root AST node which has the list of assembly statements as its children
718 * @param targetseg Specifies the type of segment (data or code)
720 static void put_segment(FILE *fp
, const astnode
*root
, int targetseg
)
723 int seg
; /* The type of segment we're in */
724 fpos_t size_pos
, end_pos
;
726 location loc
= { -1, -1, -1, -1, "" };
728 /* Write the size of the segment (backpatched later) */
729 fgetpos(fp
, &size_pos
);
730 put_3(fp
, 0xDECADE); /* 24-bit integer */
732 seg
= 2; /* Codeseg is default */
735 /* Step through all the nodes in the list */
736 for (n
= astnode_get_first_child(root
); n
!= NULL
; n
= astnode_get_next_sibling(n
) ) {
737 switch (astnode_get_type(n
)) {
739 seg
= 1; /* Now we're in a DATA segment */
743 seg
= 2; /* Now we're in a CODE segment */
747 /* Only output the statement if we're in the proper segment. */
748 if (seg
== targetseg
) {
749 put_statement(fp
, n
, &loc
);
755 put_1(fp
, XASM_CMD_END
);
758 fgetpos(fp
, &end_pos
); /* First save the current (end) position */
759 fsetpos(fp
, &size_pos
); /* Set position to where the size should be written */
760 put_3(fp
, end
- start
); /* Size is difference between end and start offset */
761 fsetpos(fp
, &end_pos
); /* Restore end position */
765 * Writes a sequence of parsed assembly statements to file.
766 * @param fp File handle
767 * @param root AST node which has the assembly statements as its children
769 static void put_bytecodes(FILE *fp
, const astnode
*root
)
771 /* Write data segment first */
772 put_segment(fp
, root
, 1);
773 /* Write code segment next */
774 put_segment(fp
, root
, 2);
777 /*---------------------------------------------------------------------------*/
780 * Writes an object file which captures all the information in a syntax tree.
781 * @param root Root of the syntax tree
782 * @param filename Name of the file to write to (object file)
784 void codegen_write(const astnode
*root
, const char *filename
)
786 /* Attempt to open file for writing */
787 FILE *fp
= fopen(filename
, "wb");
789 fprintf(stderr
, "codegen_write: could not open '%s' for writing\n", filename
);
790 /* ### increase error count */
797 /* Write magic number */
798 put_2(fp
, XASM_MAGIC
);
799 /* Write version (upper nibble=major, lower nibble=minor) */
800 put_1(fp
, XASM_OBJ_VERSION
);
802 /* Write exported constants. */
803 put_public_constants(fp
);
805 /* Write imported symbols. */
808 /* Write bytecodes. */
809 put_bytecodes(fp
, root
);
811 /* Write expressions */