3 * $Id: scanner.l,v 1.15 2007/11/11 22:35:46 khansen Exp $
5 * Revision 1.15 2007/11/11 22:35:46 khansen
8 * Revision 1.14 2007/08/19 11:19:47 khansen
9 * --case-insensitive option
11 * Revision 1.13 2007/08/12 18:58:49 khansen
12 * ability to generate pure 6502 binary
14 * Revision 1.12 2007/08/11 01:25:39 khansen
15 * includepaths support (-I option)
17 * Revision 1.11 2007/08/10 20:21:36 khansen
18 * *** empty log message ***
20 * Revision 1.10 2007/07/22 13:34:16 khansen
21 * convert tabs to whitespaces
23 * Revision 1.9 2005/01/09 11:20:31 kenth
27 * Revision 1.8 2004/12/19 19:59:08 kenth
30 * Revision 1.7 2004/12/16 13:22:08 kenth
31 * added DEFINE parsing
33 * Revision 1.6 2004/12/14 01:50:33 kenth
36 * Revision 1.5 2004/12/11 02:12:32 kenth
39 * Revision 1.4 2004/12/10 22:43:53 kenth
40 * removed FILE_PATH recognition: ".db <a, >b" is erroneously parsed otherwise
42 * Revision 1.3 2004/12/09 11:15:28 kenth
43 * bugfix: close file handle
44 * added: "WARNING", "ERROR" recognition
46 * Revision 1.2 2004/12/06 05:06:29 kenth
49 * Revision 1.1 2004/06/30 07:56:59 kenth
/* Forward declarations: the rule actions call these helpers before their
   definitions appear further down in this file. */
55 void yyerror(const char *); /* See below */
56 char *strip_label(); /* See below */
57 const char *maybe_ignorecase(char *); /* See below */
65 /* Each time we match a string, move the end cursor to its end:
   flex expands YY_USER_ACTION ahead of every matched rule action, so
   last_column grows by the length of the matched text (yyleng). */
66 #define YY_USER_ACTION yylloc.last_column += yyleng;
/* Named patterns used by the rules below.
   string_literal:  double-quoted text on one line; the body may contain
                    neither a newline nor a double quote, so there is no
                    escape for an embedded quote.
   decimal_literal: decimal digits starting with a non-zero digit.
   hex_literal2:    hex digits starting with a decimal digit (used with the
                    H suffix).
   hex_literal3:    hex digits starting with 0 followed by A-F (used with
                    the H suffix).
   identifier:      1 to 255 characters; letters, digits and underscore,
                    not starting with a digit.
   local_id:        1 to 255 characters; letters, digits and underscore,
                    may start with a digit (follows the @@ prefix).
   NOTE(review): only upper-case letters are listed; presumably the scanner
   is generated case-insensitive or the input is upper-cased - confirm. */
70 string_literal \"[^\n"]*\"
73 decimal_literal [1-9][0-9]*
75 hex_literal2 [0-9][0-9A-F]*
76 hex_literal3 0[A-F][0-9A-F]*
77 identifier [A-Z_][A-Z0-9_]{0,254}
78 local_id [0-9A-Z_]{1,255}
83 /* At each yylex invocation, mark the current position as the
84 start of the next token. */
85 LOCATION_STEP (yylloc);
    /* Whitespace and comments produce no token; the location is simply
       stepped past them. */
87 {whitespace} { LOCATION_STEP (yylloc); }
88 {comment} { LOCATION_STEP (yylloc); }
    /* Integer literals. A leading %, 0X or $ is skipped by starting the
       conversion at yytext[1] or yytext[2]; for the B and H suffixed forms
       strtol() stops by itself at the suffix, which is not a digit in that
       base. strtol() is called with a NULL end pointer and errno is not
       examined, so out-of-range literals are not diagnosed here. */
89 "%"{binary_literal} { yylval.integer = strtol(&yytext[1], NULL, 2); return(INTEGER_LITERAL); }
90 {binary_literal}"B" { yylval.integer = strtol(yytext, NULL, 2); return(INTEGER_LITERAL); }
91 {octal_literal} { yylval.integer = strtol(yytext, NULL, 8); return(INTEGER_LITERAL); }
92 {decimal_literal} { yylval.integer = strtol(yytext, NULL, 10); return(INTEGER_LITERAL); }
93 "0X"{hex_literal} { yylval.integer = strtol(&yytext[2], NULL, 16); return(INTEGER_LITERAL); }
94 "$"{hex_literal} { yylval.integer = strtol(&yytext[1], NULL, 16); return(INTEGER_LITERAL); }
95 {hex_literal3}"H" { yylval.integer = strtol(yytext, NULL, 16); return(INTEGER_LITERAL); }
96 {hex_literal2}"H" { yylval.integer = strtol(yytext, NULL, 16); return(INTEGER_LITERAL); }
    /* Character literal: the value is the single character between the
       quotes. NOTE(review): yytext[1] is a plain char, so a character above
       0x7F becomes negative where char is signed - confirm this is intended. */
97 "'"."'" { yylval.integer = yytext[1]; return(INTEGER_LITERAL); }
98 "ADC" { yylval.mnemonic = ADC_MNEMONIC; return(MNEMONIC); }
99 "AND" { yylval.mnemonic = AND_MNEMONIC; return(MNEMONIC); }
100 "ASL" { yylval.mnemonic = ASL_MNEMONIC; return(MNEMONIC); }
101 "BCC" { yylval.mnemonic = BCC_MNEMONIC; return(MNEMONIC); }
102 "BCS" { yylval.mnemonic = BCS_MNEMONIC; return(MNEMONIC); }
    /* BLT and BGE are alternate spellings: they yield the same mnemonic
       codes as BCC and BCS respectively. */
103 "BLT" { yylval.mnemonic = BCC_MNEMONIC; return(MNEMONIC); }
104 "BGE" { yylval.mnemonic = BCS_MNEMONIC; return(MNEMONIC); }
105 "BEQ" { yylval.mnemonic = BEQ_MNEMONIC; return(MNEMONIC); }
106 "BIT" { yylval.mnemonic = BIT_MNEMONIC; return(MNEMONIC); }
107 "BMI" { yylval.mnemonic = BMI_MNEMONIC; return(MNEMONIC); }
108 "BNE" { yylval.mnemonic = BNE_MNEMONIC; return(MNEMONIC); }
109 "BPL" { yylval.mnemonic = BPL_MNEMONIC; return(MNEMONIC); }
110 "BRK" { yylval.mnemonic = BRK_MNEMONIC; return(MNEMONIC); }
111 "BVC" { yylval.mnemonic = BVC_MNEMONIC; return(MNEMONIC); }
112 "BVS" { yylval.mnemonic = BVS_MNEMONIC; return(MNEMONIC); }
113 "CLC" { yylval.mnemonic = CLC_MNEMONIC; return(MNEMONIC); }
114 "CLD" { yylval.mnemonic = CLD_MNEMONIC; return(MNEMONIC); }
115 "CLI" { yylval.mnemonic = CLI_MNEMONIC; return(MNEMONIC); }
116 "CLV" { yylval.mnemonic = CLV_MNEMONIC; return(MNEMONIC); }
117 "CMP" { yylval.mnemonic = CMP_MNEMONIC; return(MNEMONIC); }
118 "CPX" { yylval.mnemonic = CPX_MNEMONIC; return(MNEMONIC); }
119 "CPY" { yylval.mnemonic = CPY_MNEMONIC; return(MNEMONIC); }
120 "DEC" { yylval.mnemonic = DEC_MNEMONIC; return(MNEMONIC); }
121 "DEX" { yylval.mnemonic = DEX_MNEMONIC; return(MNEMONIC); }
122 "DEY" { yylval.mnemonic = DEY_MNEMONIC; return(MNEMONIC); }
123 "EOR" { yylval.mnemonic = EOR_MNEMONIC; return(MNEMONIC); }
124 "INC" { yylval.mnemonic = INC_MNEMONIC; return(MNEMONIC); }
125 "INX" { yylval.mnemonic = INX_MNEMONIC; return(MNEMONIC); }
126 "INY" { yylval.mnemonic = INY_MNEMONIC; return(MNEMONIC); }
127 "JMP" { yylval.mnemonic = JMP_MNEMONIC; return(MNEMONIC); }
128 "JSR" { yylval.mnemonic = JSR_MNEMONIC; return(MNEMONIC); }
129 "LDA" { yylval.mnemonic = LDA_MNEMONIC; return(MNEMONIC); }
130 "LDX" { yylval.mnemonic = LDX_MNEMONIC; return(MNEMONIC); }
131 "LDY" { yylval.mnemonic = LDY_MNEMONIC; return(MNEMONIC); }
132 "LSR" { yylval.mnemonic = LSR_MNEMONIC; return(MNEMONIC); }
133 "NOP" { yylval.mnemonic = NOP_MNEMONIC; return(MNEMONIC); }
134 "ORA" { yylval.mnemonic = ORA_MNEMONIC; return(MNEMONIC); }
135 "PHA" { yylval.mnemonic = PHA_MNEMONIC; return(MNEMONIC); }
136 "PHP" { yylval.mnemonic = PHP_MNEMONIC; return(MNEMONIC); }
137 "PLA" { yylval.mnemonic = PLA_MNEMONIC; return(MNEMONIC); }
138 "PLP" { yylval.mnemonic = PLP_MNEMONIC; return(MNEMONIC); }
139 "ROL" { yylval.mnemonic = ROL_MNEMONIC; return(MNEMONIC); }
140 "ROR" { yylval.mnemonic = ROR_MNEMONIC; return(MNEMONIC); }
141 "RTI" { yylval.mnemonic = RTI_MNEMONIC; return(MNEMONIC); }
142 "RTS" { yylval.mnemonic = RTS_MNEMONIC; return(MNEMONIC); }
143 "SBC" { yylval.mnemonic = SBC_MNEMONIC; return(MNEMONIC); }
144 "SEC" { yylval.mnemonic = SEC_MNEMONIC; return(MNEMONIC); }
145 "SED" { yylval.mnemonic = SED_MNEMONIC; return(MNEMONIC); }
146 "SEI" { yylval.mnemonic = SEI_MNEMONIC; return(MNEMONIC); }
147 "STA" { yylval.mnemonic = STA_MNEMONIC; return(MNEMONIC); }
148 "STX" { yylval.mnemonic = STX_MNEMONIC; return(MNEMONIC); }
149 "STY" { yylval.mnemonic = STY_MNEMONIC; return(MNEMONIC); }
150 "TAX" { yylval.mnemonic = TAX_MNEMONIC; return(MNEMONIC); }
151 "TAY" { yylval.mnemonic = TAY_MNEMONIC; return(MNEMONIC); }
152 "TSX" { yylval.mnemonic = TSX_MNEMONIC; return(MNEMONIC); }
153 "TXA" { yylval.mnemonic = TXA_MNEMONIC; return(MNEMONIC); }
154 "TXS" { yylval.mnemonic = TXS_MNEMONIC; return(MNEMONIC); }
155 "TYA" { yylval.mnemonic = TYA_MNEMONIC; return(MNEMONIC); }
159 "MASK" { return(MASK); }
160 "SIZEOF" { return(SIZEOF); }
    /* Assembler directives, each introduced by {pragma_prefix} (defined
       outside the lines shown here). Several spellings share one token:
       DATA/DATASEG, CODE/CODESEG, INCLUDE/INCSRC, TAG/TYPE, PAD/DSB,
       DB/BYTE, CHAR/ASC, DW/WORD and DD/DWORD. */
161 {pragma_prefix}"DATA" { return(DATASEG); }
162 {pragma_prefix}"CODE" { return(CODESEG); }
163 {pragma_prefix}"DATASEG" { return(DATASEG); }
164 {pragma_prefix}"CODESEG" { return(CODESEG); }
165 {pragma_prefix}"IF" { return(IF); }
166 {pragma_prefix}"IFDEF" { return(IFDEF); }
167 {pragma_prefix}"IFNDEF" { return(IFNDEF); }
168 {pragma_prefix}"ELSE" { return(ELSE); }
169 {pragma_prefix}"ELIF" { return(ELIF); }
170 {pragma_prefix}"ENDIF" { return(ENDIF); }
171 {pragma_prefix}"MACRO" { return(MACRO); }
172 {pragma_prefix}"END" { return(END); }
173 {pragma_prefix}"ENDE" { return(ENDE); }
174 {pragma_prefix}"ENDM" { return(ENDM); }
175 {pragma_prefix}"ENDP" { return(ENDP); }
176 {pragma_prefix}"ENDS" { return(ENDS); }
177 {pragma_prefix}"EQU" { return(EQU); }
178 {pragma_prefix}"DEFINE" { return(DEFINE); }
179 {pragma_prefix}"INCLUDE" { return(INCSRC); }
180 {pragma_prefix}"INCSRC" { return(INCSRC); }
181 {pragma_prefix}"INCBIN" { return(INCBIN); }
182 {pragma_prefix}"ALIGN" { return(ALIGN); }
183 {pragma_prefix}"PUBLIC" { return(PUBLIC); }
184 {pragma_prefix}"EXTRN" { return(EXTRN); }
185 {pragma_prefix}"ZEROPAGE" { return(ZEROPAGE); }
186 {pragma_prefix}"CHARMAP" { return(CHARMAP); }
187 {pragma_prefix}"STRUC" { return(STRUC); }
188 {pragma_prefix}"UNION" { return(UNION); }
189 {pragma_prefix}"RECORD" { return(RECORD); }
190 {pragma_prefix}"ENUM" { return(ENUM); }
191 {pragma_prefix}"PROC" { return(PROC); }
192 {pragma_prefix}"REPT" { return(REPT); }
193 {pragma_prefix}"TAG" { return(TAG); }
194 {pragma_prefix}"TYPE" { return(TAG); }
195 {pragma_prefix}"LABEL" { return(_LABEL_); }
196 {pragma_prefix}"MESSAGE" { return(MESSAGE); }
197 {pragma_prefix}"WARNING" { return(WARNING); }
198 {pragma_prefix}"ERROR" { return(ERROR); }
199 {pragma_prefix}"WHILE" { return(WHILE); }
200 {pragma_prefix}"PAD" { return(DSB); }
201 {pragma_prefix}"DB" { return(BYTE); }
202 {pragma_prefix}"BYTE" { return(BYTE); }
203 {pragma_prefix}"CHAR" { return(CHAR); }
204 {pragma_prefix}"ASC" { return(CHAR); }
205 {pragma_prefix}"DW" { return(WORD); }
206 {pragma_prefix}"WORD" { return(WORD); }
207 {pragma_prefix}"DD" { return(DWORD); }
208 {pragma_prefix}"DWORD" { return(DWORD); }
209 {pragma_prefix}"DSB" { return(DSB); }
210 {pragma_prefix}"DSW" { return(DSW); }
211 {pragma_prefix}"DSD" { return(DSD); }
212 {pragma_prefix}"ORG" { return(ORG); }
    /* Label definitions: strip_label() removes the trailing colon and any
       leading blanks from yytext before the name is handed on. */
213 {label_prefix}{identifier}":" { yylval.label = maybe_ignorecase(strip_label()); return(LABEL); }
214 {label_prefix}"@@"{local_id}":" { yylval.label = maybe_ignorecase(strip_label()); return(LOCAL_LABEL); }
    /* References to local and ordinary identifiers. */
215 "@@"{local_id} { yylval.ident = maybe_ignorecase(yytext); return(LOCAL_ID); }
216 {identifier} { yylval.ident = maybe_ignorecase(yytext); return(IDENTIFIER); }
    /* String literal: the closing quote is overwritten with NUL and the
       value starts just after the opening quote. For this rule and the two
       branch rules below the value points straight into yytext, which flex
       reuses for the next match. */
217 {string_literal} { yytext[yyleng-1] = '\0'; yylval.string = &yytext[1]; return(STRING_LITERAL); }
    /* Runs of two to eight plus or minus signs. */
218 "+"{2,8} { yylval.ident = yytext; return(FORWARD_BRANCH); }
219 "-"{2,8} { yylval.ident = yytext; return(BACKWARD_BRANCH); }
220 "::" { return(SCOPE_OP); }
    /* A newline advances the line count of the location and is itself
       returned as a token. */
222 \n { LOCATION_LINES (yylloc, yyleng); LOCATION_STEP (yylloc); return('\n'); }
    /* When yyparswap is non-zero, round and square brackets trade places
       in the token stream. */
227 "(" { return( yyparswap ? '[' : '(' ); }
228 ")" { return( yyparswap ? ']' : ')' ); }
229 "[" { return( yyparswap ? '(' : '[' ); }
230 "]" { return( yyparswap ? ')' : ']'); }
245 ">>" { return(SHR_OP); }
246 "<<" { return(SHL_OP); }
247 "<=" { return(LE_OP); }
248 ">=" { return(GE_OP); }
249 "==" { return(EQ_OP); }
250 "!=" { return(NE_OP); }
    /* Fallback for any single character this set of rules does not
       otherwise match: report it, step over it and keep scanning (no token
       is returned). */
253 . { yyerror("Skipping invalid character(s)"); LOCATION_STEP (yylloc); }
256 * Describes a file stack record.
257 * It holds name, location and buffer state for it.
259 typedef struct tag_file_stack_rec {
260 const char *name; /* Name of the file; yybegin() and yypushandrestart() store the caller's pointer without copying */
261 char *path; /* Absolute path; open_included_file() prepends it to an include name when searching parent files */
262 YY_BUFFER_STATE buf; /* flex input buffer */
263 YYLTYPE loc; /* Current position in file; yyerror() prints it for enclosing files in the include trace */
264 FILE *fp; /* File handle */
267 /* Max. nesting depth: number of entries in file_stack, the main file included */
268 #define FILE_STACK_SIZE 32
271 static file_stack_rec file_stack[FILE_STACK_SIZE];
273 /* File stack pointer: index of the entry for the file being scanned.
   It has static storage, so it starts out as 0. */
274 static int file_stack_ptr;
276 /* Macro to access top of file stack; expands to an lvalue, so FTOS().field can be assigned */
277 #define FTOS() file_stack[file_stack_ptr]
280 * Resets the location described by yylloc.
282 void yyresetloc(void)
    /* LOCATION_RESET is not defined in the lines shown here; it is handed
       the global yylloc by name, so it is presumably a macro - confirm. */
284 LOCATION_RESET(yylloc);
288 * Helper function used to handle INCSRC and INCBIN
289 * statements during parsing.
291 FILE *open_included_file(const char *filename, int quoted_form, char **path_out)
    /* Candidate locations visible in this function: the name as given when
       it starts with a slash; otherwise the directory of each file on the
       include stack, innermost first; and each entry of
       xasm_args.include_paths. Files are opened in text mode for reading.
       NOTE(review): the conditions linking these steps, the use of
       quoted_form and the store to *path_out are on lines not shown here. */
295 if (filename[0] == '/') {
297 fp = fopen(filename, "rt");
    /* Keep the directory part: everything before the last slash. The name
       starts with a slash, so strrchr() cannot return NULL here. */
299 char *lastslash = strrchr(filename, '/');
300 int len = lastslash - filename;
    /* NOTE(review): malloc() results are used unchecked throughout this
       function. */
302 path = (char *)malloc(len + 1);
    /* NOTE(review): strncpy() writes no terminating NUL here because the
       source is longer than len; the terminator has to be stored
       separately - confirm it is on a line not shown. */
303 strncpy(path, filename, len);
307 /* Not absolute path */
310 /* Try parent includes */
311 for (i = file_stack_ptr; i >= 0; --i) {
312 const char *include_path = file_stack[i].path;
    /* Room for the directory, a one-character separator, the file name and
       the terminating NUL. */
313 char *tmp = (char *)malloc(
314 strlen(include_path) + strlen("/") + strlen(filename) + 1);
315 strcpy(tmp, include_path);
317 strcat(tmp, filename);
318 fp = fopen(tmp, "rt");
    /* Same directory extraction as above, applied to the combined name. */
320 char *lastslash = strrchr(tmp, '/');
321 int len = lastslash - tmp;
323 path = (char *)malloc(len + 1);
324 strncpy(path, tmp, len);
330 /* We only search the current dir for now. */
335 /* Try search paths */
336 for (i = 0; i < xasm_args.include_path_count; ++i) {
337 const char *include_path = xasm_args.include_paths[i];
338 char *tmp = (char *)malloc(
339 strlen(include_path) + strlen("/") + strlen(filename) + 1);
340 strcpy(tmp, include_path);
342 strcat(tmp, filename);
343 fp = fopen(tmp, "rt");
345 char *lastslash = strrchr(tmp, '/');
346 int len = lastslash - tmp;
348 path = (char *)malloc(len + 1);
349 strncpy(path, tmp, len);
366 * Function called by lexer upon EOF(yyin).
370 /* Discard the buffer that has been scanned */
371 yy_delete_buffer(YY_CURRENT_BUFFER);
372 /* Close current file */
375 /* Resume scanning of previous buffer, if any */
376 if (file_stack_ptr > 0) {
379 /* Restore location */
381 /* Resume scanning from this buffer */
382 yy_switch_to_buffer(FTOS().buf);
383 /* Not end of all input. */
387 /* We have reached end of all input. */
393 * Opens a file and resumes scanning from there.
394 * The current file's status is pushed on stack.
395 * @param filename Name of new file to start scanning
 * @param quoted_form Passed on unchanged to open_included_file()
396 * @return error code (0, 1 or 2)
398 int yypushandrestart(const char *filename, int quoted_form)
402 /* Check for stack overflow */
    /* file_stack_ptr is the index of the current top entry, so the stack
       is full once it reaches the last valid index. */
403 if (file_stack_ptr == FILE_STACK_SIZE-1) {
404 /* Error, stack overflow */
407 fp = open_included_file(filename, quoted_form, &path);
409 /* Save position in current file */
411 /* Push the new file */
    /* Only the pointer is stored, not a copy of the string: it has to stay
       valid while this entry is on the stack, since yyerror() prints it in
       the include trace. */
413 FTOS().name = filename;
415 FTOS().buf = yy_create_buffer(fp, YY_BUF_SIZE);
417 /* Reset file location */
419 /* Resume scanning from the new buffer */
420 yy_switch_to_buffer(FTOS().buf);
424 /* Error, couldn't open file */
430 * Initializes the lexer, tells it to start tokenizing from the given file.
431 * @param filename Name of file where scanning is initiated
432 * @param swap_parens Use ( ) for indirection if 1, [ ] otherwise
 * @param ignore_case Stored in yyignorecase
433 * @return 1 if success, 0 otherwise
435 int yybegin(const char *filename, int swap_parens, int ignore_case)
    /* Remember the lexer options. yyparswap drives the bracket rules;
       yyignorecase is presumably consulted by maybe_ignorecase(), whose
       body is not shown here. */
438 yyparswap = swap_parens;
439 yyignorecase = ignore_case;
440 /* Attempt to open the given file */
441 fp = fopen(filename, "rt");
443 /* Figure out path */
445 char *lastslash = strrchr(filename, '/');
    /* Relative file name: the path is built on top of xasm_path. */
446 if (filename[0] != '/') {
449 int len = lastslash - filename;
    /* Room for xasm_path, one extra character (presumably a separator
       added on a line not shown), len bytes of directory and the NUL.
       strncat() appends at most len bytes and always terminates. */
451 path = (char *)malloc(strlen(xasm_path) + 1 + len + 1);
452 strcpy(path, xasm_path);
454 strncat(path, filename, len);
    /* xasm_path on its own; presumably the case where filename contains no
       slash - the selecting condition is not shown. */
456 path = (char *)malloc(strlen(xasm_path) + 1);
457 strcpy(path, xasm_path);
    /* Directory part of filename alone: everything before the last slash. */
461 int len = lastslash - filename;
463 path = (char *)malloc(len + 1);
    /* NOTE(review): strncpy() leaves path unterminated here; confirm the
       NUL is stored on a line not shown. */
464 strncpy(path, filename, len);
    /* Only the pointer is stored, not a copy of the string. */
469 FTOS().name = filename;
471 FTOS().buf = yy_create_buffer(fp, YY_BUF_SIZE);
473 /* Reset file location */
475 /* Use the new buffer */
476 yy_switch_to_buffer(FTOS().buf);
480 /* Couldn't open file */
486 * Gets the name of the file that is currently being tokenized.
488 const char *yy_current_filename()
494 * Called by the parser to report an error during parsing.
495 * @param s Error message
497 void yyerror(const char *s) {
500 /* Print line of form: file:pos:message (everything goes to stderr) */
501 fprintf(stderr, "%s:", yy_current_filename());
502 LOCATION_PRINT(stderr, yylloc);
503 fprintf(stderr, ": error: %s\n", s);
504 /* Print include-trace: one line per stack entry below the current
   one, starting at file_stack_ptr-1 and ending at entry 0, each with the
   location stored in that entry */
505 for (i=file_stack_ptr-1; i>=0; i--) {
506 fprintf(stderr, "(%s:", file_stack[i].name);
507 LOCATION_PRINT(stderr, file_stack[i].loc);
508 fprintf(stderr, ")\n");
514 * Call when yytext is a label to remove whitespace and punctuation.
519 /* Kill the ':' at the end */
520 yytext[strlen(yytext)-1] = 0;
521 /* Skip the whitespace, if any */
523 while ((yytext[start] == ' ') || (yytext[start] == '\t')) start++;
    /* The result points into yytext itself (which was modified in place
       above), so it is only usable until flex scans the next token. */
524 return &yytext[start];
527 static char *__strupr(char *str)
537 const char *maybe_ignorecase(char *str)
544 char *scan_include(int terminator)
    /* Static storage: the contents survive the return but are overwritten
       by the next call. NOTE(review): the return statement is not visible
       here - confirm that buf is what the caller receives. */
546 static char buf[1024];
552 else if (c == '\n') {
553 yyerror("unterminated include filename");
    /* 1023 is the last index of buf; presumably kept free for the
       terminating NUL - confirm. */
556 } else if (i == 1023) {
557 yyerror("include filename is too long");
564 LOCATION_STEP (yylloc);