4 * Copyright (c) Tuomo Valkonen 1999-2002.
6 * You may distribute and modify this library under the terms of either
7 * the Clarified Artistic License or the GNU LGPL, version 2.1 or later.
18 #include "tokenizer.h"
24 static const char *errors
[]={
25 DUMMY_TR("(no error)"),
26 DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */
27 DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */
28 DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */
29 DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/
30 DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */
31 DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */
32 DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */
33 DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */
34 DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */
35 DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */
36 DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */
37 DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */
38 DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */
39 DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */
40 DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */
41 DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */
42 DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */
43 DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */
44 DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */
45 DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */
/*
 * Helpers for assembling a string one character at a time.
 *
 * STRING_DECL(X) declares, in the current scope, the result pointer X,
 * a STRBLEN-byte staging buffer X_tmp, its fill count X_tmpl and a status
 * variable named `err`.  STRING_DECL_P(X, P) does the same but starts the
 * staging buffer off with the string literal P.
 */
#define STRING_DECL(X) int err=0; char *X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
#define STRING_DECL_P(X, P) int err=0; char *X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1
/* Hand character C to _string_append(); if that returns false, `err`
 * is set to -ENOMEM.  The macro argument itself is forwarded, so the
 * caller's variable does not have to be called `c`. */
#define STRING_APPEND(X, C) {if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) err=-ENOMEM;}
/* Release X.  free(NULL) is a no-op, so no guard is needed; leaving the
 * `if` out also keeps the macro from capturing a following `else`. */
#define STRING_FREE(X) free(X)
/* If `err` is already non-zero, return it from the enclosing function.
 * Otherwise call _string_finish(), setting `err` to -ENOMEM when that
 * returns false. */
#define STRING_FINISH(X) {if(err!=0) return err; if(!_string_finish(&X, X##_tmp, X##_tmpl)) err=-ENOMEM;}
60 static bool _string_append(char **p
, char *tmp
, int *tmplen
, char c
)
64 if(*tmplen
==STRBLEN
-1){
83 static bool _string_finish(char **p
, char *tmp
, int tmplen
)
/*
 * Shorthands for code that has a `Tokenizer *tokz` variable in scope.
 */
/* Increment the tokenizer's line counter. */
#define INC_LINE() (tokz->line++)
/* Obtain the next input character through _getch(). */
#define GETCH() _getch(tokz)
/* Hand character C to _ungetch(). */
#define UNGETCH(C) _ungetch(tokz, (C))
111 static int _getch(Tokenizer
*tokz
)
115 if(tokz
->ungetc
!=-1){
118 }else if (tokz
->flags
&TOKZ_READ_FROM_BUFFER
) {
119 assert(tokz
->buffer
.data
!=NULL
);
120 if (tokz
->buffer
.pos
==tokz
->buffer
.len
)
123 c
=tokz
->buffer
.data
[tokz
->buffer
.pos
++];
132 static void _ungetch(Tokenizer
*tokz
, int c
)
141 static int scan_line_comment(Token
*tok
, Tokenizer
*tokz
)
143 STRING_DECL_P(s
, "#");
148 while(c
!='\n' && c
!=EOF
){
157 TOK_SET_COMMENT(tok
, s
);
163 static int skip_line_comment(Tokenizer
*tokz
)
169 }while(c
!='\n' && c
!=EOF
);
180 static int scan_c_comment(Token
*tok
, Tokenizer
*tokz
)
182 STRING_DECL_P(s
, "/*");
191 return E_TOKZ_UNEXPECTED_EOF
;
198 }else if(st
==0 && c
=='*'){
209 TOK_SET_COMMENT(tok
, s
);
215 static int skip_c_comment(Tokenizer
*tokz
)
224 return E_TOKZ_UNEXPECTED_EOF
;
228 else if(st
==0 && c
=='*')
244 static int scan_char_escape(Tokenizer
*tokz
)
246 static char* special_chars
="nrtbae";
247 static char* specials
="\n\r\t\b\a\033";
253 for(i
=0;special_chars
[i
];i
++){
254 if(special_chars
[i
]==c
)
258 if(c
=='x' || c
=='X'){
260 }else if(c
=='d' || c
=='D'){
262 }else if(c
=='8' || c
=='9'){
263 base
=10;max
=2;i
=c
-'0';
264 }else if('0'<=c
&& c
<='7'){
265 base
=8;max
=2;i
=c
-'0';
321 static int scan_string(Token
*tok
, Tokenizer
*tokz
, bool escapes
)
335 return E_TOKZ_UNEXPECTED_EOL
;
338 if(c
=='\\' && escapes
){
339 c
=scan_char_escape(tokz
);
342 return E_TOKZ_UNEXPECTED_EOL
;
348 return E_TOKZ_UNEXPECTED_EOF
;
356 TOK_SET_STRING(tok
, s
);
365 static int scan_char(Token
*tok
, Tokenizer
*tokz
)
372 return E_TOKZ_UNEXPECTED_EOF
;
375 return E_TOKZ_UNEXPECTED_EOL
;
378 c
=scan_char_escape(tokz
);
381 return E_TOKZ_UNEXPECTED_EOF
;
384 return E_TOKZ_UNEXPECTED_EOL
;
390 return E_TOKZ_MULTICHAR
;
392 TOK_SET_CHAR(tok
, c
);
/* Non-zero when X may start an identifier: a letter according to
 * isalpha(), an underscore or a dollar sign.  X is evaluated up to three
 * times, so it must not have side effects. */
#define START_IDENT(X) (isalpha(X) || (X)=='_' || (X)=='$')
404 static int scan_identifier(Token
*tok
, Tokenizer
*tokz
, int c
)
411 }while(isalnum(c
) || c
=='_' || c
=='$');
417 TOK_SET_IDENT(tok
, s
);
422 #define NP_SIMPLE_IMPL
423 #include "np/numparser2.h"
424 #include "np/np-conv.h"
427 static int scan_number(Token
*tok
, Tokenizer
*tokz
, int c
)
432 if((e
=parse_number(&num
, tokz
, c
)))
435 if(num
.type
==NPNUM_INT
){
437 if((e
=num_to_long(&l
, &num
, TRUE
)))
440 TOK_SET_LONG(tok
, l
);
441 }else if(num
.type
==NPNUM_FLOAT
){
443 if((e
=num_to_double(&d
, &num
)))
446 TOK_SET_DOUBLE(tok
, d
);
448 return E_TOKZ_NUMFMT
;
458 static uchar op_map
[]={
459 0x00, /* ________ 0-7 */
460 0x00, /* ________ 8-15 */
461 0x00, /* ________ 16-23 */
462 0x00, /* ________ 24-31 */
463 0x62, /* _!___%&_ 32-39 */
464 0xff, /* ()*+,-./ 40-47 */
465 0x00, /* ________ 48-55 */
466 0xfc, /* __:;<=>? 56-63 */
467 0x01, /* @_______ 64-71 */
468 0x00, /* ________ 72-79 */
469 0x00, /* ________ 80-87 */
470 0x78, /* ___[_]^_ 88-95 */
471 0x00, /* ________ 96-103 */
472 0x00, /* ________ 104-111 */
473 0x00, /* ________ 112-119 */
474 0x38 /* ___{|}__ 120-127 */
478 static bool map_isset(uchar
*map
, uint ch
)
483 return map
[ch
>>3]&(1<<(ch
&7));
487 static bool is_opch(uint ch
)
489 return map_isset(op_map
, ch
);
493 static int scan_op(Token
*tok
, Tokenizer
*tokz
, int c
)
498 /* Quickly check it is an operator character */
500 return E_TOKZ_INVALID_CHAR
;
506 /* case '/': Checked elsewhere */
516 }else if(c2
==c
&& (c2
!='%' && c2
!='!' && c2
!='*')){
517 if(c
=='<' || c
=='>'){
520 op
=c
|(c2
<<8)|(c3
<<16);
/* It is already known that it is an operator so these are not needed
559 void tokz_warn(const Tokenizer
*tokz
, int line
, const char *fmt
, ...)
566 warn_obj_line_v(tokz
->name
, line
, fmt
, args
);
574 void tokz_warn_error(const Tokenizer
*tokz
, int line
, int e
)
576 if(e
==E_TOKZ_UNEXPECTED_EOF
)
580 tokz_warn(tokz
, line
, "%s", strerror(-e
));
582 tokz_warn(tokz
, line
, "%s", TR(errors
[e
]));
586 bool tokz_get_token(Tokenizer
*tokz
, Token
*tok
)
590 if (!(tokz
->flags
&TOKZ_READ_FROM_BUFFER
))
591 assert(tokz
->file
!=NULL
);
595 if(!TOK_IS_INVALID(&(tokz
->ungettok
))){
597 tokz
->ungettok
.type
=TOK_INVALID
;
607 }while(c
!='\n' && c
!=EOF
&& isspace(c
));
609 tok
->line
=tokz
->line
;
613 TOK_SET_OP(tok
, OP_EOF
);
619 if(tokz
->flags
&TOKZ_IGNORE_NEXTLINE
)
622 TOK_SET_OP(tok
, OP_NEXTLINE
);
630 TOK_SET_OP(tok
, OP_EOF
);
633 if(!isspace(c
) && e
==0){
634 e
=E_TOKZ_EOL_EXPECTED
;
635 tokz_warn_error(tokz
, tokz
->line
, e
);
636 if(!(tokz
->flags
&TOKZ_ERROR_TOLERANT
))
645 if(tokz
->flags
&TOKZ_READ_COMMENTS
){
646 e
=scan_line_comment(tok
, tokz
);
648 }else if((e
=skip_line_comment(tokz
))){
658 TOK_SET_OP(tok
, OP_AS_DIV
);
664 TOK_SET_OP(tok
, OP_DIV
);
668 if(tokz
->flags
&TOKZ_READ_COMMENTS
){
669 e
=scan_c_comment(tok
, tokz
);
671 }else if((e
=skip_c_comment(tokz
))){
678 e
=scan_string(tok
, tokz
, TRUE
);
682 e
=scan_char(tok
, tokz
);
686 if(('0'<=c
&& c
<='9') || c
=='-' || c
=='+'){
687 e
=scan_number(tok
, tokz
, c
);
692 e
=scan_identifier(tok
, tokz
, c
);
694 e
=scan_op(tok
, tokz
, c
);
700 tokz_warn_error(tokz
, tokz
->line
, e
);
706 void tokz_unget_token(Tokenizer
*tokz
, Token
*tok
)
708 tok_free(&(tokz
->ungettok
));
710 tok
->type
=TOK_INVALID
;
718 static bool do_tokz_pushf(Tokenizer
*tokz
)
720 Tokenizer_FInfo
*finfo
;
722 finfo
=REALLOC_N(tokz
->filestack
, Tokenizer_FInfo
,
723 tokz
->filestack_n
, tokz
->filestack_n
+1);
728 tokz
->filestack
=finfo
;
729 finfo
=&(finfo
[tokz
->filestack_n
++]);
731 finfo
->file
=tokz
->file
;
732 finfo
->name
=tokz
->name
;
733 finfo
->line
=tokz
->line
;
734 finfo
->ungetc
=tokz
->ungetc
;
735 finfo
->ungettok
=tokz
->ungettok
;
741 bool tokz_pushf_file(Tokenizer
*tokz
, FILE *file
, const char *fname
)
743 char *fname_copy
=NULL
;
749 fname_copy
=scopy(fname
);
750 if(fname_copy
==NULL
){
756 if(tokz
->file
!=NULL
){
757 if(!do_tokz_pushf(tokz
)){
766 tokz
->name
=fname_copy
;
769 tokz
->ungettok
.type
=TOK_INVALID
;
775 bool tokz_pushf(Tokenizer
*tokz
, const char *fname
)
779 file
=fopen(fname
, "r");
786 if(!tokz_pushf_file(tokz
, file
, fname
)){
796 static Tokenizer
*tokz_create()
800 tokz
=ALLOC(Tokenizer
);
811 tokz
->ungettok
.type
=TOK_INVALID
;
816 tokz
->filestack
=NULL
;
825 Tokenizer
*tokz_open(const char *fname
)
831 if(!tokz_pushf(tokz
, fname
)){
840 Tokenizer
*tokz_open_file(FILE *file
, const char *fname
)
846 if(!tokz_pushf_file(tokz
, file
, fname
)){
854 Tokenizer
*tokz_prepare_buffer(char *buffer
, int len
)
865 tokz
->flags
|=TOKZ_READ_FROM_BUFFER
;
866 tokz
->buffer
.data
=scopy(buffer
);
867 tokz
->buffer
.len
=(len
>0 ? (uint
)len
: strlen(tokz
->buffer
.data
));
880 static bool do_tokz_popf(Tokenizer
*tokz
, bool shrink
)
882 Tokenizer_FInfo
*finfo
;
884 if(tokz
->filestack_n
<=0)
892 finfo
=&(tokz
->filestack
[--tokz
->filestack_n
]);
894 tokz
->file
=finfo
->file
;
895 tokz
->name
=finfo
->name
;
896 tokz
->line
=finfo
->line
;
897 tokz
->ungetc
=finfo
->ungetc
;
898 tokz
->ungettok
=finfo
->ungettok
;
900 if(tokz
->filestack_n
==0){
901 free(tokz
->filestack
);
902 tokz
->filestack
=NULL
;
904 finfo
=REALLOC_N(tokz
->filestack
, Tokenizer_FInfo
,
905 tokz
->filestack_n
+1, tokz
->filestack_n
);
909 tokz
->filestack
=finfo
;
916 bool tokz_popf(Tokenizer
*tokz
)
918 return do_tokz_popf(tokz
, TRUE
);
922 void tokz_close(Tokenizer
*tokz
)
924 while(tokz
->filestack_n
>0)
925 do_tokz_popf(tokz
, FALSE
);
931 tok_free(&(tokz
->ungettok
));
941 void tok_free(Token
*tok
)
943 if(TOK_IS_STRING(tok
) || TOK_IS_IDENT(tok
) || TOK_IS_COMMENT(tok
)){
944 if(TOK_STRING_VAL(tok
)!=NULL
)
945 free(TOK_STRING_VAL(tok
));
948 tok
->type
=TOK_INVALID
;
952 void tok_init(Token
*tok
)
954 static Token dummy
=TOK_INIT
;
956 memcpy(tok
, &dummy
, sizeof(*tok
));