2 Copyright (c) 2003, Steve Dekorte
3 All rights reserved. See _BSDLicense.txt.
5 Aug 2004 - removed {} from op chars
6 - changed identifier to stop after 1 colon
16 //#define LEXER_DEBUG_TOKENS
17 #define TEST_INLINE inline
19 static IoToken
*IoLexer_currentToken(IoLexer
*self
)
21 return List_top(self
->tokenStream
);
// Creates a lexer: allocates an IoLexer with io_calloc(1, sizeof(IoLexer)),
// gives it a 1-byte source buffer (s), new position and token stacks, and
// new lists for the token stream and the character/line index.
// NOTE(review): no return statement is visible in these lines — confirm
// that the new lexer is returned to the caller.
24 IoLexer
*IoLexer_new(void)
26 IoLexer
*self
= (IoLexer
*)io_calloc(1, sizeof(IoLexer
));
27 self
->s
= (char *)io_calloc(1, 1);
29 self
->posStack
= Stack_new();
30 self
->tokenStack
= Stack_new();
31 self
->tokenStream
= List_new();
32 self
->charLineIndex
= List_new();
// Releases the lexer's containers: both stacks via Stack_free, both lists
// via List_free, and the errorDescription buffer via io_free when it is set.
// NOTE(review): freeing of the tokens, of self->s and of self itself is not
// visible in these lines — verify against the full function.
36 void IoLexer_free(IoLexer
*self
)
40 Stack_free(self
->posStack
);
41 Stack_free(self
->tokenStack
);
42 List_free(self
->tokenStream
);
43 List_free(self
->charLineIndex
);
44 if(self
->errorDescription
) io_free(self
->errorDescription
);
// Formats a description of the current error token into
// self->errorDescription and returns that buffer.
// The buffer is allocated lazily (io_calloc(1, 1024)) on first use and
// reused on later calls; the lexer keeps ownership of it.
// NOTE(review): the write uses unbounded sprintf into the 1024-byte buffer,
// and the argument for "%s" is not visible here — check that it cannot
// overflow. et is also used without a visible NULL check.
48 char *IoLexer_errorDescription(IoLexer
*self
)
50 IoToken
*et
= IoLexer_errorToken(self
);
52 if (!self
->errorDescription
)
54 self
->errorDescription
= io_calloc(1, 1024);
55 self
->errorDescription
[0] = 0;
60 sprintf(self
->errorDescription
,
61 "\"%s\" on line %i character %i",
63 IoToken_lineNumber(et
),
64 IoToken_charNumber(et
));
67 return self
->errorDescription
;
// Rebuilds self->charLineIndex: empties the list, then appends the pointer s
// at three points in the function.
// NOTE(review): the declaration of s and the scanning code between the
// appends are not visible here; presumably s walks the source text and is
// recorded at line boundaries — confirm.
71 void IoLexer_buildLineIndex(IoLexer
*self
)
75 List_removeAll(self
->charLineIndex
);
77 List_append_(self
->charLineIndex
, s
);
83 List_append_(self
->charLineIndex
, s
);
88 List_append_(self
->charLineIndex
, s
);
92 // next/prev character ------------------------
94 #define UTF8_SEQLEN(c) ( \
102 #define INVALID_CHAR 0xfffe
// Decodes the multi-byte UTF-8 sequence starting at s into a uchar_t.
// The visible cases handle 2-, 3-, 4-, 5- and 6-byte sequences. Each one
// first checks that every trailing byte is a continuation byte
// ((b ^ 0x80) < 0x40, i.e. 0x80..0xBF) and, for 3+ bytes, applies a lower
// bound on the first two bytes that rejects overlong encodings; it then
// assembles the value from the payload bits of each byte.
// NOTE(review): the dispatch on the lead byte and the value returned when a
// check fails are not visible here (presumably INVALID_CHAR — confirm).
104 static uchar_t
_IoLexer_DecodeUTF8(const unsigned char *s
)
112 if (!((s
[1] ^ 0x80) < 0x40))
114 return ((uchar_t
)(s
[0] & 0x1f) << 6) | (uchar_t
)(s
[1] ^ 0x80);
118 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40 && (s
[0] >= 0xe1 || s
[1] >= 0xa0)))
120 return ((uchar_t
)(s
[0] & 0x0f) << 12) | ((uchar_t
)(s
[1] ^ 0x80) << 6) | (uchar_t
)(s
[2] ^ 0x80);
124 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40 && (s
[3] ^ 0x80) < 0x40 && (s
[0] >= 0xf1 || s
[1] >= 0x90)))
126 return ((uchar_t
)(s
[0] & 0x07) << 18) | ((uchar_t
)(s
[1] ^ 0x80) << 12) | ((uchar_t
)(s
[2] ^ 0x80) << 6) | (uchar_t
)(s
[3] ^ 0x80);
130 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40 && (s
[3] ^ 0x80) < 0x40 && (s
[4] ^ 0x80) < 0x40 && (s
[0] >= 0xf9 || s
[1] >= 0x88)))
132 return ((uchar_t
)(s
[0] & 0x03) << 24) | ((uchar_t
)(s
[1] ^ 0x80) << 18) | ((uchar_t
)(s
[2] ^ 0x80) << 12) | ((uchar_t
)(s
[3] ^ 0x80) << 6) | (uchar_t
)(s
[4] ^ 0x80);
136 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40 && (s
[3] ^ 0x80) < 0x40 && (s
[4] ^ 0x80) < 0x40 && (s
[5] ^ 0x80) < 0x40 && (s
[0] >= 0xfd || s
[1] >= 0x84)))
138 return ((uchar_t
)(s
[0] & 0x01) << 30) | ((uchar_t
)(s
[1] ^ 0x80) << 24) | ((uchar_t
)(s
[2] ^ 0x80) << 18) | ((uchar_t
)(s
[3] ^ 0x80) << 12) | ((uchar_t
)(s
[4] ^ 0x80) << 6) | (uchar_t
)(s
[5] ^ 0x80);
// Reads the character at self->current and advances past it.
// Visible steps: take the lead byte, get the sequence length from
// UTF8_SEQLEN, check that none of the seqlen bytes is the terminating NUL,
// decode with _IoLexer_DecodeUTF8, test the result against INVALID_CHAR,
// and advance current by seqlen.
// NOTE(review): the single-byte path, the handling of invalid or truncated
// sequences, and the return statements are not visible in these lines.
144 TEST_INLINE uchar_t
IoLexer_nextChar(IoLexer
*self
)
146 unsigned char c
= (unsigned char) * (self
->current
);
160 seqlen
= UTF8_SEQLEN(c
);
162 for (i
= 0; i
< seqlen
; i
++)
164 if (self
->current
[i
] == 0)
166 // XXX: invalid or incomplete sequence
171 uch
= _IoLexer_DecodeUTF8((unsigned char*)self
->current
);
173 if (uch
== INVALID_CHAR
)
178 self
->current
+= seqlen
;
// Moves self->current back by one character.
// The loop tries lengths 1..6 while current - len is still past the start
// of the buffer (self->s), testing each byte for a non-continuation byte
// (c < 0x80 || c >= 0xc2). current is then moved back by len and the
// character there is decoded with _IoLexer_DecodeUTF8.
// NOTE(review): the body of the lead-byte test (presumably a break), the
// handling of INVALID_CHAR and the return value are not visible here.
182 TEST_INLINE uchar_t
IoLexer_prevChar(IoLexer
*self
)
187 for (len
= 1; len
<= 6 && self
->current
- len
> self
->s
; len
++)
189 unsigned char c
= *(unsigned char *)(self
->current
- len
);
190 if (c
< 0x80 || c
>= 0xc2)
194 self
->current
-= len
;
195 uch
= _IoLexer_DecodeUTF8((unsigned char*)self
->current
);
196 if (uch
== INVALID_CHAR
)
202 TEST_INLINE
char *IoLexer_current(IoLexer
*self
)
204 return self
->current
;
207 TEST_INLINE
int IoLexer_onNULL(IoLexer
*self
)
209 return (*(self
->current
) == 0);
212 // ------------------------------------------
// Older line-number computation: starts the count at 1 and advances a
// pointer s until it reaches self->current.
// NOTE(review): the declaration of s, the loop body and the return are not
// visible here; presumably the loop counts newlines — confirm.
214 size_t IoLexer_currentLineNumberOld(IoLexer
*self
)
216 size_t lineNumber
= 1;
219 while (s
< self
->current
)
// Looks up the line number of self->current in self->charLineIndex.
// The search starts at self->lineHint (the result of the previous lookup)
// and walks down or up the index from there by comparing the cursor with
// the stored pointers; the line found is written back to lineHint.
// The final lines recompute the line with IoLexer_currentLineNumberOld and
// print a message when the two disagree.
// NOTE(review): the loop bodies, the return, and any guard around the
// cross-check (it looks like debug code) are not visible here.
232 TEST_INLINE
size_t IoLexer_currentLineNumber(IoLexer
*self
)
234 // this should be even faster than a binary search
235 // since almost all results are very close to the last
237 List
*index
= self
->charLineIndex
;
238 size_t line
= self
->lineHint
;
239 size_t numLines
= List_size(index
);
240 void *current
= (void *)self
->current
;
242 if (current
< List_at_(index
, line
))
244 // walk down lines until char is bigger than one
245 while (line
> 0 && !(current
> List_at_(index
, line
)))
253 // walk up lines until char is less than or equal to one
254 while (line
< numLines
&& !(current
<= List_at_(index
, line
)))
261 self
->lineHint
= line
;
265 size_t realLine = IoLexer_currentLineNumberOld(self);
267 if (line != realLine)
269 printf("mismatch on currentLine %i != %i\n", (int)line, (int)realLine);
// Resets the lexer so the same source can be lexed again: frees every token
// in tokenStream (IoToken_free via LIST_FOREACH) and empties the list,
// clears both stacks, rewinds current to the start of the buffer, and
// resets resultIndex to 0 and errorToken to NULL.
276 void IoLexer_clear(IoLexer
*self
)
278 LIST_FOREACH(self
->tokenStream
, i
, t
, IoToken_free((IoToken
*)t
) );
279 List_removeAll(self
->tokenStream
);
281 Stack_clear(self
->posStack
);
282 Stack_clear(self
->tokenStack
);
284 self
->current
= self
->s
;
285 self
->resultIndex
= 0;
287 self
->errorToken
= NULL
;
290 IoToken
*IoLexer_errorToken(IoLexer
*self
)
292 return self
->errorToken
;
295 // lexing -------------------------------------
297 void IoLexer_string_(IoLexer
*self
, const char *string
)
299 self
->s
= strcpy((char *)io_realloc(self
->s
, strlen(string
) + 1), string
);
300 self
->current
= self
->s
;
301 IoLexer_buildLineIndex(self
);
// Walks at most max characters of the source starting at offset
// self->maxChar, stopping early at the terminating NUL.
// NOTE(review): the loop body is not visible here; presumably it prints
// each character — confirm.
304 void IoLexer_printLast_(IoLexer
*self
, int max
)
306 char *s
= self
->s
+ self
->maxChar
;
309 for (i
= 0; i
< max
&& s
[i
]; i
++)
315 // --- token and character position stacks ---
317 char *IoLexer_lastPos(IoLexer
*self
)
319 return Stack_top(self
->posStack
);
// Saves a backtracking point.
// Updates self->maxChar when the cursor's offset into the buffer exceeds it,
// then pushes the index of the last token in tokenStream
// (List_size - 1, stored as a pointer-sized integer) onto tokenStack and
// the current cursor onto posStack.
322 TEST_INLINE
void IoLexer_pushPos(IoLexer
*self
)
324 intptr_t index
= self
->current
- self
->s
;
326 if (index
> (intptr_t)self
->maxChar
)
328 self
->maxChar
= index
;
331 Stack_push_(self
->tokenStack
, (void *)(intptr_t)(List_size(self
->tokenStream
) - 1));
332 Stack_push_(self
->posStack
, self
->current
);
// Commits the work done since the matching IoLexer_pushPos: drops the saved
// token index and the saved cursor position. The visible lines do not
// modify the cursor or the token stream.
340 TEST_INLINE
void IoLexer_popPos(IoLexer
*self
)
342 Stack_pop(self
->tokenStack
);
343 Stack_pop(self
->posStack
);
// Backtracks to the matching IoLexer_pushPos.
// Pops the saved token index i and truncates tokenStream to i + 1 entries.
// When i differs from the index now on top of tokenStack, the next-token
// link of the token that is now last is cleared with
// IoToken_nextToken_(parent, NULL). Finally the cursor is restored from
// posStack.
// NOTE(review): any NULL check on parent is not visible here. The trailing
// printf/IoLexer_print looks like debug output and is presumably guarded by
// a preprocessor conditional that is also not visible — confirm.
350 TEST_INLINE
void IoLexer_popPosBack(IoLexer
*self
)
352 intptr_t i
= (intptr_t)Stack_pop(self
->tokenStack
);
353 intptr_t topIndex
= (intptr_t)Stack_top(self
->tokenStack
);
357 List_setSize_(self
->tokenStream
, i
+ 1);
359 if (i
!= topIndex
) // ok to io_free token
361 IoToken
*parent
= IoLexer_currentToken(self
);
365 IoToken_nextToken_(parent
, NULL
);
370 self
->current
= Stack_pop(self
->posStack
);
372 printf("back: "); IoLexer_print(self
);
376 // ------------------------------------------
// Lexes the whole source: saves a position, reads a message chain, and then
// checks whether unconsumed input remains (*current != 0).
// In that case, when no error token has been set yet, one is chosen — the
// current token if tokenStream is non-empty, a new NO_TOKEN token built
// from up to 30 bytes at the cursor presumably otherwise — and marked with
// "Syntax error near this location".
// NOTE(review): the else branches and the return values are not visible in
// these lines. The 30-byte length is passed without a visible check that
// 30 bytes remain in the buffer.
378 int IoLexer_lex(IoLexer
*self
)
381 IoLexer_pushPos(self
);
383 IoLexer_messageChain(self
);
385 if (*(self
->current
))
387 //printf("Lexing error after: ");
388 //IoLexer_printLast_(self, 30);
391 if (!self
->errorToken
)
393 if (List_size(self
->tokenStream
))
395 self
->errorToken
= IoLexer_currentToken(self
);
399 self
->errorToken
= IoLexer_addTokenString_length_type_(self
, self
->current
, 30, NO_TOKEN
);
402 IoToken_error_(self
->errorToken
, "Syntax error near this location");
409 // getting results --------------------------------
411 IoToken
*IoLexer_top(IoLexer
*self
)
413 return List_at_(self
->tokenStream
, self
->resultIndex
);
// Returns the type of the next unread result token (IoLexer_top).
// The !IoLexer_top(self) check covers the case where there is no such token.
// NOTE(review): the value returned in that case is not visible here.
416 IoTokenType
IoLexer_topType(IoLexer
*self
)
418 if (!IoLexer_top(self
))
423 return IoLexer_top(self
)->type
;
// Fetches the next unread result token and advances self->resultIndex past
// it.
// NOTE(review): the return statement is not visible here; presumably t is
// returned — confirm.
426 IoToken
*IoLexer_pop(IoLexer
*self
)
428 IoToken
*t
= IoLexer_top(self
);
429 self
->resultIndex
++;
433 // stack management --------------------------------
// Prints the token stream by handing its first token to IoToken_print.
// NOTE(review): any guard for an empty stream and any further output are
// not visible in these lines.
435 void IoLexer_print(IoLexer
*self
)
437 IoToken
*first
= List_first(self
->tokenStream
);
441 IoToken_print(first
);
// Prints every token in tokenStream in order: its name in single quotes
// followed by its type name (IoToken_typeName).
// The i < size - 1 test singles out every token except the last.
// NOTE(review): the body of that test is not visible here; presumably it
// prints a separator between tokens — confirm.
447 void IoLexer_printTokens(IoLexer
*self
)
451 for (i
= 0; i
< List_size(self
->tokenStream
); i
++)
453 IoToken
*t
= List_at_(self
->tokenStream
, i
);
455 printf("'%s'", t
->name
);
456 printf(" %s ", IoToken_typeName(t
));
458 if (i
< List_size(self
->tokenStream
) - 1)
467 // grabbing ---------------------------------------------
// Fetches the last saved position (s1) and the current cursor (s2).
// NOTE(review): the return statement is not visible here; presumably the
// result is the distance s2 - s1, i.e. the number of bytes consumed since
// the last IoLexer_pushPos — confirm.
469 int IoLexer_grabLength(IoLexer
*self
)
471 char *s1
= IoLexer_lastPos(self
);
472 char *s2
= IoLexer_current(self
);
// Turns the text consumed since the last IoLexer_pushPos into a token of the
// given type: the span runs from the saved position (s1) to the current
// cursor (s2) and is handed to IoLexer_addTokenString_length_type_.
// NOTE(review): the condition guarding the "empty token" message is not
// visible here (presumably len == 0), nor is what follows it.
477 void IoLexer_grabTokenType_(IoLexer
*self
, IoTokenType type
)
479 char *s1
= IoLexer_lastPos(self
);
480 char *s2
= IoLexer_current(self
);
481 size_t len
= (s2
- s1
);
485 printf("IoLexer fatal error: empty token\n");
489 IoLexer_addTokenString_length_type_(self
, s1
, len
, type
);
// Creates a token named by the len bytes at s1, with the given type, and
// appends it to tokenStream.
// The token records the current line number and the cursor's offset into
// the buffer as charNumber (note: the cursor, not s1 — see the
// commented-out alternative). The previous top token is linked to the new
// one through IoToken_nextToken_(top, t) before the push.
// NOTE(review): any NULL guard on top, the closing #endif of the debug
// print and the return statement are not visible in these lines.
492 IoToken
*IoLexer_addTokenString_length_type_(IoLexer
*self
, const char *s1
, size_t len
, IoTokenType type
)
494 IoToken
*top
= IoLexer_currentToken(self
);
495 IoToken
*t
= IoToken_new();
497 t
->lineNumber
= IoLexer_currentLineNumber(self
);
498 //t->charNumber = (int)(s1 - self->s);
499 t
->charNumber
= (int)(self
->current
- self
->s
);
501 if (t
->charNumber
< 0)
503 printf("bad t->charNumber = %i\n", t
->charNumber
);
506 IoToken_name_length_(t
, s1
, len
);
507 IoToken_type_(t
, type
);
511 IoToken_nextToken_(top
, t
);
514 List_push_(self
->tokenStream
, t
);
515 #ifdef LEXER_DEBUG_TOKENS
516 printf("token '%s' %s\n", t
->name
, IoToken_typeName(t
));
522 // reading ------------------------------------
// Lexes a chain of messages. Each round first consumes any run of
// terminators, separators and comments, and the round repeats for as long
// as IoLexer_readMessage succeeds (the tail of a do/while is visible).
// NOTE(review): the opening of the do block and the inner loop body are not
// visible in these lines.
524 void IoLexer_messageChain(IoLexer
*self
)
528 while ( IoLexer_readTerminator(self
) ||
529 IoLexer_readSeparator(self
) ||
530 IoLexer_readComment(self
))
532 } while ( IoLexer_readMessage(self
));
535 // message -------------------------------
537 static void IoLexer_readMessage_error(IoLexer
*self
, const char *name
)
539 IoLexer_popPosBack(self
);
540 self
->errorToken
= IoLexer_currentToken(self
);
541 IoToken_error_(self
->errorToken
, name
);
// Tries to read one single-character token of the given type, where the
// character may be any of those in chars; returns 1 as soon as
// IoLexer_readTokenChar_type_ succeeds for one of them.
// NOTE(review): the loop over chars and the failure return are not visible
// in these lines.
544 int IoLexer_readTokenChars_type_(IoLexer
*self
, const char *chars
, IoTokenType type
)
548 if (IoLexer_readTokenChar_type_(self
, *chars
, type
)) return 1;
// Maps a group-opening character to the name of the implicit message it
// stands for: '[' gives "squareBrackets" and '{' gives "curlyBrackets".
// An unrecognized character is reported with a printf.
// NOTE(review): other cases (e.g. '(') and what happens after the error
// print are not visible in these lines.
555 const char *IoLexer_nameForGroupChar_(IoLexer
*self
, char groupChar
)
560 case '[': return "squareBrackets";
561 case '{': return "curlyBrackets";
564 printf("IoLexer: fatal error - invalid group char %c\n", groupChar
);
// Non-alphanumeric characters accepted inside identifiers; read by
// IoLexer_readSpecialChar. IoLexer_readMessage temporarily switches this to
// "._" while lexing the contents of a [ ] group and restores it afterwards.
static char *specialChars = ":._";
// Lexes one message: an optional symbol followed by an optional argument
// group.
//
// Visible flow:
//  - save a position, skip padding, try to read a symbol, then skip
//    separators and comments;
//  - look at the next character: a '[' or '{', or a '(' that does not
//    follow a symbol, gets an implicit IDENTIFIER_TOKEN named by
//    IoLexer_nameForGroupChar_;
//  - if an opening group character is read (OPENPAREN_TOKEN), the arguments
//    are lexed as message chains separated by COMMA_TOKENs. An argument that
//    is empty after a comma is reported as
//    "missing argument in argument list";
//  - while lexing inside '[', the file-level specialChars is narrowed to
//    "._" and restored to ":._" afterwards;
//  - a missing closing character is reported through
//    IoLexer_readMessage_error with a message depending on what was found
//    and on groupChar.
//
// NOTE(review): several branches, the exact nesting, and all return values
// are not visible in these lines. specialChars is shared mutable state and
// is not restored on the error paths visible here — confirm.
570 int IoLexer_readMessage(IoLexer
*self
)
574 IoLexer_pushPos(self
);
575 IoLexer_readPadding(self
);
577 foundSymbol
= IoLexer_readSymbol(self
);
582 while (IoLexer_readSeparator(self
) || IoLexer_readComment(self
))
585 groupChar
= *IoLexer_current(self
);
587 if (groupChar
&& (strchr("[{", groupChar
) || (!foundSymbol
&& groupChar
== '(')))
589 char *groupName
= (char *)IoLexer_nameForGroupChar_(self
, groupChar
);
590 IoLexer_addTokenString_length_type_(self
, groupName
, strlen(groupName
), IDENTIFIER_TOKEN
);
593 if (IoLexer_readTokenChars_type_(self
, "([{", OPENPAREN_TOKEN
))
595 IoLexer_readPadding(self
);
597 IoTokenType type
= IoLexer_currentToken(self
)->type
;
599 IoLexer_readPadding(self
);
600 // Empty argument: (... ,)
601 if (COMMA_TOKEN
== type
)
603 char c
= *IoLexer_current(self
);
605 if (',' == c
|| strchr(")]}", c
))
607 IoLexer_readMessage_error(self
, "missing argument in argument list");
612 if (groupChar
== '[') specialChars
= "._";
613 IoLexer_messageChain(self
);
614 if (groupChar
== '[') specialChars
= ":._";
615 IoLexer_readPadding(self
);
617 } while (IoLexer_readTokenChar_type_(self
, ',', COMMA_TOKEN
));
619 if (!IoLexer_readTokenChars_type_(self
, ")]}", CLOSEPAREN_TOKEN
))
622 char c = *IoLexer_current(self);
624 if (strchr("([{", c))
626 IoLexer_readMessage_error(self, "expected a message but instead found a open group character");
630 IoLexer_readMessage_error(self, "missing closing group character for argument list");
633 if (groupChar
== '(')
635 IoLexer_readMessage_error(self
, "unmatched ()s");
637 else if (groupChar
== '[')
639 IoLexer_readMessage_error(self
, "unmatched []s");
641 else if (groupChar
== '{')
643 IoLexer_readMessage_error(self
, "unmatched {}s");
645 //printf("Token %p error: %s - %s\n", t, t->error, IoToken_error(t));
649 IoLexer_popPos(self
);
655 IoLexer_popPos(self
);
659 IoLexer_popPosBack(self
);
// Consumes any run of whitespace and comments at the cursor.
// NOTE(review): the loop body and the return value are not visible here.
663 int IoLexer_readPadding(IoLexer
*self
)
667 while (IoLexer_readWhitespace(self
) || IoLexer_readComment(self
))
675 // symbols ------------------------------------------
// Tries to read one symbol, attempting a number, an operator, an identifier
// and a quoted string in that order; returns 1 as soon as one succeeds.
// NOTE(review): the failure return is not visible in these lines.
677 int IoLexer_readSymbol(IoLexer
*self
)
679 if ( IoLexer_readNumber(self
) ||
680 IoLexer_readOperator(self
) ||
681 IoLexer_readIdentifier(self
) ||
682 IoLexer_readQuote(self
)) return 1;
// Reads an identifier: a run of letters, digits and special characters,
// emitted as an IDENTIFIER_TOKEN.
// If the run ends in ':' and the next character is '=', the cursor is
// stepped back one character so the ':' is left for the following ":="
// operator instead of being swallowed by the identifier.
// When nothing was consumed the saved position is restored with
// IoLexer_popPosBack (presumably the failure path — return values are not
// visible in these lines).
686 int IoLexer_readIdentifier(IoLexer
*self
)
688 IoLexer_pushPos(self
);
690 while ( IoLexer_readLetter(self
) ||
691 IoLexer_readDigit(self
) ||
692 IoLexer_readSpecialChar(self
))
695 if (IoLexer_grabLength(self
))
697 // avoid grabing : on last character if followed by =
699 char *current
= IoLexer_current(self
);
701 if (*(current
- 1) == ':' && *current
== '=')
703 IoLexer_prevChar(self
);
707 IoLexer_grabTokenType_(self
, IDENTIFIER_TOKEN
);
708 IoLexer_popPos(self
);
712 IoLexer_popPosBack(self
);
// Reads an operator: a run of operator characters (IoLexer_readOpChar),
// emitted as an IDENTIFIER_TOKEN.
// The first character is fetched separately with IoLexer_nextChar and, on
// the visible paths, either causes an immediate rollback or is put back
// with IoLexer_prevChar before the run is read.
// NOTE(review): the conditions applied to the first character and all
// return values are not visible in these lines.
717 int IoLexer_readOperator(IoLexer
*self
)
720 IoLexer_pushPos(self
);
721 // ok if first character is a colon
722 c
= IoLexer_nextChar(self
);
723 //printf("IoLexer_nextChar(self) = %c %i\n", c, c);
727 IoLexer_popPosBack(self
);
732 IoLexer_prevChar(self
);
737 IoLexer_prevChar(self);
741 while (IoLexer_readOpChar(self
))
744 if (IoLexer_grabLength(self
))
746 IoLexer_grabTokenType_(self
, IDENTIFIER_TOKEN
);
747 IoLexer_popPos(self
);
751 IoLexer_popPosBack(self
);
755 // comments ------------------------------------------
757 int IoLexer_readComment(IoLexer
*self
)
759 return (IoLexer_readSlashStarComment(self
) ||
760 IoLexer_readSlashSlashComment(self
) ||
761 IoLexer_readPoundComment(self
));
// Reads a slash-star comment. A nesting counter starts at 1 after the
// opening delimiter, and both the opening and closing delimiters are
// recognized inside the body, so nested comments are tracked.
// No token is emitted on the visible paths: success ends with
// IoLexer_popPos and failure restores the cursor with IoLexer_popPosBack.
// NOTE(review): the loop condition, the counter updates and the return
// values are not visible in these lines.
764 int IoLexer_readSlashStarComment(IoLexer
*self
)
766 IoLexer_pushPos(self
);
768 if (IoLexer_readString_(self
, "/*"))
770 unsigned int nesting
= 1;
774 if (IoLexer_readString_(self
, "/*"))
776 IoLexer_nextChar(self
);
779 else if (IoLexer_readString_(self
, "*/"))
781 // otherwise we end up trimming the last char
782 if (nesting
> 1) IoLexer_nextChar(self
);
786 IoLexer_nextChar(self
);
788 IoLexer_popPos(self
);
792 IoLexer_popPosBack(self
);
// Reads a double-slash comment: two consecutive '/' characters followed by
// everything up to (not including) the next newline or end of input.
// No token is emitted (the grab call is commented out); otherwise the
// cursor is restored with IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
796 int IoLexer_readSlashSlashComment(IoLexer
*self
)
798 IoLexer_pushPos(self
);
800 if (IoLexer_nextChar(self
) == '/')
802 if (IoLexer_nextChar(self
) == '/')
804 while (IoLexer_readNonReturn(self
)) { }
805 //IoLexer_grabTokenType_(self, COMMENT_TOKEN);
806 IoLexer_popPos(self
);
811 IoLexer_popPosBack(self
);
// Reads a pound comment: a '#' followed by everything up to (not including)
// the next newline or end of input.
// No token is emitted (the grab call is commented out); otherwise the
// cursor is restored with IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
815 int IoLexer_readPoundComment(IoLexer
*self
)
817 IoLexer_pushPos(self
);
819 if (IoLexer_nextChar(self
) == '#')
821 while (IoLexer_readNonReturn(self
))
824 //IoLexer_grabTokenType_(self, COMMENT_TOKEN);
825 IoLexer_popPos(self
);
829 IoLexer_popPosBack(self
);
833 // quotes -----------------------------------------
835 int IoLexer_readQuote(IoLexer
*self
)
837 return (IoLexer_readTriQuote(self
) || IoLexer_readMonoQuote(self
));
// Reads a double-quoted string as a MONOQUOTE_TOKEN.
// After the opening '"' the body is consumed character by character;
// mbskip tracks the remaining length of a multi-byte character
// (ismbchar / mbcharlen).
// On the error path the current token, if any, is recorded as the error
// token with "unterminated quote" and the cursor is rolled back.
// NOTE(review): the loop, the escape and closing-quote handling, and all
// return values are not visible in these lines.
840 int IoLexer_readMonoQuote(IoLexer
*self
)
842 int mbskip
= 0; // multi-byte character length
844 IoLexer_pushPos(self
);
846 if (IoLexer_nextChar(self
) == '"')
850 uchar_t c
= IoLexer_nextChar(self
);
852 if (mbskip
<= 0 && ismbchar(c
))
854 mbskip
= mbcharlen(c
);
869 IoLexer_nextChar(self
);
875 self
->errorToken
= IoLexer_currentToken(self
);
877 if (self
->errorToken
)
879 IoToken_error_(self
->errorToken
, "unterminated quote");
882 IoLexer_popPosBack(self
);
887 IoLexer_grabTokenType_(self
, MONOQUOTE_TOKEN
);
888 IoLexer_popPos(self
);
892 IoLexer_popPosBack(self
);
// Reads a string delimited by three double quotes as a TRIQUOTE_TOKEN.
// After the opening delimiter, characters are consumed one at a time until
// the closing delimiter is matched; an IoLexer_popPosBack inside the loop
// rolls the cursor back.
// NOTE(review): the condition tested on c (presumably end of input) and all
// return values are not visible in these lines.
896 int IoLexer_readTriQuote(IoLexer
*self
)
898 IoLexer_pushPos(self
);
900 if (IoLexer_readString_(self
, "\"\"\""))
902 while (!IoLexer_readString_(self
, "\"\"\""))
904 uchar_t c
= IoLexer_nextChar(self
);
908 IoLexer_popPosBack(self
);
913 IoLexer_grabTokenType_(self
, TRIQUOTE_TOKEN
);
914 IoLexer_popPos(self
);
918 IoLexer_popPosBack(self
);
922 // helpers ----------------------------
// Reads the single character c and, on success, emits it as a token of the
// given type; otherwise the cursor is restored with IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
924 int IoLexer_readTokenChar_type_(IoLexer
*self
, char c
, IoTokenType type
)
926 IoLexer_pushPos(self
);
928 if (IoLexer_readChar_(self
, c
))
930 IoLexer_grabTokenType_(self
, type
);
931 IoLexer_popPos(self
);
935 IoLexer_popPosBack(self
);
// Reads the literal string s and, on success, emits it as an
// IDENTIFIER_TOKEN; otherwise the cursor is restored with
// IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
939 int IoLexer_readTokenString_(IoLexer
*self
, const char *s
)
941 IoLexer_pushPos(self
);
943 if (IoLexer_readString_(self
, s
))
945 IoLexer_grabTokenType_(self
, IDENTIFIER_TOKEN
);
946 IoLexer_popPos(self
);
950 IoLexer_popPosBack(self
);
// Matches the literal string s at the cursor (strncmp over len bytes) and
// advances the cursor past it on a match. End of input is checked first.
// NOTE(review): the computation of len (presumably strlen(s)) and the
// return values are not visible in these lines.
955 int IoLexer_readString_(IoLexer
*self
, const char *s
)
959 if (IoLexer_onNULL(self
))
964 if (strncmp(self
->current
, s
, len
) == 0)
966 self
->current
+= len
;
// Consumes the next character if it is ASCII (< 0x80) and occurs in the
// set s. Nothing is read at end of input. IoLexer_prevChar is called to
// step the cursor back (presumably on the non-matching path).
// NOTE(review): return values are not visible in these lines.
973 TEST_INLINE
int IoLexer_readCharIn_(IoLexer
*self
, const char *s
)
975 if (!IoLexer_onNULL(self
))
977 uchar_t c
= IoLexer_nextChar(self
);
979 if (c
< 0x80 && strchr(s
, c
))
984 IoLexer_prevChar(self
);
// Consumes the next character if its value lies in the inclusive range
// [first, last]. Nothing is read at end of input. IoLexer_prevChar is
// called to step the cursor back (presumably on the non-matching path).
// NOTE(review): return values are not visible in these lines.
989 TEST_INLINE
int IoLexer_readCharInRange_(IoLexer
*self
, uchar_t first
, uchar_t last
)
991 if (!IoLexer_onNULL(self
))
993 uchar_t c
= IoLexer_nextChar(self
);
995 if (c
>= first
&& c
<= last
)
1000 IoLexer_prevChar(self
);
// Reads the next character when not at end of input; IoLexer_prevChar is
// called to step the cursor back.
// NOTE(review): the comparison against c (presumably nc == c) and the
// return values are not visible in these lines — confirm.
1005 int IoLexer_readChar_(IoLexer
*self
, char c
)
1007 if (!IoLexer_onNULL(self
))
1009 uchar_t nc
= IoLexer_nextChar(self
);
1016 IoLexer_prevChar(self
);
// Consumes the next character if it equals c ignoring case (both sides go
// through tolower). Nothing is read at end of input. IoLexer_prevChar is
// called to step the cursor back (presumably on the non-matching path).
// NOTE(review): nc is a decoded uchar_t and may be outside the range
// tolower accepts for non-ASCII input. Return values are not visible in
// these lines.
1021 int IoLexer_readCharAnyCase_(IoLexer
*self
, char c
)
1023 if (!IoLexer_onNULL(self
))
1025 uchar_t nc
= IoLexer_nextChar(self
);
1027 if (nc
&& tolower(nc
) == tolower(c
))
1032 IoLexer_prevChar(self
);
// Reads the next character when not at end of input; IoLexer_prevChar is
// called to step the cursor back.
// NOTE(review): the test applied to nc (presumably nc >= 0x80, per the
// function name) and the return values are not visible in these lines.
1037 int IoLexer_readNonASCIIChar_(IoLexer
*self
)
1039 if (!IoLexer_onNULL(self
))
1041 uchar_t nc
= IoLexer_nextChar(self
);
1046 IoLexer_prevChar(self
);
// Consumes one character unless it is a newline: returns 0 at end of input
// and 1 after consuming a non-newline character. On a newline the cursor is
// stepped back so the newline stays unread.
// NOTE(review): the final return (presumably 0) is not visible here.
1051 int IoLexer_readNonReturn(IoLexer
*self
)
1053 if (IoLexer_onNULL(self
)) return 0;
1054 if (IoLexer_nextChar(self
) != '\n') return 1;
1055 IoLexer_prevChar(self
);
// Consumes one character unless it is a double quote: returns 0 at end of
// input and 1 after consuming a non-quote character. On a quote the cursor
// is stepped back so the quote stays unread.
// NOTE(review): the final return (presumably 0) is not visible here.
1059 int IoLexer_readNonQuote(IoLexer
*self
)
1061 if (IoLexer_onNULL(self
)) return 0;
1062 if (IoLexer_nextChar(self
) != '"') return 1;
1063 IoLexer_prevChar(self
);
1067 // character definitions ----------------------------
// Consumes a run of characters accepted by IoLexer_readCharacter.
// NOTE(review): the loop body and the return value are not visible here.
1069 int IoLexer_readCharacters(IoLexer
*self
)
1073 while (IoLexer_readCharacter(self
))
// Tries to consume one character that is a letter, a digit, a special
// character or an operator character, in that order.
// NOTE(review): the statement wrapping this expression (presumably a
// return) is not visible in these lines.
1081 int IoLexer_readCharacter(IoLexer
*self
)
1084 IoLexer_readLetter(self
) ||
1085 IoLexer_readDigit(self
) ||
1086 IoLexer_readSpecialChar(self
) ||
1087 IoLexer_readOpChar(self
)
1091 int IoLexer_readOpChar(IoLexer
*self
)
1093 return IoLexer_readCharIn_(self
, ":'~!@$%^&*-+=|\\<>?/");
1096 int IoLexer_readSpecialChar(IoLexer
*self
)
1098 return IoLexer_readCharIn_(self
, specialChars
);
1101 int IoLexer_readDigit(IoLexer
*self
)
1103 return IoLexer_readCharInRange_(self
, '0', '9');
1106 int IoLexer_readLetter(IoLexer
*self
)
1108 return IoLexer_readCharInRange_(self
, 'A', 'Z') ||
1109 IoLexer_readCharInRange_(self
, 'a', 'z') ||
1110 IoLexer_readNonASCIIChar_(self
);
1113 // terminator -------------------------------
// Reads a run of terminator characters (see IoLexer_readTerminatorChar),
// allowing separators before and between them, and emits a single ";"
// TERMINATOR_TOKEN for the whole run.
// The check on the current top token avoids emitting a second terminator
// directly after an existing one.
// NOTE(review): the loop body, the branch taken for a double terminator and
// all return values are not visible in these lines.
1115 int IoLexer_readTerminator(IoLexer
*self
)
1118 IoLexer_pushPos(self
);
1119 IoLexer_readSeparator(self
);
1121 while (IoLexer_readTerminatorChar(self
))
1124 IoLexer_readSeparator(self
);
1129 IoToken
*top
= IoLexer_currentToken(self
);
1131 // avoid double terminators
1132 if (top
&& IoToken_type(top
) == TERMINATOR_TOKEN
)
1137 IoLexer_addTokenString_length_type_(self
, ";", 1, TERMINATOR_TOKEN
);
1138 IoLexer_popPos(self
);
1142 IoLexer_popPosBack(self
);
1146 int IoLexer_readTerminatorChar(IoLexer
*self
)
1148 return IoLexer_readCharIn_(self
, ";\n");
1151 // separator --------------------------------
// Consumes a run of separator characters (see IoLexer_readSeparatorChar).
// No token is emitted (the grab call is commented out). If nothing was
// consumed the cursor is restored with IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
1153 int IoLexer_readSeparator(IoLexer
*self
)
1155 IoLexer_pushPos(self
);
1157 while (IoLexer_readSeparatorChar(self
))
1161 if (IoLexer_grabLength(self
))
1163 //IoLexer_grabTokenType_(self, SEPERATOR_TOKEN);
1164 IoLexer_popPos(self
);
1168 IoLexer_popPosBack(self
);
// Reads one separator. Two forms are recognized: a single blank character
// from " \f\r\t\v", or a line continuation — a backslash, optional blanks,
// then a newline. A backslash not followed by that pattern is rolled back
// with IoLexer_popPosBack.
// NOTE(review): the exact branch structure and the return values are not
// visible in these lines.
1172 int IoLexer_readSeparatorChar(IoLexer
*self
)
1174 if (IoLexer_readCharIn_(self
, " \f\r\t\v"))
1180 IoLexer_pushPos(self
);
1181 if (IoLexer_readCharIn_(self
, "\\"))
1183 while (IoLexer_readCharIn_(self
, " \f\r\t\v"))
1187 if (IoLexer_readCharIn_(self
, "\n"))
1189 IoLexer_popPos(self
);
1193 IoLexer_popPosBack(self
);
1198 // whitespace -----------------------------------
// Consumes a run of whitespace characters (see IoLexer_readWhitespaceChar).
// No token is emitted (the grab call is commented out). If nothing was
// consumed the cursor is restored with IoLexer_popPosBack.
// NOTE(review): return values are not visible in these lines.
1200 int IoLexer_readWhitespace(IoLexer
*self
)
1202 IoLexer_pushPos(self
);
1204 while (IoLexer_readWhitespaceChar(self
))
1208 if (IoLexer_grabLength(self
))
1210 //IoLexer_grabTokenType_(self, WHITESPACE_TOKEN);
1211 IoLexer_popPos(self
);
1215 IoLexer_popPosBack(self
);
1219 int IoLexer_readWhitespaceChar(IoLexer
*self
)
1221 return IoLexer_readCharIn_(self
, " \f\r\t\v\n");
// Consumes a run of decimal digits. One path rolls back with
// IoLexer_popPosBack and the other commits with IoLexer_popPos.
// NOTE(review): the condition choosing between them (presumably whether any
// digit was read) and the return values are not visible in these lines.
1224 int IoLexer_readDigits(IoLexer
*self
)
1228 IoLexer_pushPos(self
);
1230 while (IoLexer_readDigit(self
))
1237 IoLexer_popPosBack(self
);
1241 IoLexer_popPos(self
);
1245 int IoLexer_readNumber(IoLexer
*self
)
1247 return (IoLexer_readHexNumber(self
) || IoLexer_readDecimal(self
));
// Reads a number's exponent part: 'e' or 'E', an optional '-' or '+' sign,
// then digits.
// NOTE(review): return values are not visible in these lines. The caller
// IoLexer_readDecimal compares the result with -1, which suggests -1
// signals an 'e' without digits — confirm.
1250 int IoLexer_readExponent(IoLexer
*self
)
1252 if (IoLexer_readCharAnyCase_(self
, 'e'))
1254 IoLexer_readChar_(self
, '-') || IoLexer_readChar_(self
, '+');
1256 if (!IoLexer_readDigits(self
))
// Reads a number's fractional part: a '.' followed by digits.
// NOTE(review): return values are not visible in these lines. The caller
// IoLexer_readDecimal compares the result with -1 and with 1, which
// suggests a three-way result — confirm.
1266 int IoLexer_readDecimalPlaces(IoLexer
*self
)
1268 if (IoLexer_readChar_(self
, '.'))
1270 if (!IoLexer_readDigits(self
))
// Reads a decimal number as a NUMBER_TOKEN: digits with optional decimal
// places, or decimal places on their own, followed by an optional exponent.
// A -1 from the decimal-places or exponent reader is tested explicitly
// (presumably to fail the whole number). If nothing was consumed the cursor
// is restored with IoLexer_popPosBack.
// NOTE(review): the branch bodies and return values are not visible in
// these lines.
1280 int IoLexer_readDecimal(IoLexer
*self
)
1282 IoLexer_pushPos(self
);
1284 if (IoLexer_readDigits(self
))
1286 if (IoLexer_readDecimalPlaces(self
) == -1)
1293 if (IoLexer_readDecimalPlaces(self
) != 1)
1299 if (IoLexer_readExponent(self
) == -1)
1304 if (IoLexer_grabLength(self
))
1306 IoLexer_grabTokenType_(self
, NUMBER_TOKEN
);
1307 IoLexer_popPos(self
);
1311 IoLexer_popPosBack(self
);
1315 int IoLexer_readHexNumber(IoLexer
*self
)
1319 IoLexer_pushPos(self
);
1321 if (IoLexer_readChar_(self
, '0') && IoLexer_readCharAnyCase_(self
, 'x'))
1323 while (IoLexer_readDigits(self
) || IoLexer_readCharacters(self
))
1329 if (read
&& IoLexer_grabLength(self
))
1331 IoLexer_grabTokenType_(self
, HEXNUMBER_TOKEN
);
1332 IoLexer_popPos(self
);
1336 IoLexer_popPosBack(self
);