+ exponents are now valid: 1.212121e+18
[io/quag.git] / libs / iovm / source / IoLexer.c
blob9168ec77159d77743b181d822d7a42dd36fff255
1 /*
2 Copyright (c) 2003, Steve Dekorte
3 All rights reserved. See _BSDLicense.txt.
5 Aug 2004 - removed {} from op chars
6 - changed identifier to stop after 1 colon
7 */
#include "IoLexer.h"
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
15 //#define LEXER_DEBUG
16 //#define LEXER_DEBUG_TOKENS
17 #define TEST_INLINE inline
19 static IoToken *IoLexer_currentToken(IoLexer *self)
21 return List_top(self->tokenStream);
24 IoLexer *IoLexer_new(void)
26 IoLexer *self = (IoLexer *)io_calloc(1, sizeof(IoLexer));
27 self->s = (char *)io_calloc(1, 1);
28 self->s[0] = 0;
29 self->posStack = Stack_new();
30 self->tokenStack = Stack_new();
31 self->tokenStream = List_new();
32 self->charLineIndex = List_new();
33 return self;
36 void IoLexer_free(IoLexer *self)
38 IoLexer_clear(self);
39 io_free(self->s);
40 Stack_free(self->posStack);
41 Stack_free(self->tokenStack);
42 List_free(self->tokenStream);
43 List_free(self->charLineIndex);
44 if(self->errorDescription) io_free(self->errorDescription);
45 io_free(self);
48 char *IoLexer_errorDescription(IoLexer *self)
50 IoToken *et = IoLexer_errorToken(self);
52 if (!self->errorDescription)
54 self->errorDescription = io_calloc(1, 1024);
55 self->errorDescription[0] = 0;
58 if (et)
60 sprintf(self->errorDescription,
61 "\"%s\" on line %i character %i",
62 et->error,
63 IoToken_lineNumber(et),
64 IoToken_charNumber(et));
67 return self->errorDescription;
71 void IoLexer_buildLineIndex(IoLexer *self)
73 char *s = self->s;
75 List_removeAll(self->charLineIndex);
77 List_append_(self->charLineIndex, s);
79 while (*s)
81 if (*s == '\n')
83 List_append_(self->charLineIndex, s);
85 s ++;
88 List_append_(self->charLineIndex, s);
89 self->lineHint = 0;
92 // next/prev character ------------------------
/*
 * Length in bytes of a UTF-8 sequence, judged from its first byte `c`.
 * Bytes 0xfe and 0xff are mapped to 1. Note that `c` is evaluated several
 * times, so pass a side-effect-free expression.
 */
#define UTF8_SEQLEN(c) (        \
	(c) < 0x80 ? 1 :            \
	(c) < 0xe0 ? 2 :            \
	(c) < 0xf0 ? 3 :            \
	(c) < 0xf8 ? 4 :            \
	(c) < 0xfc ? 5 :            \
	(c) < 0xfe ? 6 : 1          \
)

/* Sentinel value the UTF-8 decoder returns for a malformed sequence. */
#define INVALID_CHAR 0xfffe
104 static uchar_t _IoLexer_DecodeUTF8(const unsigned char *s)
106 if (*s < 0x80)
107 return *s;
108 else if (*s < 0xc2)
109 return INVALID_CHAR;
110 else if (*s < 0xe0)
112 if (!((s[1] ^ 0x80) < 0x40))
113 return INVALID_CHAR;
114 return ((uchar_t)(s[0] & 0x1f) << 6) | (uchar_t)(s[1] ^ 0x80);
116 else if (*s < 0xf0)
118 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[0] >= 0xe1 || s[1] >= 0xa0)))
119 return INVALID_CHAR;
120 return ((uchar_t)(s[0] & 0x0f) << 12) | ((uchar_t)(s[1] ^ 0x80) << 6) | (uchar_t)(s[2] ^ 0x80);
122 else if (*s < 0xf8)
124 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[0] >= 0xf1 || s[1] >= 0x90)))
125 return INVALID_CHAR;
126 return ((uchar_t)(s[0] & 0x07) << 18) | ((uchar_t)(s[1] ^ 0x80) << 12) | ((uchar_t)(s[2] ^ 0x80) << 6) | (uchar_t)(s[3] ^ 0x80);
128 else if (*s < 0xfc)
130 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[0] >= 0xf9 || s[1] >= 0x88)))
131 return INVALID_CHAR;
132 return ((uchar_t)(s[0] & 0x03) << 24) | ((uchar_t)(s[1] ^ 0x80) << 18) | ((uchar_t)(s[2] ^ 0x80) << 12) | ((uchar_t)(s[3] ^ 0x80) << 6) | (uchar_t)(s[4] ^ 0x80);
134 else if (*s < 0xfe)
136 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[5] ^ 0x80) < 0x40 && (s[0] >= 0xfd || s[1] >= 0x84)))
137 return INVALID_CHAR;
138 return ((uchar_t)(s[0] & 0x01) << 30) | ((uchar_t)(s[1] ^ 0x80) << 24) | ((uchar_t)(s[2] ^ 0x80) << 18) | ((uchar_t)(s[3] ^ 0x80) << 12) | ((uchar_t)(s[4] ^ 0x80) << 6) | (uchar_t)(s[5] ^ 0x80);
140 else
141 return INVALID_CHAR;
144 TEST_INLINE uchar_t IoLexer_nextChar(IoLexer *self)
146 unsigned char c = (unsigned char) * (self->current);
147 int seqlen, i;
148 uchar_t uch;
150 if (c == 0)
152 return 0;
154 else if (c < 0x80)
156 self->current++;
157 return c;
160 seqlen = UTF8_SEQLEN(c);
162 for (i = 0; i < seqlen; i++)
164 if (self->current[i] == 0)
166 // XXX: invalid or incomplete sequence
167 return 0;
171 uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
173 if (uch == INVALID_CHAR)
175 return 0;
178 self->current += seqlen;
179 return uch;
182 TEST_INLINE uchar_t IoLexer_prevChar(IoLexer *self)
184 uchar_t uch;
185 int len;
187 for (len = 1; len <= 6 && self->current - len > self->s; len++)
189 unsigned char c = *(unsigned char *)(self->current - len);
190 if (c < 0x80 || c >= 0xc2)
191 break;
194 self->current -= len;
195 uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
196 if (uch == INVALID_CHAR)
197 return 0;
199 return uch;
202 TEST_INLINE char *IoLexer_current(IoLexer *self)
204 return self->current;
207 TEST_INLINE int IoLexer_onNULL(IoLexer *self)
209 return (*(self->current) == 0);
212 // ------------------------------------------
214 size_t IoLexer_currentLineNumberOld(IoLexer *self)
216 size_t lineNumber = 1;
217 char *s = self->s;
219 while (s < self->current)
221 if (*s == '\n')
223 lineNumber ++;
226 s ++;
229 return lineNumber;
// Returns the line number of self->current by walking self->charLineIndex,
// starting from the cached self->lineHint; the result is stored back into
// self->lineHint for the next call.
// NOTE(review): this listing is missing very short lines (braces, and
// apparently comment delimiters). The embedded numbers jump 261 -> 265 and
// 269 -> 273 around the realLine cross-check below, so that check may be
// commented out in the real file - confirm against the repository before
// relying on it.
232 TEST_INLINE size_t IoLexer_currentLineNumber(IoLexer *self)
234 // this should be even faster than a binary search
235 // since almost all results are very close to the last
237 List *index = self->charLineIndex;
238 size_t line = self->lineHint;
239 size_t numLines = List_size(index);
240 void *current = (void *)self->current;
// Cursor lies before the hinted line's index entry: search backwards.
242 if (current < List_at_(index, line))
244 // walk down lines until char is bigger than one
245 while (line > 0 && !(current > List_at_(index, line)))
247 line --;
249 line ++;
251 else
253 // walk up lines until char is less than or equal to one
254 while (line < numLines && !(current <= List_at_(index, line)))
256 line ++;
261 self->lineHint = line;
// Cross-check against the linear-scan implementation (see NOTE above).
265 size_t realLine = IoLexer_currentLineNumberOld(self);
267 if (line != realLine)
269 printf("mismatch on currentLine %i != %i\n", (int)line, (int)realLine);
273 return line;
276 void IoLexer_clear(IoLexer *self)
278 LIST_FOREACH(self->tokenStream, i, t, IoToken_free((IoToken *)t) );
279 List_removeAll(self->tokenStream);
281 Stack_clear(self->posStack);
282 Stack_clear(self->tokenStack);
284 self->current = self->s;
285 self->resultIndex = 0;
286 self->maxChar = 0;
287 self->errorToken = NULL;
290 IoToken *IoLexer_errorToken(IoLexer *self)
292 return self->errorToken;
295 // lexing -------------------------------------
297 void IoLexer_string_(IoLexer *self, const char *string)
299 self->s = strcpy((char *)io_realloc(self->s, strlen(string) + 1), string);
300 self->current = self->s;
301 IoLexer_buildLineIndex(self);
304 void IoLexer_printLast_(IoLexer *self, int max)
306 char *s = self->s + self->maxChar;
307 int i;
309 for (i = 0; i < max && s[i]; i ++)
311 putchar(s[i]);
315 // --- token and character position stacks ---
317 char *IoLexer_lastPos(IoLexer *self)
319 return Stack_top(self->posStack);
322 TEST_INLINE void IoLexer_pushPos(IoLexer *self)
324 intptr_t index = self->current - self->s;
326 if (index > (intptr_t)self->maxChar)
328 self->maxChar = index;
331 Stack_push_(self->tokenStack, (void *)(intptr_t)(List_size(self->tokenStream) - 1));
332 Stack_push_(self->posStack, self->current);
334 #ifdef LEXER_DEBUG
335 printf("push: ");
336 IoLexer_print(self);
337 #endif
340 TEST_INLINE void IoLexer_popPos(IoLexer *self)
342 Stack_pop(self->tokenStack);
343 Stack_pop(self->posStack);
344 #ifdef LEXER_DEBUG
345 printf("pop: ");
346 IoLexer_print(self);
347 #endif
350 TEST_INLINE void IoLexer_popPosBack(IoLexer *self)
352 intptr_t i = (intptr_t)Stack_pop(self->tokenStack);
353 intptr_t topIndex = (intptr_t)Stack_top(self->tokenStack);
355 if (i > -1)
357 List_setSize_(self->tokenStream, i + 1);
359 if (i != topIndex) // ok to io_free token
361 IoToken *parent = IoLexer_currentToken(self);
363 if (parent)
365 IoToken_nextToken_(parent, NULL);
370 self->current = Stack_pop(self->posStack);
371 #ifdef LEXER_DEBUG
372 printf("back: "); IoLexer_print(self);
373 #endif
376 // ------------------------------------------
378 int IoLexer_lex(IoLexer *self)
380 IoLexer_clear(self);
381 IoLexer_pushPos(self);
383 IoLexer_messageChain(self);
385 if (*(self->current))
387 //printf("Lexing error after: ");
388 //IoLexer_printLast_(self, 30);
389 //printf("\n");
391 if (!self->errorToken)
393 if (List_size(self->tokenStream))
395 self->errorToken = IoLexer_currentToken(self);
397 else
399 self->errorToken = IoLexer_addTokenString_length_type_(self, self->current, 30, NO_TOKEN);
402 IoToken_error_(self->errorToken, "Syntax error near this location");
404 return -1;
406 return 0;
409 // getting results --------------------------------
411 IoToken *IoLexer_top(IoLexer *self)
413 return List_at_(self->tokenStream, self->resultIndex);
416 IoTokenType IoLexer_topType(IoLexer *self)
418 if (!IoLexer_top(self))
420 return 0;
423 return IoLexer_top(self)->type;
426 IoToken *IoLexer_pop(IoLexer *self)
428 IoToken *t = IoLexer_top(self);
429 self->resultIndex ++;
430 return t;
433 // stack management --------------------------------
435 void IoLexer_print(IoLexer *self)
437 IoToken *first = List_first(self->tokenStream);
439 if (first)
441 IoToken_print(first);
444 printf("\n");
447 void IoLexer_printTokens(IoLexer *self)
449 int i;
451 for (i = 0; i < List_size(self->tokenStream); i ++)
453 IoToken *t = List_at_(self->tokenStream, i);
455 printf("'%s'", t->name);
456 printf(" %s ", IoToken_typeName(t));
458 if (i < List_size(self->tokenStream) - 1)
460 printf(", ");
464 printf("\n");
467 // grabbing ---------------------------------------------
469 int IoLexer_grabLength(IoLexer *self)
471 char *s1 = IoLexer_lastPos(self);
472 char *s2 = IoLexer_current(self);
474 return s2 - s1;
477 void IoLexer_grabTokenType_(IoLexer *self, IoTokenType type)
479 char *s1 = IoLexer_lastPos(self);
480 char *s2 = IoLexer_current(self);
481 size_t len = (s2 - s1);
483 if (!len)
485 printf("IoLexer fatal error: empty token\n");
486 exit(1);
489 IoLexer_addTokenString_length_type_(self, s1, len, type);
492 IoToken *IoLexer_addTokenString_length_type_(IoLexer *self, const char *s1, size_t len, IoTokenType type)
494 IoToken *top = IoLexer_currentToken(self);
495 IoToken *t = IoToken_new();
497 t->lineNumber = IoLexer_currentLineNumber(self);
498 //t->charNumber = (int)(s1 - self->s);
499 t->charNumber = (int)(self->current - self->s);
501 if (t->charNumber < 0)
503 printf("bad t->charNumber = %i\n", t->charNumber);
506 IoToken_name_length_(t, s1, len);
507 IoToken_type_(t, type);
509 if (top)
511 IoToken_nextToken_(top, t);
514 List_push_(self->tokenStream, t);
515 #ifdef LEXER_DEBUG_TOKENS
516 printf("token '%s' %s\n", t->name, IoToken_typeName(t));
517 #endif
519 return t;
522 // reading ------------------------------------
524 void IoLexer_messageChain(IoLexer *self)
528 while ( IoLexer_readTerminator(self) ||
529 IoLexer_readSeparator(self) ||
530 IoLexer_readComment(self))
532 } while ( IoLexer_readMessage(self));
535 // message -------------------------------
537 static void IoLexer_readMessage_error(IoLexer *self, const char *name)
539 IoLexer_popPosBack(self);
540 self->errorToken = IoLexer_currentToken(self);
541 IoToken_error_(self->errorToken, name);
544 int IoLexer_readTokenChars_type_(IoLexer *self, const char *chars, IoTokenType type)
546 while (*chars)
548 if (IoLexer_readTokenChar_type_(self, *chars, type)) return 1;
549 chars ++;
552 return 0;
555 const char *IoLexer_nameForGroupChar_(IoLexer *self, char groupChar)
557 switch (groupChar)
559 case '(': return "";
560 case '[': return "squareBrackets";
561 case '{': return "curlyBrackets";
564 printf("IoLexer: fatal error - invalid group char %c\n", groupChar);
565 exit(1);
/*
 * Characters (besides letters and digits) accepted inside an identifier.
 * The pointer is re-targeted at other string literals while lexing, so the
 * pointer itself is mutable; the characters it points to are literals and
 * must never be written through it, hence const.
 */
static const char *specialChars = ":._";
// Reads one message: an optional symbol followed by an optional argument
// group opened by '(', '[' or '{'. Returns 1 if a symbol and/or a group was
// read, 0 otherwise (after rolling back or recording an error).
// NOTE(review): this listing is missing very short lines (braces, and
// apparently comment delimiters). The embedded numbers jump 619 -> 622 and
// 630 -> 633 around the strchr("([{", c) error branch below, so that branch
// may be commented out in the real file - confirm against the repository.
570 int IoLexer_readMessage(IoLexer *self)
572 char foundSymbol;
574 IoLexer_pushPos(self);
575 IoLexer_readPadding(self);
577 foundSymbol = IoLexer_readSymbol(self);
581 char groupChar;
// Skip separators and comments between the symbol and a possible group.
582 while (IoLexer_readSeparator(self) || IoLexer_readComment(self))
585 groupChar = *IoLexer_current(self);
// A '[' or '{' group always gets an implicit name token; a '(' group only
// when no symbol preceded it.
587 if (groupChar && (strchr("[{", groupChar) || (!foundSymbol && groupChar == '(')))
589 char *groupName = (char *)IoLexer_nameForGroupChar_(self, groupChar);
590 IoLexer_addTokenString_length_type_(self, groupName, strlen(groupName), IDENTIFIER_TOKEN);
593 if (IoLexer_readTokenChars_type_(self, "([{", OPENPAREN_TOKEN))
595 IoLexer_readPadding(self);
// One iteration per argument; the loop continues while a ',' follows.
596 do {
597 IoTokenType type = IoLexer_currentToken(self)->type;
599 IoLexer_readPadding(self);
600 // Empty argument: (... ,)
601 if (COMMA_TOKEN == type)
603 char c = *IoLexer_current(self);
605 if (',' == c || strchr(")]}", c))
607 IoLexer_readMessage_error(self, "missing argument in argument list");
608 return 0;
// Inside a '[' group, ':' is temporarily removed from the identifier
// special characters while the argument is lexed, then restored.
612 if (groupChar == '[') specialChars = "._";
613 IoLexer_messageChain(self);
614 if (groupChar == '[') specialChars = ":._";
615 IoLexer_readPadding(self);
617 } while (IoLexer_readTokenChar_type_(self, ',', COMMA_TOKEN));
// Any of ")]}" is accepted as the closer; it is not matched against the
// opening character here.
619 if (!IoLexer_readTokenChars_type_(self, ")]}", CLOSEPAREN_TOKEN))
622 char c = *IoLexer_current(self);
624 if (strchr("([{", c))
626 IoLexer_readMessage_error(self, "expected a message but instead found a open group character");
628 else
630 IoLexer_readMessage_error(self, "missing closing group character for argument list");
// Report the unmatched group using the opening character seen earlier.
633 if (groupChar == '(')
635 IoLexer_readMessage_error(self, "unmatched ()s");
637 else if (groupChar == '[')
639 IoLexer_readMessage_error(self, "unmatched []s");
641 else if (groupChar == '{')
643 IoLexer_readMessage_error(self, "unmatched {}s");
645 //printf("Token %p error: %s - %s\n", t, t->error, IoToken_error(t));
646 return 0;
649 IoLexer_popPos(self);
650 return 1;
// No group followed: a bare symbol still counts as a message.
653 if (foundSymbol)
655 IoLexer_popPos(self);
656 return 1;
659 IoLexer_popPosBack(self);
660 return 0;
663 int IoLexer_readPadding(IoLexer *self)
665 int r = 0;
667 while (IoLexer_readWhitespace(self) || IoLexer_readComment(self))
669 r = 1;
672 return r;
675 // symbols ------------------------------------------
677 int IoLexer_readSymbol(IoLexer *self)
679 if ( IoLexer_readNumber(self) ||
680 IoLexer_readOperator(self) ||
681 IoLexer_readIdentifier(self) ||
682 IoLexer_readQuote(self)) return 1;
683 return 0;
686 int IoLexer_readIdentifier(IoLexer *self)
688 IoLexer_pushPos(self);
690 while ( IoLexer_readLetter(self) ||
691 IoLexer_readDigit(self) ||
692 IoLexer_readSpecialChar(self))
695 if (IoLexer_grabLength(self))
697 // avoid grabing : on last character if followed by =
699 char *current = IoLexer_current(self);
701 if (*(current - 1) == ':' && *current == '=')
703 IoLexer_prevChar(self);
707 IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
708 IoLexer_popPos(self);
709 return 1;
712 IoLexer_popPosBack(self);
714 return 0;
// Reads an operator: a run of operator characters, emitted as an
// IDENTIFIER_TOKEN. Returns 1 on success, 0 (rolled back) otherwise.
// NOTE(review): this listing is missing very short lines (braces, and
// apparently comment delimiters). The embedded numbers jump 732 -> 735 and
// 737 -> 741 around the `c != ':'` block below, so that block may be
// commented out in the real file - confirm against the repository.
717 int IoLexer_readOperator(IoLexer *self)
719 uchar_t c;
720 IoLexer_pushPos(self);
721 // ok if first character is a colon
// Peek at the next character: read it, then step back if one was read.
722 c = IoLexer_nextChar(self);
723 //printf("IoLexer_nextChar(self) = %c %i\n", c, c);
725 if (c == 0)
727 IoLexer_popPosBack(self);
728 return 0;
730 else
732 IoLexer_prevChar(self);
735 if (c != ':')
737 IoLexer_prevChar(self);
// Consume the whole run of operator characters.
741 while (IoLexer_readOpChar(self))
744 if (IoLexer_grabLength(self))
746 IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
747 IoLexer_popPos(self);
748 return 1;
751 IoLexer_popPosBack(self);
752 return 0;
755 // comments ------------------------------------------
757 int IoLexer_readComment(IoLexer *self)
759 return (IoLexer_readSlashStarComment(self) ||
760 IoLexer_readSlashSlashComment(self) ||
761 IoLexer_readPoundComment(self));
764 int IoLexer_readSlashStarComment(IoLexer *self)
766 IoLexer_pushPos(self);
768 if (IoLexer_readString_(self, "/*"))
770 unsigned int nesting = 1;
772 while (nesting > 0)
774 if (IoLexer_readString_(self, "/*"))
776 IoLexer_nextChar(self);
777 nesting++;
779 else if (IoLexer_readString_(self, "*/"))
781 // otherwise we end up trimming the last char
782 if (nesting > 1) IoLexer_nextChar(self);
783 nesting--;
785 else
786 IoLexer_nextChar(self);
788 IoLexer_popPos(self);
789 return 1;
792 IoLexer_popPosBack(self);
793 return 0;
796 int IoLexer_readSlashSlashComment(IoLexer *self)
798 IoLexer_pushPos(self);
800 if (IoLexer_nextChar(self) == '/')
802 if (IoLexer_nextChar(self) == '/')
804 while (IoLexer_readNonReturn(self)) { }
805 //IoLexer_grabTokenType_(self, COMMENT_TOKEN);
806 IoLexer_popPos(self);
807 return 1;
811 IoLexer_popPosBack(self);
812 return 0;
815 int IoLexer_readPoundComment(IoLexer *self)
817 IoLexer_pushPos(self);
819 if (IoLexer_nextChar(self) == '#')
821 while (IoLexer_readNonReturn(self))
824 //IoLexer_grabTokenType_(self, COMMENT_TOKEN);
825 IoLexer_popPos(self);
826 return 1;
829 IoLexer_popPosBack(self);
830 return 0;
833 // quotes -----------------------------------------
835 int IoLexer_readQuote(IoLexer *self)
837 return (IoLexer_readTriQuote(self) || IoLexer_readMonoQuote(self));
840 int IoLexer_readMonoQuote(IoLexer *self)
842 int mbskip = 0; // multi-byte character length
844 IoLexer_pushPos(self);
846 if (IoLexer_nextChar(self) == '"')
848 for (;;)
850 uchar_t c = IoLexer_nextChar(self);
852 if (mbskip <= 0 && ismbchar(c))
854 mbskip = mbcharlen(c);
857 if (mbskip-- > 0)
859 continue;
862 if (c == '"')
864 break;
867 if (c == '\\')
869 IoLexer_nextChar(self);
870 continue;
873 if (c == 0)
875 self->errorToken = IoLexer_currentToken(self);
877 if (self->errorToken)
879 IoToken_error_(self->errorToken, "unterminated quote");
882 IoLexer_popPosBack(self);
883 return 0;
887 IoLexer_grabTokenType_(self, MONOQUOTE_TOKEN);
888 IoLexer_popPos(self);
889 return 1;
892 IoLexer_popPosBack(self);
893 return 0;
896 int IoLexer_readTriQuote(IoLexer *self)
898 IoLexer_pushPos(self);
900 if (IoLexer_readString_(self, "\"\"\""))
902 while (!IoLexer_readString_(self, "\"\"\""))
904 uchar_t c = IoLexer_nextChar(self);
906 if (c == 0)
908 IoLexer_popPosBack(self);
909 return 0;
913 IoLexer_grabTokenType_(self, TRIQUOTE_TOKEN);
914 IoLexer_popPos(self);
915 return 1;
918 IoLexer_popPosBack(self);
919 return 0;
922 // helpers ----------------------------
924 int IoLexer_readTokenChar_type_(IoLexer *self, char c, IoTokenType type)
926 IoLexer_pushPos(self);
928 if (IoLexer_readChar_(self, c))
930 IoLexer_grabTokenType_(self, type);
931 IoLexer_popPos(self);
932 return 1;
935 IoLexer_popPosBack(self);
936 return 0;
939 int IoLexer_readTokenString_(IoLexer *self, const char *s)
941 IoLexer_pushPos(self);
943 if (IoLexer_readString_(self, s))
945 IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
946 IoLexer_popPos(self);
947 return 1;
950 IoLexer_popPosBack(self);
951 return 0;
955 int IoLexer_readString_(IoLexer *self, const char *s)
957 int len = strlen(s);
959 if (IoLexer_onNULL(self))
961 return 0;
964 if (strncmp(self->current, s, len) == 0)
966 self->current += len;
967 return 1;
970 return 0;
973 TEST_INLINE int IoLexer_readCharIn_(IoLexer *self, const char *s)
975 if (!IoLexer_onNULL(self))
977 uchar_t c = IoLexer_nextChar(self);
979 if (c < 0x80 && strchr(s, c))
981 return 1;
984 IoLexer_prevChar(self);
986 return 0;
989 TEST_INLINE int IoLexer_readCharInRange_(IoLexer *self, uchar_t first, uchar_t last)
991 if (!IoLexer_onNULL(self))
993 uchar_t c = IoLexer_nextChar(self);
995 if (c >= first && c <= last)
997 return 1;
1000 IoLexer_prevChar(self);
1002 return 0;
1005 int IoLexer_readChar_(IoLexer *self, char c)
1007 if (!IoLexer_onNULL(self))
1009 uchar_t nc = IoLexer_nextChar(self);
1011 if (nc && nc == c)
1013 return 1;
1016 IoLexer_prevChar(self);
1018 return 0;
1021 int IoLexer_readCharAnyCase_(IoLexer *self, char c)
1023 if (!IoLexer_onNULL(self))
1025 uchar_t nc = IoLexer_nextChar(self);
1027 if (nc && tolower(nc) == tolower(c))
1029 return 1;
1032 IoLexer_prevChar(self);
1034 return 0;
1037 int IoLexer_readNonASCIIChar_(IoLexer *self)
1039 if (!IoLexer_onNULL(self))
1041 uchar_t nc = IoLexer_nextChar(self);
1043 if (nc >= 0x80)
1044 return 1;
1046 IoLexer_prevChar(self);
1048 return 0;
1051 int IoLexer_readNonReturn(IoLexer *self)
1053 if (IoLexer_onNULL(self)) return 0;
1054 if (IoLexer_nextChar(self) != '\n') return 1;
1055 IoLexer_prevChar(self);
1056 return 0;
1059 int IoLexer_readNonQuote(IoLexer *self)
1061 if (IoLexer_onNULL(self)) return 0;
1062 if (IoLexer_nextChar(self) != '"') return 1;
1063 IoLexer_prevChar(self);
1064 return 0;
1067 // character definitions ----------------------------
1069 int IoLexer_readCharacters(IoLexer *self)
1071 int read = 0;
1073 while (IoLexer_readCharacter(self))
1075 read = 1;
1078 return read;
1081 int IoLexer_readCharacter(IoLexer *self)
1083 return (
1084 IoLexer_readLetter(self) ||
1085 IoLexer_readDigit(self) ||
1086 IoLexer_readSpecialChar(self) ||
1087 IoLexer_readOpChar(self)
1091 int IoLexer_readOpChar(IoLexer *self)
1093 return IoLexer_readCharIn_(self, ":'~!@$%^&*-+=|\\<>?/");
1096 int IoLexer_readSpecialChar(IoLexer *self)
1098 return IoLexer_readCharIn_(self, specialChars);
1101 int IoLexer_readDigit(IoLexer *self)
1103 return IoLexer_readCharInRange_(self, '0', '9');
1106 int IoLexer_readLetter(IoLexer *self)
1108 return IoLexer_readCharInRange_(self, 'A', 'Z') ||
1109 IoLexer_readCharInRange_(self, 'a', 'z') ||
1110 IoLexer_readNonASCIIChar_(self);
1113 // terminator -------------------------------
1115 int IoLexer_readTerminator(IoLexer *self)
1117 int terminated = 0;
1118 IoLexer_pushPos(self);
1119 IoLexer_readSeparator(self);
1121 while (IoLexer_readTerminatorChar(self))
1123 terminated = 1;
1124 IoLexer_readSeparator(self);
1127 if (terminated)
1129 IoToken *top = IoLexer_currentToken(self);
1131 // avoid double terminators
1132 if (top && IoToken_type(top) == TERMINATOR_TOKEN)
1134 return 1;
1137 IoLexer_addTokenString_length_type_(self, ";", 1, TERMINATOR_TOKEN);
1138 IoLexer_popPos(self);
1139 return 1;
1142 IoLexer_popPosBack(self);
1143 return 0;
1146 int IoLexer_readTerminatorChar(IoLexer *self)
1148 return IoLexer_readCharIn_(self, ";\n");
1151 // separator --------------------------------
1153 int IoLexer_readSeparator(IoLexer *self)
1155 IoLexer_pushPos(self);
1157 while (IoLexer_readSeparatorChar(self))
1161 if (IoLexer_grabLength(self))
1163 //IoLexer_grabTokenType_(self, SEPERATOR_TOKEN);
1164 IoLexer_popPos(self);
1165 return 1;
1168 IoLexer_popPosBack(self);
1169 return 0;
1172 int IoLexer_readSeparatorChar(IoLexer *self)
1174 if (IoLexer_readCharIn_(self, " \f\r\t\v"))
1176 return 1;
1178 else
1180 IoLexer_pushPos(self);
1181 if (IoLexer_readCharIn_(self, "\\"))
1183 while (IoLexer_readCharIn_(self, " \f\r\t\v"))
1187 if (IoLexer_readCharIn_(self, "\n"))
1189 IoLexer_popPos(self);
1190 return 1;
1193 IoLexer_popPosBack(self);
1194 return 0;
1198 // whitespace -----------------------------------
1200 int IoLexer_readWhitespace(IoLexer *self)
1202 IoLexer_pushPos(self);
1204 while (IoLexer_readWhitespaceChar(self))
1208 if (IoLexer_grabLength(self))
1210 //IoLexer_grabTokenType_(self, WHITESPACE_TOKEN);
1211 IoLexer_popPos(self);
1212 return 1;
1215 IoLexer_popPosBack(self);
1216 return 0;
1219 int IoLexer_readWhitespaceChar(IoLexer *self)
1221 return IoLexer_readCharIn_(self, " \f\r\t\v\n");
1224 int IoLexer_readDigits(IoLexer *self)
1226 int read = 0;
1228 IoLexer_pushPos(self);
1230 while (IoLexer_readDigit(self))
1232 read = 1;
1235 if (!read)
1237 IoLexer_popPosBack(self);
1238 return 0;
1241 IoLexer_popPos(self);
1242 return read;
1245 int IoLexer_readNumber(IoLexer *self)
1247 return (IoLexer_readHexNumber(self) || IoLexer_readDecimal(self));
1250 int IoLexer_readExponent(IoLexer *self)
1252 if (IoLexer_readCharAnyCase_(self, 'e'))
1254 IoLexer_readChar_(self, '-') || IoLexer_readChar_(self, '+');
1256 if (!IoLexer_readDigits(self))
1258 return -1;
1261 return 1;
1263 return 0;
1266 int IoLexer_readDecimalPlaces(IoLexer *self)
1268 if (IoLexer_readChar_(self, '.'))
1270 if (!IoLexer_readDigits(self))
1272 return -1;
1275 return 1;
1277 return 0;
1280 int IoLexer_readDecimal(IoLexer *self)
1282 IoLexer_pushPos(self);
1284 if (IoLexer_readDigits(self))
1286 if (IoLexer_readDecimalPlaces(self) == -1)
1288 goto error;
1291 else
1293 if (IoLexer_readDecimalPlaces(self) != 1)
1295 goto error;
1299 if (IoLexer_readExponent(self) == -1)
1301 goto error;
1304 if (IoLexer_grabLength(self))
1306 IoLexer_grabTokenType_(self, NUMBER_TOKEN);
1307 IoLexer_popPos(self);
1308 return 1;
1310 error:
1311 IoLexer_popPosBack(self);
1312 return 0;
1315 int IoLexer_readHexNumber(IoLexer *self)
1317 int read = 0;
1319 IoLexer_pushPos(self);
1321 if (IoLexer_readChar_(self, '0') && IoLexer_readCharAnyCase_(self, 'x'))
1323 while (IoLexer_readDigits(self) || IoLexer_readCharacters(self))
1325 read ++;
1329 if (read && IoLexer_grabLength(self))
1331 IoLexer_grabTokenType_(self, HEXNUMBER_TOKEN);
1332 IoLexer_popPos(self);
1333 return 1;
1336 IoLexer_popPosBack(self);
1337 return 0;