Linux multi-monitor fullscreen support
[ryzomcore.git] / ryzom / tools / pd_parser / tokenizer.h
blobc1816989a0a527c85afaf547b3a822133ecbe0da
1 // Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Affero General Public License as
6 // published by the Free Software Foundation, either version 3 of the
7 // License, or (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Affero General Public License for more details.
14 // You should have received a copy of the GNU Affero General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #ifndef RY_PD_TOKENIZER_H
19 #define RY_PD_TOKENIZER_H
21 // Nel Misc
22 #include "nel/misc/types_nl.h"
23 #include <nel/misc/debug.h>
24 #include <nel/misc/file.h>
26 // STL
27 #include <string>
28 #include <map>
29 #include <string.h>
30 #include <ctype.h>
/// Kinds of lexemes produced by CTokenizer.
/// Enumerators keep their implicit values (TokenUnknown is 0), so the order must not change.
enum TToken
{
	// Special markers
	TokenUnknown,
	TokenEOF,

	// Script inclusion directive (the "verbatim" keyword)
	TokenIncludeScript,

	// Literals, identifiers and delimited blocks
	TokenIdentifier,
	TokenScopedIdentifier,
	TokenRefineScope,
	TokenNumber,
	TokenString,
	TokenCppCode,
	TokenSlash,
	TokenDescription,

	// Punctuation
	TokenOpenBrace,
	TokenCloseBrace,
	TokenOpenParenthesis,
	TokenCloseParenthesis,
	TokenOpenBracket,
	TokenCloseBracket,
	TokenLessThan,
	TokenGreaterThan,
	TokenEqual,
	TokenComma,
	TokenDot,
	TokenColon,
	TokenSemiColon,
	TokenMinus,
	TokenPlus,
	TokenTimes,
	TokenAntiSlash,
	TokenMod,
	TokenSharp,
	TokenAnd,
	TokenOr,
	TokenCirc,
	TokenInterrog,
	TokenExclam,

	// Keywords
	TokenClass,
	TokenEnum,
	TokenDimension,
	TokenParent,
	TokenFlag,
	TokenFile,
	TokenDb,
	TokenType,
	TokenKey,
	TokenHidden,
	TokenExtern,
	TokenMirrored,
	TokenImplements,
	TokenMapped,
	TokenDerived,
	TokenInitFill,
	TokenReserve,
	TokenInclude,
	TokenWriteTrigger,
	TokenSeparated,

	TokenUsePch,

	TokenLogMsg,
	TokenLogContext,
};
102 class CTokenizer
104 public:
106 class CToken
108 public:
109 CToken(uint start = 0, uint end = 0, TToken token = TokenUnknown, CTokenizer* tokenizer = NULL) : Tokenizer(tokenizer), Start(start), End(end), Token(token) {}
110 CToken(CTokenizer* tokenizer) : Tokenizer(tokenizer), Start(0), End(0), Token(TokenUnknown) {}
112 CTokenizer* Tokenizer;
113 uint Start;
114 uint End;
115 TToken Token;
117 std::string get() const { return Tokenizer->get(*this); }
120 CTokenizer() : _Buffer(NULL), _Size(0), _CurrentToken(0) { }
122 CTokenizer(const std::string &text)
124 init(text);
127 ~CTokenizer()
129 clear();
132 /// Init
133 void readFile(const std::string &filename)
135 clear();
137 NLMISC::CIFile f;
138 if (!f.open(filename))
139 return;
141 uint size = f.getFileSize();
142 char *buffer = new char[size+1];
143 f.serialBuffer((uint8*)buffer, size);
144 buffer[size] = '\0';
146 f.close();
148 _File = filename;
150 init(std::string(buffer));
152 delete buffer;
155 /// Init
156 void init(const std::string &text)
158 _Str = text;
159 init(_Str.c_str());
162 /// Init
163 void init(const char* text, uint size = 0)
165 _Size = (size > 0 ? size : (uint)strlen(text));
166 _Buffer = text;
167 nlassert(_Buffer != NULL);
168 _TempToken.Start = 0;
169 _TempToken.End = 0;
170 _CurrentToken = 0;
171 _Mark = 0;
173 initOneLetterTokens();
174 initKeywords();
178 bool tokenize()
180 while (true)
182 CToken token = nextToken();
184 nlassert(token.Tokenizer != NULL);
186 if (token.Token == TokenUnknown)
187 return false;
189 if (token.Token == TokenEOF)
190 break;
192 if (token.Token == TokenIncludeScript)
194 CToken scriptFile = nextToken();
195 if (scriptFile.Token != TokenString)
196 error(scriptFile);
198 std::string file = scriptFile.get();
200 CTokenizer* sub = new CTokenizer();
201 _Includes.push_back(sub);
202 sub->readFile(file);
203 if (!sub->tokenize())
204 return false;
205 _Tokens.insert(_Tokens.end(), sub->_Tokens.begin(), sub->_Tokens.end());
207 else
209 _Tokens.push_back(token);
213 std::vector<CToken>::iterator it;
214 for (it=_Tokens.begin(); it!=_Tokens.end(); ++it)
216 if ((*it).Token == TokenIdentifier)
218 std::vector<CToken>::iterator startit = it;
219 std::vector<CToken>::iterator endit = it;
220 while ((++it) != _Tokens.end() &&
221 (*it).Token == TokenRefineScope &&
222 (*it).Tokenizer == (*startit).Tokenizer &&
223 (++it) != _Tokens.end() &&
224 (*it).Token == TokenIdentifier &&
225 (*it).Tokenizer == (*startit).Tokenizer)
227 endit = it;
229 if (endit != startit)
231 CToken newToken(this);
232 newToken.Token = TokenScopedIdentifier;
233 newToken.Start = (*startit).Start;
234 newToken.End = (*endit).End;
235 ++endit;
236 it = _Tokens.erase(startit, endit);
237 it = _Tokens.insert(it, newToken);
242 return true;
246 bool next()
248 ++_CurrentToken;
249 return (!end());
253 const CToken &currentToken() const
255 nlassert(!end());
256 return _Tokens[_CurrentToken];
260 TToken current() const
262 return currentToken().Token;
266 bool end() const
268 return _CurrentToken >= _Tokens.size();
272 void push()
274 _Stack.push_back(_CurrentToken);
278 void pop()
280 nlassert(!_Stack.empty());
281 _CurrentToken = _Stack.back();
282 if (_CurrentToken < _Mark)
283 error(_Tokens[_Mark], "parse");
284 _Stack.pop_back();
288 void leaveMark()
290 _Mark = _CurrentToken;
293 /// get token text
294 std::string get(const CToken &token) const
296 nlassert(token.Tokenizer != NULL);
298 if (token.Tokenizer != this)
299 return token.Tokenizer->get(token);
301 std::string str(_Buffer+token.Start, token.End-token.Start);
302 if (token.Token == TokenString)
304 std::string::size_type pos = 0;
305 while ((pos = str.find('\\', pos)) != std::string::npos)
307 if (pos+1 == str.size())
308 break;
309 switch (str[pos+1])
311 case 'n':
312 str.erase(pos, 2);
313 str.insert(pos, "\n");
314 break;
315 case 'r':
316 str.erase(pos, 2);
317 str.insert(pos, "\r");
318 break;
319 case 't':
320 str.erase(pos, 2);
321 str.insert(pos, "\t");
322 break;
323 default:
324 str.erase(pos, 1);
325 ++pos;
326 break;
330 return str;
333 /// get file, line
334 void getFileLine(const CToken& token, uint &line, uint &col, std::string &file)
336 nlassert(token.Tokenizer != NULL);
338 if (token.Tokenizer != this)
340 token.Tokenizer->getFileLine(token, line, col, file);
341 return;
344 file = _File;
346 uint n = 0;
347 uint pos = token.Start;
349 line = 1;
350 col = 1;
352 while (n < pos)
354 if (_Buffer[n] == '\0')
355 break;
356 if (_Buffer[n] == '\t')
357 col += 4;
358 else if (_Buffer[n] != '\r')
359 ++col;
360 if (_Buffer[n] == '\n')
361 ++line, col = 1;
363 ++n;
367 /// error at
368 void error(const CToken& token, const char *errType = "syntax", const char *errMsg = NULL)
370 if (token.Tokenizer != this && token.Tokenizer != NULL)
372 token.Tokenizer->error(token, errType, errMsg);
373 return;
376 uint pos = token.Start;
377 uint n = 0;
378 uint line = 1, col = 1;
379 uint lineStartAt = 0, lineEndAt;
381 while (n < pos)
383 if (_Buffer[n] == '\0')
384 break;
385 if (_Buffer[n] == '\t')
386 col += 4;
387 else if (_Buffer[n] != '\r')
388 ++col;
389 else
390 lineStartAt = n+1;
391 if (_Buffer[n] == '\n')
392 ++line, col = 1, lineStartAt = n+1;
394 ++n;
397 lineEndAt = n;
398 while (_Buffer[lineEndAt] != '\0' && _Buffer[lineEndAt] != '\n' && _Buffer[lineEndAt] != '\r')
399 ++lineEndAt;
401 NLMISC::createDebug ();
403 std::string errorMsg = NLMISC::toString("PD_PARSE: file %s, %s error at line %d, column %d%s%s", _File.c_str(), errType, line, col, (errMsg != NULL ? ": " : ""), (errMsg != NULL ? errMsg : ""));
405 NLMISC::ErrorLog->displayRawNL("%s", errorMsg.c_str());
406 std::string extr(_Buffer+lineStartAt, lineEndAt-lineStartAt);
407 NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());
408 uint i;
409 for (i=0; i<extr.size() && i<n-lineStartAt; ++i)
410 if (extr[i] != '\t')
411 extr[i] = ' ';
412 extr.erase(n-lineStartAt);
413 extr += '^';
414 NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());
415 nlerror("%s", errorMsg.c_str());
418 private:
420 /// Original text
421 std::string _Str;
423 /// Parsed buffer
424 const char *_Buffer;
426 /// Buffer size
427 uint _Size;
429 /// Keywords
430 std::map<std::string, TToken> _Keywords;
432 /// One letter tokens
433 TToken _OneLetterTokens[256];
435 /// List of tokens
436 std::vector<CToken> _Tokens;
438 /// Current token
439 uint _CurrentToken;
441 /// State stack
442 std::vector<uint> _Stack;
444 /// Currently used token
445 CToken _TempToken;
447 /// Mark
448 uint _Mark;
450 /// Loaded file
451 std::string _File;
453 /// Subtokenizers
454 std::vector<CTokenizer*> _Includes;
457 void clear()
459 _Str.clear();
460 _Size = 0;
461 _TempToken.Start = 0;
462 _TempToken.End = 0;
463 _Buffer = NULL;
464 _Mark = 0;
465 _File.clear();
469 CToken nextToken()
471 skipSpaces();
472 if (posAtEnd())
473 return CToken(0, 0, TokenEOF, this);
475 _TempToken.Start = _TempToken.End;
476 _TempToken.Token = TokenUnknown;
477 _TempToken.Tokenizer = this;
479 char parse = popChar();
481 if (isalpha(parse) || parse == '_')
483 // identifier
484 while (!posAtEnd() && (isalnum(parse = getChar()) || parse == '_'))
485 popChar();
487 std::map<std::string, TToken>::iterator it;
488 _TempToken.Token = ((it = _Keywords.find(get(_TempToken))) != _Keywords.end() ? (*it).second : TokenIdentifier);
490 else if (isdigit(parse))
492 // number
493 while (!posAtEnd() && isdigit(getChar()))
494 popChar();
496 _TempToken.Token = TokenNumber;
498 else if (parse == '"')
500 // string
503 if (posAtEnd())
504 error(_TempToken);
506 parse = popChar();
507 if (parse == '"')
508 break;
510 if (parse == '\\')
512 if (posAtEnd())
513 error(_TempToken);
514 parse = popChar();
517 while (true);
518 _TempToken.Token = TokenString;
519 return CToken(_TempToken.Start+1, _TempToken.End-1, _TempToken.Token, this);
521 else if (parse == '@')
523 if (posAtEnd())
524 error(_TempToken);
526 parse = popChar();
528 if (parse == '[')
530 // user code
533 if (posAtEnd())
534 error(_TempToken);
536 parse = popChar();
537 if (parse == ']')
539 if (posAtEnd())
540 error(_TempToken);
541 if (popChar() == '@')
542 break;
543 else
544 pushChar();
547 while (true);
549 uint startTrim = _TempToken.Start+2;
550 uint endTrim = _TempToken.End-2;
552 while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))
553 ++startTrim;
554 while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))
555 --endTrim;
557 _TempToken.Token = TokenCppCode;
558 return CToken(startTrim, endTrim, _TempToken.Token, this);
560 else if (parse == '/')
562 // description
565 if (posAtEnd())
566 error(_TempToken);
568 parse = popChar();
569 if (parse == '/')
571 if (posAtEnd())
572 error(_TempToken);
573 if (popChar() == '@')
574 break;
575 else
576 pushChar();
579 while (true);
581 uint startTrim = _TempToken.Start+2;
582 uint endTrim = _TempToken.End-2;
584 while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))
585 ++startTrim;
586 while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))
587 --endTrim;
589 _TempToken.Token = TokenDescription;
590 return CToken(startTrim, endTrim, _TempToken.Token, this);
592 else
594 error(_TempToken);
597 else if (parse == '/')
599 _TempToken.Token = TokenSlash;
600 if (!posAtEnd())
602 parse = popChar();
603 if (parse == '/')
605 // skip to end of line
606 while (!posAtEnd() && (parse = popChar()) != '\n' && parse != '\r')
608 return nextToken();
610 else if (parse == '*')
612 // skip to comment close
613 while (true)
615 if (posAtEnd())
616 error(_TempToken);
618 parse = popChar();
619 if (parse == '*')
621 if (posAtEnd())
622 error(_TempToken);
623 if (popChar() == '/')
624 break;
625 else
626 pushChar();
629 return nextToken();
631 else
633 pushChar();
637 else if (parse == ':')
639 if (posAtEnd())
640 _TempToken.Token = TokenColon;
641 else
643 parse = popChar();
644 if (parse == ':')
646 _TempToken.Token = TokenRefineScope;
648 else
650 pushChar();
651 _TempToken.Token = TokenColon;
655 else if (getOLToken(parse) != TokenUnknown)
656 _TempToken.Token = getOLToken(parse);
658 if (_TempToken.Token == TokenUnknown)
659 error(_TempToken);
661 return _TempToken;
665 bool posAtEnd() const
667 return _TempToken.End >= _Size;
670 /// reset the buffer
671 void reset()
673 _TempToken.End = 0;
676 /// skip spaces
677 void skipSpaces()
679 while (!posAtEnd() && isspace(_Buffer[_TempToken.End]))
680 ++(_TempToken.End);
683 /// pop char
684 char popChar()
686 nlassert(!posAtEnd());
687 return _Buffer[(_TempToken.End)++];
690 /// get char
691 char getChar()
693 nlassert(!posAtEnd());
694 return _Buffer[_TempToken.End];
697 /// push char
698 void pushChar()
700 nlassert(_TempToken.End > 0);
701 --(_TempToken.End);
704 /// init one letter tokens
705 void initOneLetterTokens()
707 uint i;
708 for (i=0; i<256; ++i)
709 _OneLetterTokens[i] = TokenUnknown;
711 setOLToken('{', TokenOpenBrace);
712 setOLToken('}', TokenCloseBrace);
713 setOLToken('(', TokenOpenParenthesis);
714 setOLToken(')', TokenCloseParenthesis);
715 setOLToken('[', TokenOpenBracket);
716 setOLToken(']', TokenCloseBracket);
717 setOLToken('<', TokenLessThan);
718 setOLToken('>', TokenGreaterThan);
719 setOLToken('=', TokenEqual);
720 setOLToken(',', TokenComma);
721 setOLToken('.', TokenDot);
722 setOLToken(':', TokenColon);
723 setOLToken(';', TokenSemiColon);
724 setOLToken('-', TokenMinus);
725 setOLToken('+', TokenPlus);
726 setOLToken('*', TokenTimes);
727 setOLToken('\\', TokenAntiSlash);
728 setOLToken('%', TokenMod);
729 setOLToken('#', TokenSharp);
730 setOLToken('&', TokenAnd);
731 setOLToken('|', TokenOr);
732 setOLToken('^', TokenCirc);
733 setOLToken('?', TokenInterrog);
734 setOLToken('!', TokenExclam);
737 /// set one letter token
738 void setOLToken(char c, TToken token)
740 _OneLetterTokens[(uint)c] = token;
743 /// set one letter token
744 TToken getOLToken(char c) const
746 return _OneLetterTokens[(uint)c];
749 /// init keywords
750 void initKeywords()
752 _Keywords.clear();
754 _Keywords["verbatim"] = TokenIncludeScript;
756 _Keywords["class"] = TokenClass;
757 _Keywords["enum"] = TokenEnum;
758 _Keywords["dimension"] = TokenDimension;
759 _Keywords["parent"] = TokenParent;
760 _Keywords["flag"] = TokenFlag;
761 _Keywords["file"] = TokenFile;
762 _Keywords["db"] = TokenDb;
763 _Keywords["type"] = TokenType;
764 _Keywords["key"] = TokenKey;
765 _Keywords["hidden"] = TokenHidden;
766 _Keywords["extern"] = TokenExtern;
767 _Keywords["mirrored"] = TokenMirrored;
768 _Keywords["implements"] = TokenImplements;
769 _Keywords["mapped"] = TokenMapped;
770 _Keywords["derived"] = TokenDerived;
771 _Keywords["initfill"] = TokenInitFill;
772 _Keywords["logmsg"] = TokenLogMsg;
773 _Keywords["logcontext"] = TokenLogContext;
774 _Keywords["reserve"] = TokenReserve;
775 _Keywords["include"] = TokenInclude;
776 _Keywords["usepch"] = TokenUsePch;
777 _Keywords["writetriggered"] = TokenWriteTrigger;
778 _Keywords["separated"] = TokenSeparated;
783 #endif