// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef RY_PD_TOKENIZER_H
#define RY_PD_TOKENIZER_H

#include "nel/misc/types_nl.h"
#include <nel/misc/debug.h>
#include <nel/misc/file.h>
    TokenScopedIdentifier,
    TokenCloseParenthesis,
    CToken(uint start = 0, uint end = 0, TToken token = TokenUnknown, CTokenizer* tokenizer = NULL) : Tokenizer(tokenizer), Start(start), End(end), Token(token) {}
    CToken(CTokenizer* tokenizer) : Tokenizer(tokenizer), Start(0), End(0), Token(TokenUnknown) {}

    CTokenizer* Tokenizer;

    std::string get() const { return Tokenizer->get(*this); }
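    // A CToken only stores character offsets into the buffer of the tokenizer
    // that produced it; the text is materialized on demand through get().
    // Minimal usage sketch (the script text and offsets are hypothetical):
    //
    //   CTokenizer  tk("class CFoo");
    //   CToken      tok(0, 5, TokenClass, &tk);
    //   std::string word = tok.get();   // delegates to tk.get(tok) -> "class"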
    CTokenizer() : _Buffer(NULL), _Size(0), _CurrentToken(0) { }

    CTokenizer(const std::string &text)

    void readFile(const std::string &filename)

        if (!f.open(filename))

        uint size = f.getFileSize();
        char *buffer = new char[size+1];
        f.serialBuffer((uint8*)buffer, size);

        init(std::string(buffer));
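        // Hedged usage sketch for readFile(): the whole file is read into a heap
        // buffer and handed to init() as a std::string. The script name below is
        // hypothetical and error handling is omitted.
        //
        //   CTokenizer tk;
        //   tk.readFile("entities.pds");   // load the script text into _Buffer
        //   if (!tk.tokenize())            // split it into _Tokens
        //       nlwarning("PD_PARSE: tokenize failed");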
    void init(const std::string &text)

    void init(const char* text, uint size = 0)

        _Size = (size > 0 ? size : (uint)strlen(text));

        nlassert(_Buffer != NULL);
        _TempToken.Start = 0;

        initOneLetterTokens();
            CToken token = nextToken();

            nlassert(token.Tokenizer != NULL);

            if (token.Token == TokenUnknown)

            if (token.Token == TokenEOF)

            if (token.Token == TokenIncludeScript)

                CToken scriptFile = nextToken();
                if (scriptFile.Token != TokenString)

                std::string file = scriptFile.get();

                CTokenizer* sub = new CTokenizer();
                _Includes.push_back(sub);

                if (!sub->tokenize())

                _Tokens.insert(_Tokens.end(), sub->_Tokens.begin(), sub->_Tokens.end());

            _Tokens.push_back(token);
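            // Include handling: a TokenIncludeScript keyword must be followed by a
            // string token naming another script; that script is given to a freshly
            // allocated sub-tokenizer (kept alive in _Includes, since every CToken
            // points back at the tokenizer owning its buffer) and the sub-tokenizer's
            // tokens are spliced into this one's _Tokens. Hedged script sketch
            // (file names hypothetical):
            //
            //   // main.pds
            //   verbatim "common.pds"   // tokens of common.pds are inlined here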
        std::vector<CToken>::iterator it;
        for (it=_Tokens.begin(); it!=_Tokens.end(); ++it)

            if ((*it).Token == TokenIdentifier)

                std::vector<CToken>::iterator startit = it;
                std::vector<CToken>::iterator endit = it;
                while ((++it) != _Tokens.end() &&
                       (*it).Token == TokenRefineScope &&
                       (*it).Tokenizer == (*startit).Tokenizer &&
                       (++it) != _Tokens.end() &&
                       (*it).Token == TokenIdentifier &&
                       (*it).Tokenizer == (*startit).Tokenizer)

                if (endit != startit)

                    CToken newToken(this);
                    newToken.Token = TokenScopedIdentifier;
                    newToken.Start = (*startit).Start;
                    newToken.End = (*endit).End;

                    it = _Tokens.erase(startit, endit);
                    it = _Tokens.insert(it, newToken);
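        // Post-pass over _Tokens: a run of the form identifier :: identifier :: ...
        // produced by the same tokenizer is merged into one TokenScopedIdentifier
        // whose range spans from the first identifier's Start to the last one's End.
        // Rough effect sketch (the identifier names are hypothetical):
        //
        //   source text :  CEntity::CPosition
        //   before pass :  TokenIdentifier, TokenRefineScope, TokenIdentifier
        //   after pass  :  TokenScopedIdentifier -> get() == "CEntity::CPosition"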
    const CToken &currentToken() const

        return _Tokens[_CurrentToken];

    TToken current() const

        return currentToken().Token;

        return _CurrentToken >= _Tokens.size();

        _Stack.push_back(_CurrentToken);

        nlassert(!_Stack.empty());
        _CurrentToken = _Stack.back();
        if (_CurrentToken < _Mark)
            error(_Tokens[_Mark], "parse");

        _Mark = _CurrentToken;
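    // Token cursor management: _CurrentToken indexes the current token in _Tokens,
    // _Stack holds saved positions and _Mark is a barrier the parser must not
    // rewind past (restoring to a position before _Mark raises a "parse" error).
    // Hedged backtracking sketch; the push()/pop() wrapper names around the _Stack
    // code above are assumptions, and parseClass() is hypothetical:
    //
    //   tk.push();              // remember the current position
    //   if (!parseClass(tk))
    //       tk.pop();           // rule failed: rewind and try another rule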
    std::string get(const CToken &token) const

        nlassert(token.Tokenizer != NULL);

        if (token.Tokenizer != this)
            return token.Tokenizer->get(token);

        std::string str(_Buffer+token.Start, token.End-token.Start);
        if (token.Token == TokenString)

            std::string::size_type pos = 0;
            while ((pos = str.find('\\', pos)) != std::string::npos)

                if (pos+1 == str.size())

                    str.insert(pos, "\n");

                    str.insert(pos, "\r");

                    str.insert(pos, "\t");
    void getFileLine(const CToken &token, uint &line, uint &col, std::string &file)

        nlassert(token.Tokenizer != NULL);

        if (token.Tokenizer != this)

            token.Tokenizer->getFileLine(token, line, col, file);

        uint pos = token.Start;

            if (_Buffer[n] == '\0')

            if (_Buffer[n] == '\t')

            else if (_Buffer[n] != '\r')

            if (_Buffer[n] == '\n')
    void error(const CToken &token, const char *errType = "syntax", const char *errMsg = NULL)

        if (token.Tokenizer != this && token.Tokenizer != NULL)

            token.Tokenizer->error(token, errType, errMsg);

        uint pos = token.Start;

        uint line = 1, col = 1;
        uint lineStartAt = 0, lineEndAt;

            if (_Buffer[n] == '\0')

            if (_Buffer[n] == '\t')

            else if (_Buffer[n] != '\r')

            if (_Buffer[n] == '\n')
                ++line, col = 1, lineStartAt = n+1;

        while (_Buffer[lineEndAt] != '\0' && _Buffer[lineEndAt] != '\n' && _Buffer[lineEndAt] != '\r')

        NLMISC::createDebug();

        std::string errorMsg = NLMISC::toString("PD_PARSE: file %s, %s error at line %d, column %d%s%s", _File.c_str(), errType, line, col, (errMsg != NULL ? ": " : ""), (errMsg != NULL ? errMsg : ""));

        NLMISC::ErrorLog->displayRawNL("%s", errorMsg.c_str());
        std::string extr(_Buffer+lineStartAt, lineEndAt-lineStartAt);
        NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());

        for (i=0; i<extr.size() && i<n-lineStartAt; ++i)

        extr.erase(n-lineStartAt);

        NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());
        nlerror("%s", errorMsg.c_str());
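        // error() resolves the token back to a line and column in the original
        // buffer, logs the message, the offending source line and a marker line
        // underneath it, then aborts through nlerror(). Roughly, the log looks like
        // this (file name, line number and source text are hypothetical):
        //
        //   PD_PARSE: file entities.pds, syntax error at line 12, column 7
        //   class CEntity {
        //         ^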
    std::map<std::string, TToken> _Keywords;

    /// One letter tokens
    TToken _OneLetterTokens[256];

    std::vector<CToken> _Tokens;

    std::vector<uint> _Stack;

    /// Currently used token

    std::vector<CTokenizer*> _Includes;
        _TempToken.Start = 0;

            return CToken(0, 0, TokenEOF, this);

        _TempToken.Start = _TempToken.End;
        _TempToken.Token = TokenUnknown;
        _TempToken.Tokenizer = this;

        char parse = popChar();

        if (isalpha(parse) || parse == '_')

            while (!posAtEnd() && (isalnum(parse = getChar()) || parse == '_'))

            std::map<std::string, TToken>::iterator it;
            _TempToken.Token = ((it = _Keywords.find(get(_TempToken))) != _Keywords.end() ? (*it).second : TokenIdentifier);

        else if (isdigit(parse))

            while (!posAtEnd() && isdigit(getChar()))

            _TempToken.Token = TokenNumber;

        else if (parse == '"')

            _TempToken.Token = TokenString;
            return CToken(_TempToken.Start+1, _TempToken.End-1, _TempToken.Token, this);
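            // String literals keep their content only: the returned token's range is
            // shrunk by one character on each side, so the surrounding quotes are not
            // part of the token and get() later yields just the quoted text.
            // Sketch (the literal is hypothetical):
            //
            //   buffer :  ... "hello" ...
            //   token  :  Start/End cover  hello  (quotes excluded), Token == TokenString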
        else if (parse == '@')

            if (popChar() == '@')

                uint startTrim = _TempToken.Start+2;
                uint endTrim = _TempToken.End-2;

                while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))

                while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))

                _TempToken.Token = TokenCppCode;
                return CToken(startTrim, endTrim, _TempToken.Token, this);
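        // Text wrapped between a pair of "@@" markers is captured as a single
        // TokenCppCode token: the two-character markers are skipped (Start+2 /
        // End-2) and leading/trailing whitespace is trimmed before the token is
        // returned. Hedged script sketch (the embedded C++ is hypothetical):
        //
        //   @@ uint32 computeChecksum(const CEntity &e); @@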
        else if (parse == '/')

            if (popChar() == '@')

                uint startTrim = _TempToken.Start+2;
                uint endTrim = _TempToken.End-2;

                while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))

                while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))

                _TempToken.Token = TokenDescription;
                return CToken(startTrim, endTrim, _TempToken.Token, this);

            else if (parse == '/')

                _TempToken.Token = TokenSlash;

                // skip to end of line
                while (!posAtEnd() && (parse = popChar()) != '\n' && parse != '\r')

            else if (parse == '*')

                // skip to comment close

                if (popChar() == '/')

        else if (parse == ':')

            _TempToken.Token = TokenColon;

                _TempToken.Token = TokenRefineScope;

                _TempToken.Token = TokenColon;

        else if (getOLToken(parse) != TokenUnknown)
            _TempToken.Token = getOLToken(parse);

        if (_TempToken.Token == TokenUnknown)
    bool posAtEnd() const

        return _TempToken.End >= _Size;

        while (!posAtEnd() && isspace(_Buffer[_TempToken.End]))

        nlassert(!posAtEnd());
        return _Buffer[(_TempToken.End)++];

        nlassert(!posAtEnd());
        return _Buffer[_TempToken.End];

        nlassert(_TempToken.End > 0);
    /// init one letter tokens
    void initOneLetterTokens()

        for (i=0; i<256; ++i)
            _OneLetterTokens[i] = TokenUnknown;

        setOLToken('{', TokenOpenBrace);
        setOLToken('}', TokenCloseBrace);
        setOLToken('(', TokenOpenParenthesis);
        setOLToken(')', TokenCloseParenthesis);
        setOLToken('[', TokenOpenBracket);
        setOLToken(']', TokenCloseBracket);
        setOLToken('<', TokenLessThan);
        setOLToken('>', TokenGreaterThan);
        setOLToken('=', TokenEqual);
        setOLToken(',', TokenComma);
        setOLToken('.', TokenDot);
        setOLToken(':', TokenColon);
        setOLToken(';', TokenSemiColon);
        setOLToken('-', TokenMinus);
        setOLToken('+', TokenPlus);
        setOLToken('*', TokenTimes);
        setOLToken('\\', TokenAntiSlash);
        setOLToken('%', TokenMod);
        setOLToken('#', TokenSharp);
        setOLToken('&', TokenAnd);
        setOLToken('|', TokenOr);
        setOLToken('^', TokenCirc);
        setOLToken('?', TokenInterrog);
        setOLToken('!', TokenExclam);
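    // Single-character punctuation is resolved through the 256-entry
    // _OneLetterTokens table rather than a chain of comparisons; nextToken()
    // falls back to getOLToken() once keywords, numbers, strings and comments
    // have been ruled out. Lookup sketch (characters taken from the table above):
    //
    //   getOLToken('{')  -> TokenOpenBrace
    //   getOLToken(';')  -> TokenSemiColon
    //   getOLToken('$')  -> TokenUnknown   // never registered, so no token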
    /// set one letter token
    void setOLToken(char c, TToken token)

        _OneLetterTokens[(uint)c] = token;

    /// get one letter token
    TToken getOLToken(char c) const

        return _OneLetterTokens[(uint)c];
        _Keywords["verbatim"] = TokenIncludeScript;

        _Keywords["class"] = TokenClass;
        _Keywords["enum"] = TokenEnum;
        _Keywords["dimension"] = TokenDimension;
        _Keywords["parent"] = TokenParent;
        _Keywords["flag"] = TokenFlag;
        _Keywords["file"] = TokenFile;
        _Keywords["db"] = TokenDb;
        _Keywords["type"] = TokenType;
        _Keywords["key"] = TokenKey;
        _Keywords["hidden"] = TokenHidden;
        _Keywords["extern"] = TokenExtern;
        _Keywords["mirrored"] = TokenMirrored;
        _Keywords["implements"] = TokenImplements;
        _Keywords["mapped"] = TokenMapped;
        _Keywords["derived"] = TokenDerived;
        _Keywords["initfill"] = TokenInitFill;
        _Keywords["logmsg"] = TokenLogMsg;
        _Keywords["logcontext"] = TokenLogContext;
        _Keywords["reserve"] = TokenReserve;
        _Keywords["include"] = TokenInclude;
        _Keywords["usepch"] = TokenUsePch;
        _Keywords["writetriggered"] = TokenWriteTrigger;
        _Keywords["separated"] = TokenSeparated;