2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module gaem
.parser
.lexer
is aliced
;
19 import gaem
.parser
.tokens
;
22 // ////////////////////////////////////////////////////////////////////////// //
/// Render this location as `file (line,col)`.
string toString () const {
  import std.string : format;
  return format("%s (%s,%s)", file, line, col);
}
/// Render this location as `(line,col)`, leaving the file name out.
string toStringNoFile () const {
  import std.string : format;
  return format("(%s,%s)", line, col);
}
/// `true` when both `line` and `col` are greater than zero.
@property bool valid () const pure nothrow @safe @nogc {
  pragma(inline, true);
  if (line <= 0) return false;
  return (col > 0);
}
35 // ////////////////////////////////////////////////////////////////////////// //
36 public class ErrorAt
: Exception
{
/// Create an error without touching `loc`.
/// `msg`, `file`, `line` and `next` are forwarded to the `Exception` constructor;
/// `file`/`line` default to the caller's `__FILE__`/`__LINE__`.
this (string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  super(msg, file, line, next);
}
/// Create an error bound to a source location.
/// `aloc` is stored in `loc`; `msg`, `file`, `line` and `next` are forwarded
/// to the `Exception` constructor.
this (in Loc aloc, string msg, Throwable next=null, string file=__FILE__, usize line=__LINE__) pure nothrow @safe @nogc {
  loc = aloc;
  super(msg, file, line, next);
}
44 // ////////////////////////////////////////////////////////////////////////// //
60 Loc loc
, eloc
; // token start, token end (after last char)
61 Type type
= Type
.EOF
; // token type
68 void mustbeType (Token
.Type tp
, string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
70 if (type
!= tp
) throw new ErrorAt(loc
, msg
, null, file
, line
);
/// Require this token to be of type `Type.Id`.
/// Delegates to `mustbeType`, passing `msg`, `file` and `line` through unchanged.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Id, msg, file, line);
}
/// Require this token to be of type `Type.Str`.
/// Delegates to `mustbeType`, passing `msg`, `file` and `line` through unchanged.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Str, msg, file, line);
}
/// Require this token to be of type `Type.Num`.
/// Delegates to `mustbeType`, passing `msg`, `file` and `line` through unchanged.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  mustbeType(Type.Num, msg, file, line);
}
76 string
toString () const @trusted {
77 import std
.string
: format
;
78 final switch (type
) with (Type
) {
79 case EOF
: return "(%s,%d): <EOF>".format(loc
.line
, loc
.col
);
80 case Kw
: return "(%s,%d): kw.%s <%s>".format(loc
.line
, loc
.col
, kw
, tkstr
);
81 case Id
: return "(%s,%d): Id:%s".format(loc
.line
, loc
.col
, tkstr
);
82 case Str
: return "(%s,%d): Str:%s".format(loc
.line
, loc
.col
, Lexer
.quote(tkstr
));
83 case Num
: return "(%s,%d): Num:%s".format(loc
.line
, loc
.col
, num
);
84 case Spec
: return "(%s,%d): Spec:<%s>".format(loc
.line
, loc
.col
, tkstr
);
/// Get the token text as an immutable `string`.
/// Returns `null` when `tkstr` is empty; otherwise returns `tkstr.idup`.
/// The copy is deliberate: it keeps the result from anchoring the whole
/// source string. It allocates on every call, so use with caution.
@property string istr () {
  pragma(inline, true);
  if (tkstr.length == 0) return null;
  return tkstr.idup;
}
95 const pure nothrow @nogc:
/// Compare with a keyword: `true` only when this token has type `Type.Kw`
/// and its `kw` equals `akw`.
bool opEquals (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (kw == akw);
}
/// `true` when this token has type `Type.Kw` and its `kw` equals `akw`.
bool isKw (Keyword akw) {
  pragma(inline, true);
  if (type != Type.Kw) return false;
  return (kw == akw);
}
/// `true` when this token has type `Type.Kw` (any keyword).
bool isKw () {
  pragma(inline, true);
  return (type == Type.Kw);
}
/// Token text: returns the `tkstr` slice as-is, without copying.
const(char)[] str () {
  pragma(inline, true);
  return tkstr;
}
/// Keyword carried by this token: `kw` for a `Type.Kw` token,
/// `Keyword.NoKW` for every other token type.
Keyword Kw () {
  pragma(inline, true);
  if (type == Type.Kw) return kw;
  return Keyword.NoKW;
}
/// `true` when this token has type `Type.Id`.
bool isId () {
  pragma(inline, true);
  return (type == Type.Id);
}
/// `true` when this token has type `Type.Str`.
bool isStr () {
  pragma(inline, true);
  return (type == Type.Str);
}
/// `true` when this token has type `Type.Num`.
bool isNum () {
  pragma(inline, true);
  return (type == Type.Num);
}
/// `true` when this token has type `Type.Spec`.
bool isSpec () {
  pragma(inline, true);
  return (type == Type.Spec);
}
/// `true` when this token has type `Type.EOF`.
bool isEOF () {
  pragma(inline, true);
  return (type == Type.EOF);
}
111 // ////////////////////////////////////////////////////////////////////////// //
112 public final class Lexer
{
116 Loc cpos
; // position for last `getChar()`
117 Loc pend
; // end of previous token, for better error messages
119 bool lastWasEOL
= true;
121 Token tokeof
; // will be fixed by `nextToken()`
124 this(T
) (const(char)[] atext
, T afname
=null) if (is(T
: const(char)[])) {
126 if (afname
.length
> 0) { static if (is(T
== string
)) cpos
.file
= afname
; else cpos
.file
= afname
.idup
; }
127 tokeof
.loc
.file
= cpos
.file
;
134 void error (string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
135 pragma(inline
, true);
136 throw new ErrorAt((lookup
.length
== 0 ? loc
: lookup
[0].loc
), msg
, null, file
, line
);
139 static private void error (in ref Token tk
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
140 pragma(inline
, true);
141 throw new ErrorAt(tk
.loc
, msg
, null, file
, line
);
144 static private void error() (in auto ref Loc loc
, string msg
, string file
=__FILE__
, usize line
=__LINE__
) {
145 pragma(inline
, true);
146 throw new ErrorAt(loc
, msg
, null, file
, line
);
149 const(char)[] line (uint idx
) {
153 while (pos
< text
.length
&& text
.ptr
[pos
] != '\n') ++pos
;
156 if (pos
>= text
.length
) return null;
158 while (epos
< text
.length
&& text
.ptr
[epos
] != '\n') ++epos
;
159 while (epos
> pos
&& text
.ptr
[epos
-1] <= ' ') --epos
;
160 return text
[pos
..epos
];
164 if (lookup
.length
> 0) {
165 pend
= lookup
.ptr
[0].eloc
;
166 ++pend
.col
; // for better error messages
167 ++pend
.tpos
; // to be consistent
168 foreach (immutable idx
; 1..lookup
.length
) lookup
.ptr
[idx
-1] = lookup
.ptr
[idx
];
170 lookup
.assumeSafeAppend
;
175 @property pure nothrow @safe @nogc {
/// `true` when the `lookup` buffer holds no tokens.
bool empty () const {
  pragma(inline, true);
  return (lookup.length == 0);
}
/// Current token, by reference: the first entry of `lookup`,
/// or `tokeof` when `lookup` is empty.
ref inout(Token) front () inout {
  pragma(inline, true);
  if (lookup.length) return lookup.ptr[0]; // length already checked, so the unchecked `.ptr` access is in range
  return tokeof;
}
/// Start location (`loc`) of the current token, i.e. of `front`.
auto loc () const {
  pragma(inline, true);
  return front.loc;
}
/// End location (`eloc`) of the current token, i.e. of `front`.
auto eloc () const {
  pragma(inline, true);
  return front.eloc;
}
/// Returns `pend`, the recorded end of the previous token.
auto peloc () const {
  pragma(inline, true);
  return pend;
}
/// `true` when the current token (`front`) is an identifier.
bool isId () const {
  pragma(inline, true);
  return front.isId;
}
/// `true` when the current token (`front`) is a string.
bool isStr () const {
  pragma(inline, true);
  return front.isStr;
}
/// `true` when the current token (`front`) is a number.
bool isNum () const {
  pragma(inline, true);
  return front.isNum;
}
/// `true` when the current token (`front`) has the `Spec` type.
bool isSpec () const {
  pragma(inline, true);
  return front.isSpec;
}
/// `true` when the current token (`front`) is the keyword `kw`.
bool isKw (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw(kw);
}
/// `true` when the current token (`front`) is any keyword.
bool isKw () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return front.isKw();
}
/// `lexer == kw`: compares the current token (`front`) with the keyword `kw`.
bool opEquals (Keyword kw) const pure nothrow @safe @nogc {
  pragma(inline, true);
  return (front == kw);
}
195 void expect (Keyword kw
, string file
=__FILE__
, usize line
=__LINE__
) {
196 if (!front
.isKw(kw
)) error(loc
, "`"~keywordtext(kw
)~"` expected", file
, line
);
200 // this converts id to `string` via `.idup`, use with caution!
201 // `.idup` is used to not anchor the whole source string
202 string
expectId (string msg
="identifier expected", string file
=__FILE__
, usize line
=__LINE__
) {
203 mustbeId(msg
, file
, line
);
204 auto res
= lookup
[0].istr
;
209 // this converts id to `string` via `.idup`, use with caution!
210 // `.idup` is used to not anchor the whole source string
211 string
expectStr (string msg
="string expected", string file
=__FILE__
, usize line
=__LINE__
) {
212 //pragma(inline, true);
213 mustbeStr(msg
, file
, line
);
214 auto res
= lookup
[0].istr
;
/// Require the current token (`front`) to have type `tp`.
/// The check is delegated to `Token.mustbeType`; the token is not consumed.
/// `msg`, `file` and `line` are passed through unchanged.
void mustbeType (Token.Type tp, string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeType(tp, msg, file, line);
}
/// Require the current token (`front`) to be an identifier.
/// Delegates to `Token.mustbeId`; the token is not consumed.
void mustbeId (string msg="identifier expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeId(msg, file, line);
}
/// Require the current token (`front`) to be a string.
/// Delegates to `Token.mustbeStr`; the token is not consumed.
void mustbeStr (string msg="string expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeStr(msg, file, line);
}
/// Require the current token (`front`) to be a number.
/// Delegates to `Token.mustbeNum`; the token is not consumed.
void mustbeNum (string msg="number expected", string file=__FILE__, usize line=__LINE__) {
  pragma(inline, true);
  front.mustbeNum(msg, file, line);
}
225 bool eatKw (Keyword kw
) {
226 if (!isKw(kw
)) return false;
231 ref Token
peek (uint dist
) {
232 while (!eof
&& lookup
.length
<= dist
) nextToken();
233 return (dist
< lookup
.length ? lookup
.ptr
[dist
] : tokeof
);
/// Index-style lookahead: `lexer[dist]` is `peek(dist)`.
/// `peek` takes a `uint`, so the `usize` index is narrowed explicitly
/// (saturating at `uint.max`) rather than relying on an implicit
/// `usize` -> `uint` conversion, which is rejected where `usize` is 64-bit.
ref Token opIndex (usize dist) {
  pragma(inline, true);
  immutable uint d = (dist > uint.max ? uint.max : cast(uint)dist);
  return peek(d);
}
238 // return loc for next `getChar()`
239 Loc
nextLoc () nothrow @safe @nogc {
241 if (lastWasEOL
) { ++res
.line
; res
.col
= 1; } else ++res
.col
;
245 char peekChar (uint dist
=0) nothrow @trusted @nogc {
246 pragma(inline
, true);
247 return (tpos
+dist
>= text
.length ?
'\0' : (text
.ptr
[tpos
+dist
] ? text
.ptr
[tpos
+dist
] : ' '));
251 char getChar () nothrow @trusted @nogc {
252 if (tpos
>= text
.length
) { tpos
= text
.length
; eof
= true; }
253 if (eof
) return '\0';
255 char ch
= text
.ptr
[tpos
++];
256 if (ch
== '\0') ch
= ' ';
257 if (lastWasEOL
) { ++cpos
.line
; cpos
.col
= 1; } else ++cpos
.col
;
258 lastWasEOL
= (ch
== '\n');
262 // skip blanks and comments
263 //TODO: make special "comment" token(s)?
264 void skipBlanks () @safe {
268 switch (peekChar(1)) {
269 case '/': // single-line comment
270 do { ch
= getChar(); } while (ch
!= 0 && ch
!= '\n');
272 case '*': // multiline comment
273 getChar(); // skip slash
275 getChar(); // skip star
277 ch
= ' '; // we need this
281 if (ch
== 0) error(lc
, "unterminated comment");
282 if (ch
== '/' && pch
== '*') break;
288 if (ch
== 0 || ch
> 32) return;
293 private void nextToken () {
297 if (peekChar
== '\0') {
301 //++tokeof.eloc.col; // for better error messages
302 //++tokeof.eloc.tpos; // to be consistent
312 if (ch
== '"' || ch
== '\'') {
314 tk
.type
= Token
.Type
.Str
;
315 ++tkspos
; // skip quote
318 if (ch
== 0) error(tk
, "unterminated string");
319 if (ch
== ech
) break;
321 tk
.tkstr
= text
[tkspos
..tpos
-1]; // -1 due to eaten quote
323 //++tk.eloc.col; // for better error messages
324 //++tk.eloc.tpos; // to be consistent
332 tk
.type
= Token
.Type
.Num
;
333 getChar(); // skip dollar
334 int dv
= digitValue(peekChar
);
335 if (dv
< 0 || dv
> 15) error(tk
, "hex number expected");
337 dv
= digitValue(peekChar
);
338 if (dv
< 0 || dv
> 15) break;
343 if (isIdChar(ch
) || ch
== '.') error(tk
, "hex number expected");
345 tk
.tkstr
= text
[tkspos
..tpos
];
347 //++tk.eloc.col; // for better error messages
348 //++tk.eloc.tpos; // to be consistent
354 if (isDigit(ch
) ||
(ch
== '.' && isDigit(peekChar
))) {
356 tk
.type
= Token
.Type
.Num
;
357 if (ch
!= '.') n
= ch
-'0';
361 if (!isDigit(peekChar
)) break;
365 if (peekChar
== '.') ch
= getChar();
369 if (!isDigit(peekChar
)) error(tk
, "real number expected");
372 if (!isDigit(peekChar
)) break;
378 if (peekChar
== 'e' || peekChar
== 'E') {
382 if (peekChar
== '+') getChar(); else if (peekChar
== '-') { getChar(); neg = true; }
383 if (!isDigit(peekChar
)) error(tk
, "invalid number");
385 while (isDigit(peekChar
)) {
388 if (e
< 0) error(tk
, "invalid number (exponent overflow)");
390 //{ import std.conv : to; assert(0, to!string(e)); }
392 while (e
-- > 0) n
= n
/10;
394 while (e
-- > 0) n
= n
*10;
398 tk
.tkstr
= text
[tkspos
..tpos
];
400 //++tk.eloc.col; // for better error messages
401 //++tk.eloc.tpos; // to be consistent
403 if (isIdChar(ch
) || ch
== '.') error(tk
, "invalid number");
410 tk
.type
= Token
.Type
.Id
;
411 while (isIdChar(peekChar
)) getChar();
412 tk
.tkstr
= text
[tkspos
..tpos
];
414 //++tk.eloc.col; // for better error messages
415 //++tk.eloc.tpos; // to be consistent
416 if (auto kw
= tk
.tkstr
in keywords
) {
417 tk
.type
= Token
.Type
.Kw
;
427 if (auto xkw
= dbuf
[0..1] in keywords
) {
428 tk
.type
= Token
.Type
.Kw
;
430 foreach (uint dpos
; 1..dbuf
.length
) {
431 dbuf
[dpos
] = peekChar
;
432 if (auto kw
= dbuf
[0..dpos
+1] in keywords
) {
433 tk
.type
= Token
.Type
.Kw
;
435 getChar(); // eat token char
441 tk
.type
= Token
.Type
.Spec
;
443 tk
.tkstr
= text
[tkspos
..tpos
];
445 //++tk.eloc.col; // for better error messages
446 //++tk.eloc.tpos; // to be consistent
/// Shorthand for `selectN!(RetType, mode)` with its leading `n` argument set to 0;
/// `args` are forwarded unchanged.
auto select(RetType, string mode="peek", A...) (scope A args) {
  pragma(inline, true);
  return selectN!(RetType, mode)(0, args);
}
452 auto selectN(RetType
, string mode
="peek", A
...) (usize n
, scope A args
) {
453 import std
.traits
: ReturnType
;
455 static assert(mode
== "peek" || mode
== "pop" || mode
== "pop-nondefault", "selectN: invalid mode: '"~mode
~"'");
457 template isGoodDg(usize idx
, T
) {
458 private import std
.traits
;
459 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 1) {
460 enum isGoodDg
= is(Parameters
!(A
[idx
])[0] == T
);
462 enum isGoodDg
= false;
466 template isGoodArglessDg(usize idx
) {
467 private import std
.traits
;
468 static if (idx
< A
.length
&& isCallable
!(A
[idx
]) && arity
!(args
[idx
]) == 0) {
469 enum isGoodArglessDg
= true;
471 enum isGoodArglessDg
= false;
475 // sorry, but this has to be string mixin, due to possible empty `arg`
476 enum DoCallDg(string arg
) =
477 "static if (!is(ReturnType!(A[xidx]) == void)) return cast(RetType)(args[xidx]("~arg
~")); else { args[xidx]("~arg
~"); return RetType.init; }";
479 // we can't have inner mixin templates, so... sorry, it's string again
481 static if (isGoodDg
!(xidx
, Token
)) { mixin(DoCallDg
!"tk"); }
482 else static if (isGoodDg
!(xidx
, Loc
)) { mixin(DoCallDg
!"tk.loc"); }
483 else static if (isGoodDg
!(xidx
, Token
.Type
)) { mixin(DoCallDg
!"tk.type"); }
484 else static if (isGoodDg
!(xidx
, Keyword
)) { mixin(DoCallDg
!"tk.Kw"); }
485 else static if (isGoodArglessDg
!(xidx
)) { mixin(DoCallDg
!""); }
486 else static assert(0, "selectN: invalid delegate #"~xidx
.stringof
);
491 foreach (immutable aidx
, auto arg
; args
) {
492 static if (aidx
%2 == 0) {
493 static if (is(typeof(arg
) == Keyword
) ||
is(typeof(arg
) == Token
.Type
)) {
494 static if (is(typeof(arg
) == Keyword
)) found
= (tk
== arg
);
495 else static if (is(typeof(arg
) == Token
.Type
)) found
= (tk
.type
== arg
);
496 else static assert(0, "wtf?!");
499 static if (mode
!= "peek") popFront();
507 static if (mode
== "pop") popFront();
514 error(tk
, "selectN is out of nodes");
519 private immutable byte[256] digitValues
= {
521 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = cast(byte)(idx
-'0');
522 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = cast(byte)(idx
-'A'+10);
523 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = cast(byte)(idx
-'a'+10);
527 private immutable bool[256] idStartChars
= {
528 bool[256] res
= false;
529 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
530 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
535 private immutable bool[256] idChars
= {
536 bool[256] res
= false;
537 foreach (ubyte idx
; '0'..'9'+1) res
[idx
] = true;
538 foreach (ubyte idx
; 'A'..'Z'+1) res
[idx
] = true;
539 foreach (ubyte idx
; 'a'..'z'+1) res
[idx
] = true;
/// `true` for the ASCII decimal digits `'0'`..`'9'`, `false` for any other char.
bool isDigit() (char ch) {
  pragma(inline, true);
  return !(ch < '0' || ch > '9');
}
/// Look up the `digitValues` table entry for `ch`.
/// The index is `ch` cast to `ubyte`, so it always fits the 256-entry table
/// and the unchecked `.ptr` access stays in range.
int digitValue() (char ch) {
  pragma(inline, true);
  immutable ubyte idx = cast(ubyte)ch;
  return digitValues.ptr[idx];
}
/// Look up the `idStartChars` table entry for `ch`.
/// The index is `ch` cast to `ubyte`, so it always fits the 256-entry table
/// and the unchecked `.ptr` access stays in range.
bool isIdStart() (char ch) {
  pragma(inline, true);
  immutable ubyte idx = cast(ubyte)ch;
  return idStartChars.ptr[idx];
}
/// Look up the `idChars` table entry for `ch`.
/// The index is `ch` cast to `ubyte`, so it always fits the 256-entry table
/// and the unchecked `.ptr` access stays in range.
bool isIdChar() (char ch) {
  pragma(inline, true);
  immutable ubyte idx = cast(ubyte)ch;
  return idChars.ptr[idx];
}
549 string
gmlQuote (const(char)[] s
) {
550 import std
.array
: appender
;
551 auto res
= appender
!string();
552 enum Prev
{ Nothing
, Char
, Spec
}
553 Prev prev
= Prev
.Nothing
;
554 foreach (char ch
; s
) {
555 if (ch
< ' ' || ch
== 127 || ch
== '"') {
556 import std
.conv
: to
;
557 final switch (prev
) with (Prev
) {
559 case Char
: res
.put(`"+`); break;
560 case Spec
: res
.put(`+`); break;
564 res
.put(to
!string(cast(uint)ch
));
567 final switch (prev
) with (Prev
) {
568 case Nothing
: res
.put('"'); break;
570 case Spec
: res
.put(`+"`); break;
576 if (prev
== Prev
.Nothing
) return `""`;
577 if (prev
== Prev
.Char
) res
.put('"');
581 /// quote string: append double quotes, screen all special chars;
582 /// so quoted string forms valid D string literal.
584 string
quote (const(char)[] s
) {
585 import std
.array
: appender
;
586 import std
.format
: formatElement
, FormatSpec
;
587 auto res
= appender
!string();
588 FormatSpec
!char fspc
; // defaults to 's'
589 formatElement(res
, s
, fspc
);
595 version(gml_lexer_test
) unittest {
598 auto s
= readText("scrDrawHUD.gml");
599 auto lex
= new Lexer(s
, "scrDrawHUD.gml");
602 //if (lex == Keyword.RCurly) writeln("*******************");
603 auto v
= lex
.select
!(int, "pop")(
604 Keyword
.LCurly
, (ref Token tk
) => 1,
605 Keyword
.RCurly
, (Keyword kw
) => 2,
606 Keyword
.Semi
, () => 6,
607 Keyword
.Sub
, (Loc loc
) => 99,
608 Token
.Type
.Num
, (ref Token tk
) => 3,
609 (ref Token tk
) => writeln(tk
),
611 if (v
) writeln("*** ", v
);
612 //writeln(v, ": ", lex.front);
615 } catch (ErrorAt e
) {
616 writeln("PARSE ERROR: ", e
.line
);