1 ! Copyright (C) 2008 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel sequences strings arrays math.parser peg peg.ebnf peg.javascript.ast ;
4 IN: peg.javascript.tokenizer
#! Tokenizer grammar for JavaScript, based on the OMeta-JS example at:
#! http://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler
11 EBNF: tokenize-javascript
SingleLineComment = "//" (!("\n") .)* ("\n" | !(.)) => [[ ignore ]]
16 MultiLineComment = "/*" (!("*/") .)* "*/" => [[ ignore ]]
17 Space = " " | "\t" | "\r" | "\n" | SingleLineComment | MultiLineComment
18 Spaces = Space* => [[ ignore ]]
19 NameFirst = Letter | "$" => [[ CHAR: $ ]] | "_" => [[ CHAR: _ ]]
20 NameRest = NameFirst | Digit
21 iName = NameFirst NameRest* => [[ first2 swap prefix >string ]]
47 Name = !(Keyword) iName => [[ ast-name boa ]]
Number = Digits:whole '.' Digits:frac => [[ whole "." frac 3append >string string>number ast-number boa ]]
       | Digits => [[ >string string>number ast-number boa ]]
51 EscapeChar = "\\n" => [[ 10 ]]
54 StringChars1 = (EscapeChar | !('"""') .)* => [[ >string ]]
55 StringChars2 = (EscapeChar | !('"') .)* => [[ >string ]]
56 StringChars3 = (EscapeChar | !("'") .)* => [[ >string ]]
Str = '"""' StringChars1:triple '"""' => [[ triple ast-string boa ]]
    | '"' StringChars2:double '"' => [[ double ast-string boa ]]
    | "'" StringChars3:single "'" => [[ single ast-string boa ]]
60 RegExpFlags = NameRest* => [[ >string ]]
61 NonTerminator = !("\n" | "\r") .
62 BackslashSequence = "\\" NonTerminator => [[ second ]]
63 RegExpFirstChar = !("*" | "\\" | "/") NonTerminator
65 RegExpChar = !("\\" | "/") NonTerminator
67 RegExpChars = RegExpChar*
68 RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]]
RegExp = "/" RegExpBody:body "/" RegExpFlags:flags => [[ body flags ast-regexp boa ]]
70 Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";"
71 | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">="
72 | ">>>=" | ">>>" | ">>=" | ">>" | ">" | "<=" | "<<=" | "<<"
73 | "<" | "++" | "+=" | "+" | "--" | "-=" | "-" | "*="
74 | "*" | "/=" | "/" | "%=" | "%" | "&&=" | "&&" | "||="
75 | "||" | "." | "!" | "&=" | "&" | "|=" | "|" | "^="
77 Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special )