1 -- Copyright 2017 Murray Calavera. See LICENSE.
2 -- Standard ML LPeg lexer.
4 local l
= require('lexer')
--- Build a word-matching pattern for a list of Standard ML words.
-- Forwards the list to `l.word_match`, passing "'" as the second argument
-- (presumably so that primes count as word characters -- confirm against the
-- lexer module's documentation).
-- @param words List of words to match.
-- @return The value returned by `l.word_match`.
local function mlword(words)
  return l.word_match(words, "'")
end
-- One or more whitespace characters.
local ws = token(l.WHITESPACE, l.space^1)

-- Single-line comment, valid in Successor ML: the literal `(*)` followed by
-- any number of `l.nonnewline` matches.
local cl = lpeg.P('(*)') * l.nonnewline^0
-- A comment is either the single-line form or a `(*` ... `*)` pair as matched
-- by `l.nested_pair`.
local comment = token(
  l.COMMENT,
  cl + l.nested_pair('(*', '*)')
)

-- A `"`-delimited range (via `l.delimited_range`) with an optional leading `#`.
-- NOTE(review): this local shadows Lua's global `string` library for the rest
-- of the file; code below this point cannot call `string.*` through that name.
local string = token(
  l.STRING,
  lpeg.P('#')^-1 * l.delimited_range('"', true)
)
--- Build a numeric-literal pattern from a single-digit pattern.
-- The result matches either:
--   * one digit, then any number of "digits followed by `_`" groups, then at
--     least one more digit; or
--   * a single digit on its own.
-- @param digit Pattern matching one digit of the desired base.
-- @return The combined pattern.
local function num(digit)
  -- Zero or more groups of optional digits terminated by an underscore.
  local underscore_groups = (digit^0 * lpeg.P('_'))^0
  return digit * underscore_groups * digit^1 + digit
end
-- Decimal digits, built with `num`.
local int = num(l.digit)
-- Fractional part: a `.` followed by decimal digits.
local frac = lpeg.P('.') * int
-- Optional `~` prefix.
local minus = lpeg.P('~')^-1
-- Exponent: `e` or `E`, the optional `~`, then decimal digits.
local exp = lpeg.S('eE') * minus * int
-- Real literal: digits with an exponent (fraction optional), or digits with a
-- fraction (exponent optional).
local real =
  int * frac^-1 * exp
  + int * frac * exp^-1
-- Hexadecimal and binary digit sequences, built with `num`.
local hex = num(l.xdigit)
local bin = num(lpeg.S('01'))
-- Number token (l.NUMBER). The alternatives visible here are:
--   * `0wx` or `0xw` followed by `hex`
--   * `0wb` or `0bw` followed by `bin`
--   * `minus`, then `0x`, then `hex`
--   * `minus`, then `0b`, then `bin`
-- NOTE(review): the expression appears truncated in this view -- nothing
-- precedes the first `+`, and the closing parenthesis of the `token(...)` call
-- is not visible. Confirm the full list of alternatives against the complete
-- file before editing.
31 local number = token(l
.NUMBER
,
33 + (lpeg
.P('0wx') + lpeg
.P('0xw')) * hex
34 + (lpeg
.P('0wb') + lpeg
.P('0bw')) * bin
35 + minus
* lpeg
.P('0x') * hex
36 + minus
* lpeg
.P('0b') * bin
-- Keyword token (l.KEYWORD): the word list is turned into a pattern by
-- `mlword`.
-- NOTE(review): the list ends on a trailing comma and no closing `})` is
-- visible in this view, so further entries may exist -- confirm against the
-- complete file.
41 local keyword
= token(l
.KEYWORD
, mlword
{
42 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
43 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let',
44 'local', 'nonfix', 'of', 'op', 'orelse', 'raise', 'rec', 'then',
45 'type', 'val', 'with', 'withtype', 'while',
47 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
-- Operator token: any single character from the set below. The set also
-- includes the symbols that are valid in identifiers.
local operator = token(
  l.OPERATOR,
  lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')
)
-- Type-name token (l.TYPE): the names below are matched through `mlword`.
-- NOTE(review): this local shadows Lua's built-in `type` function for the
-- rest of the file; it is kept under this name because other parts of the
-- file may refer to it.
local type = token(l.TYPE, mlword{
  'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
  'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector'
})
59 -- `real`, `vector` and `substring` are a problem
-- Function-name token (l.FUNCTION): the names below are matched through
-- `mlword`. `real` and `substring` also appear in the type-name list.
-- NOTE(review): the comment above names `vector`, which is not in the list
-- visible here, and no closing `})` is visible either -- the list appears
-- truncated in this view. Confirm the full contents against the complete file.
60 local func
= token(l
.FUNCTION
, mlword
{
61 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName',
62 'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore',
63 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print',
64 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc',
66 'o', 'abs', 'mod', 'div'
-- Non-symbolic identifiers only.
-- Identifier tail: any run (possibly empty) of alphanumerics, `'` and `_`.
local id = (l.alnum + lpeg.P("'") + lpeg.P('_'))^0
-- Alphabetic identifier: a letter followed by an identifier tail.
local aid = l.alpha * id
-- Qualified identifier: zero or more `name.` prefixes and a final name.
local longid = (aid * lpeg.P('.'))^0 * aid

-- Identifier token: starts with a lowercase letter.
local identifier = token(l.IDENTIFIER, l.lower * id)
-- Type-variable token: a `'` followed by an identifier tail.
local typevar = token(l.VARIABLE, lpeg.P("'") * id)

-- Words that are treated as constants.
local c = mlword{'true', 'false', 'nil'}
-- Constant token: a name starting with an uppercase letter, or one of the
-- words in `c`.
local const = token(l.CONSTANT, l.upper * id + c)

-- Structure-qualifier token: a name immediately followed by `.`.
local structure = token(l.CLASS, aid * lpeg.P('.'))
-- Pattern: one of the keywords `open`, `structure` or `functor` (as a KEYWORD
-- token), then whitespace, then a qualified identifier as a CLASS token.
-- NOTE(review): the left-hand side of this assignment is not visible in this
-- view -- confirm the variable name against the complete file.
80 = token(l
.KEYWORD
, mlword
{'open', 'structure', 'functor'})
81 * ws
* token(l
.CLASS
, longid
)
-- Pattern: the keyword `structure`, whitespace, a name as a CLASS token,
-- whitespace, `=` as an OPERATOR token, and trailing whitespace.
-- NOTE(review): the left-hand side of this assignment is not visible in this
-- view; it is presumably `struct_dec`, which the following definitions use --
-- confirm against the complete file.
84 = token(l
.KEYWORD
, lpeg
.P('structure')) * ws
85 * token(l
.CLASS
, aid
) * ws
86 * token(l
.OPERATOR
, lpeg
.P('=')) * ws
-- `struct_dec` followed by the keyword `struct`.
local struct_new = struct_dec * token(l.KEYWORD, lpeg.P('struct'))
-- `struct_dec` followed by a qualified identifier, tagged as CLASS.
local struct_alias = struct_dec * token(l.CLASS, longid)

-- Lexer module table.
local M = { _NAME = 'sml' }
-- Name/pattern pairs, each associating a rule name with one of the patterns
-- defined earlier in the file.
-- NOTE(review): the opening and closing lines of the enclosing table are not
-- visible in this view, and entries may be missing between the ones shown --
-- confirm the complete list and its order against the complete file.
97 {'struct_new', struct_new
},
98 {'struct_alias', struct_alias
},
99 {'structure', structure
},
102 {'keyword', keyword
},
105 {'operator', operator
},
106 {'typevar', typevar
},
108 {'identifier', identifier
},