build: set version to 0.5
[vis.git] / lua / lexers / sml.lua
blob093e67cfa25a0d42ff30909ae06c7ca1b0362acb
1 -- Copyright 2017 Murray Calavera. See LICENSE.
2 -- Standard ML LPeg lexer.
4 local l = require('lexer')
5 local token = l.token
--- Build a word-list matcher for Standard ML words.
-- The apostrophe is handed to `l.word_match` as its second argument because
-- SML identifiers may contain primes (e.g. `x'`).
-- @param words list (array table) of words to recognise
-- @return the value produced by `l.word_match` for that list
local function mlword(words)
  local extra_word_chars = "'"
  return l.word_match(words, extra_word_chars)
end
-- Whitespace token: a run of one or more `l.space` characters.
local ws
do
  local space_run = l.space^1
  ws = token(l.WHITESPACE, space_run)
end
-- Comments.  Successor ML allows single-line comments introduced by `(*)`;
-- that form is tried first, then the paired `(* ... *)` form.
local cl = lpeg.P('(*)') * l.nonnewline^0
local comment
do
  local block_comment = l.nested_pair('(*', '*)')
  comment = token(l.COMMENT, cl + block_comment)
end
-- String literals delimited by double quotes.  The optional leading `#`
-- lets the same token cover character literals such as #"a".
local string
do
  local char_prefix = lpeg.P('#')^-1
  local quoted = l.delimited_range('"', true)
  string = token(l.STRING, char_prefix * quoted)
end
--- Build a numeral pattern over the given digit class.
-- Matches either a multi-digit numeral in which underscores may appear
-- between digits (never as the first or last character), or one lone digit.
-- @param digit LPeg pattern matching a single digit
-- @return LPeg pattern matching a whole numeral
local function num(digit)
  local underscore = lpeg.P('_')
  -- zero or more digits followed by a separator, e.g. "000_"
  local separated_group = digit^0 * underscore
  local multi_digit = digit * separated_group^0 * digit^1
  return multi_digit + digit
end
-- Building blocks for numeric literals.
local minus = lpeg.P('~')^-1            -- optional SML negation sign
local int = num(l.digit)                -- decimal numeral
local hex = num(l.xdigit)               -- hexadecimal numeral (no prefix)
local bin = num(lpeg.S('01'))           -- binary numeral (no prefix)
local frac = lpeg.P('.') * int          -- fractional part
local exp = lpeg.S('eE') * minus * int  -- exponent part
-- A real needs an exponent, a fraction, or both.
local real = (int * frac^-1 * exp) + (int * frac * exp^-1)
-- Numeric literal token.  The alternatives are tried in the order listed:
-- word literals (`0w` prefixes) first, then optionally negated hex, binary,
-- real and plain integer literals.
local number
do
  local word_dec = lpeg.P('0w') * int
  local word_hex = (lpeg.P('0wx') + lpeg.P('0xw')) * hex
  local word_bin = (lpeg.P('0wb') + lpeg.P('0bw')) * bin
  local signed_hex = minus * lpeg.P('0x') * hex
  local signed_bin = minus * lpeg.P('0b') * bin
  local signed_real = minus * real
  local signed_int = minus * int

  number = token(l.NUMBER,
    word_dec + word_hex + word_bin
    + signed_hex + signed_bin
    + signed_real + signed_int)
end
-- Reserved words of Standard ML.
-- The first group is the Core language, the second the Modules language.
-- `open` and `where` were previously missing: `where type ...` was never
-- highlighted as a keyword, and `open` was only highlighted when the
-- composite `open` rule (keyword + structure name) happened to match.
local keyword = token(l.KEYWORD, mlword{
  -- core
  'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
  'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let',
  'local', 'nonfix', 'of', 'op', 'open', 'orelse', 'raise', 'rec', 'then',
  'type', 'val', 'with', 'withtype', 'while',

  -- modules
  'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
  'struct', 'structure', 'where'
})
-- Operator token: any single character from the set below.  The set also
-- contains the characters that may form symbolic identifiers.
local operator
do
  local symbol_chars = '!*/+-^:@=<>()[]{},;._|#%&$?~`\\'
  operator = token(l.OPERATOR, lpeg.S(symbol_chars))
end
-- Names of common built-in types, highlighted as types.
local type
do
  local type_names = {
    'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
    'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector',
  }
  type = token(l.TYPE, mlword(type_names))
end
-- Common top-level functions, highlighted as functions.
-- NOTE: `real`, `vector` and `substring` are a problem: they also appear in
-- the list of type names, so one name has to serve both roles.
local func
do
  local function_names = {
    'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName',
    'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore',
    'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print',
    'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc',
    'valOf', 'vector',
    -- infix functions
    'o', 'abs', 'mod', 'div',
  }
  func = token(l.FUNCTION, mlword(function_names))
end
-- Alphanumeric (non-symbolic) identifiers only.
-- `id` is the tail of an identifier: letters, digits, primes and underscores.
local id = (l.alnum + lpeg.P("'") + lpeg.P('_'))^0
-- `aid` is an identifier that starts with a letter.
local aid = l.alpha * id
-- `longid` is a dot-qualified identifier such as `List.map`.
local longid = (aid * lpeg.P('.'))^0 * aid

-- Literal constants that do not start with an upper-case letter.
local c = mlword{'true', 'false', 'nil'}

-- Tokens built from the pieces above.
local identifier = token(l.IDENTIFIER, l.lower * id)  -- starts lower-case
local typevar = token(l.VARIABLE, lpeg.P("'") * id)   -- e.g. 'a
local const = token(l.CONSTANT, (l.upper * id) + c)   -- capitalised name or `c`
local structure = token(l.CLASS, aid * lpeg.P('.'))   -- qualifier, dot included
-- `open`, `structure` or `functor` followed by whitespace and a (possibly
-- qualified) name: the word is a keyword token, the name a class token.
local open
do
  local introducer = token(l.KEYWORD, mlword{'open', 'structure', 'functor'})
  local target = token(l.CLASS, longid)
  open = introducer * ws * target
end
-- Common prefix of a structure declaration: `structure Name = ` with
-- mandatory whitespace between the parts.
local struct_dec
  = token(l.KEYWORD, lpeg.P('structure')) * ws
  * token(l.CLASS, aid) * ws
  * token(l.OPERATOR, lpeg.P('=')) * ws

-- `structure Name = struct`: a new structure body follows.
-- `mlword` is used instead of a bare `lpeg.P('struct')` so that only the
-- whole word `struct` is treated as the keyword.  With the bare literal, a
-- right-hand side that merely starts with "struct" (e.g. `structs`) had its
-- first six characters highlighted as a keyword.
local struct_new = struct_dec * token(l.KEYWORD, mlword{'struct'})

-- `structure Name = Other.Name`: an alias for an existing structure.
local struct_alias = struct_dec * token(l.CLASS, longid)
-- Lexer definition table returned to the lexer framework.
-- The rule list keeps the original ordering; the order appears deliberate
-- (for instance 'type' is listed before 'function' although the two word
-- lists overlap), so do not reorder without checking the lexer module.
local M = {
  _NAME = 'sml',
  _rules = {
    {'whitespace', ws},
    {'comment', comment},
    {'number', number},
    {'struct_new', struct_new},
    {'struct_alias', struct_alias},
    {'structure', structure},
    {'open', open},
    {'type', type},
    {'keyword', keyword},
    {'function', func},
    {'string', string},
    {'operator', operator},
    {'typevar', typevar},
    {'constant', const},
    {'identifier', identifier},
  },
}

return M