build: set version to 0.5
[vis.git] / lua / lexers / crystal.lua
blob5195387f71199cb9dc0d62b88ac8ca82ff2efb08
1 -- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
2 -- Copyright 2017 Michel Martens.
3 -- Crystal LPeg lexer (based on Ruby).
5 local l = require('lexer')
6 local token, word_match = l.token, l.word_match
7 local P, R, S = lpeg.P, lpeg.R, lpeg.S
-- Module table returned to the lexer framework; `_NAME` identifies this lexer.
local M = {_NAME = 'crystal'}

-- Whitespace: one or more `l.space` characters become a single token.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments: a '#' followed by any run of `l.nonnewline_esc`.
local line_comment = P('#') * l.nonnewline_esc^0
local comment = token(l.COMMENT, line_comment)
-- Opening bracket -> closing bracket for delimiters that come in pairs.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}

-- Match-time pattern that treats the character at the current position as an
-- opening delimiter and returns the position reached by the corresponding
-- delimited range (or one past the end of the input when that range does not
-- match).
local literal_delimitted = P(function(input, index)
  local delimiter = input:sub(index, index)
  -- Only non alpha-numeric, non-whitespace characters may open a literal;
  -- returning no value makes the match fail.
  if delimiter:find('[%w\r\n\f\t ]') then return end

  local closer = delimiter_matches[delimiter]
  local range
  if closer then
    -- Handle nested delimiter/matches in strings.
    range = l.delimited_range(delimiter..closer, false, false, true)
  else
    -- Same character opens and closes the literal.
    range = l.delimited_range(delimiter)
  end
  return lpeg.match(range, input, index) or #input + 1
end)
-- Strings.
local cmd_str = l.delimited_range('`')
local sq_str = l.delimited_range("'")
local dq_str = l.delimited_range('"')

-- Heredocs: '<<', an optional '-', an optionally quoted (" or `) identifier,
-- then at least one newline/form-feed/';'. The body runs up to the first
-- line break followed by the identifier; when the '-' form is used the
-- terminator may be preceded by spaces. If no terminator exists the heredoc
-- extends to the end of the input.
local heredoc = '<<' * P(function(input, index)
  -- The leading '^' anchors the search at `index`. Without it string.find
  -- would scan the remainder of the buffer for a header every time a '<<'
  -- that is not a heredoc is seen, only for the result to be discarded.
  local _, header_end, indented, _, delimiter =
    input:find('^(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
  if not delimiter then return end -- not a heredoc header: fail the match

  local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
  -- Start at the header's last character so that its trailing line break can
  -- also serve as the line break in front of the terminator (empty body).
  local _, body_end = input:find(end_heredoc..delimiter, header_end)
  return body_end and body_end + 1 or #input + 1
end)
-- TODO: regex_str fails with `obj.method /patt/` syntax.
-- A regex is only tried when the next character is '/' (the `#P('/')`
-- lookahead) and `l.last_char_includes` accepts the listed operator
-- characters; any run of the flag letters i, o, m, x may follow it.
local regex_flags = S('iomx')^0
local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
                  l.delimited_range('/', true, false) * regex_flags

-- Any of the plain string forms, optionally followed by a single 'f'.
local plain_str = sq_str + dq_str + heredoc + cmd_str
-- NOTE: this local shadows the standard `string` library for the rest of the
-- chunk; the code below only uses method syntax (`s:sub(...)`), which is
-- unaffected.
local string = token(l.STRING, plain_str * S('f')^-1) +
               token(l.REGEX, regex_str)
-- Characters allowed after the first character of a word.
local word_char = l.alnum + S('_!?')

-- Numbers.
-- Shared pieces: a run of decimal digits, a run of binary digits, and the
-- optional 'r'/'i' suffix.
local digits, bits, suffix = l.digit^1, S('01')^1, S('ri')^-1
-- Digit groups may be separated by single underscores.
local dec = digits * ('_' * digits)^0 * suffix
local bin = '0b' * bits * ('_' * bits)^0
local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
-- '?' plus one non-space character that is not followed by a word character.
local numeric_literal = '?' * (l.any - l.space) * -word_char
local number = token(l.NUMBER, l.float * suffix + integer + numeric_literal)
-- Keywords.
local keyword_list = {
  'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do',
  'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module',
  'next', 'nil', 'not', 'redo', 'rescue', 'retry', 'return', 'self', 'super',
  'then', 'true', 'undef', 'unless', 'until', 'when', 'while', 'yield',
  '__FILE__', '__LINE__'
}
-- '?!' is passed to word_match as extra word characters (needed for
-- entries such as 'defined?').
local keyword = token(l.KEYWORD, word_match(keyword_list, '?!'))
-- Functions.
local builtin_names = {
  'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future',
  'get_stack_top', 'gets', 'lazy', 'loop', 'main', 'p', 'print', 'printf',
  'puts', 'raise', 'rand', 'read_line', 'require', 'sleep', 'spawn',
  'sprintf', 'system', 'with_color',
  -- Macros
  'assert_responds_to', 'debugger', 'parallel', 'pp', 'record',
  'redefine_main'
}
-- A built-in name only counts when it is not directly followed by '.', ':'
-- or '|'.
local not_followed_by = -S('.:|')
local func = token(l.FUNCTION, word_match(builtin_names, '?!')) *
             not_followed_by
-- Identifiers.
-- A word starts with a letter or underscore and continues with word_char.
local word = (l.alpha + '_') * word_char^0
local identifier = token(l.IDENTIFIER, word)

-- Variables.
-- After '$': a word, one of the listed punctuation characters, a single
-- digit, or '-' followed by one of the listed letters/digit.
local special_global = S('!@L+`\'=~/\\,.;<>_*"$?:')
local option_global = '-' * S('0FadiIKlpvw')
local global_var = '$' * (word + special_global + l.digit + option_global)
local class_var = '@@' * word
local inst_var = '@' * word
local variable = token(l.VARIABLE, global_var + class_var + inst_var)
-- Symbols.
-- Match-time check run right after a ':' has been consumed: `index` is the
-- position following that ':', so `index - 2` is the character in front of
-- it. The match fails when that character is another ':' (i.e. the second
-- colon of '::').
local function not_after_colon(input, index)
  local before = input:sub(index - 2, index - 2)
  if before == ':' then return nil end
  return index
end
local symbol_body = word_char^1 + sq_str + dq_str
local symbol = token('symbol', ':' * P(not_after_colon) * symbol_body)

-- Operators.
local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))
-- Rule list handed to the lexer framework: {name, pattern} pairs.
-- NOTE(review): the framework presumably tries the rules in the listed
-- order (keywords and functions before plain identifiers) -- confirm against
-- the `lexer` module before reordering.
M._rules = {
  {'whitespace', ws},
  {'keyword', keyword},
  {'function', func},
  {'identifier', identifier},
  {'comment', comment},
  {'string', string},
  {'number', number},
  {'variable', variable},
  {'symbol', symbol},
  {'operator', operator},
}
-- Style for the custom 'symbol' token name (the only token in this lexer
-- that is not one of the `l.*` token constants).
M._tokenstyles = {
  symbol = l.STYLE_CONSTANT
}
--- Decide whether an `if`/`while`/`unless`/`until` keyword opens a fold.
-- Returns 1 only when nothing but whitespace precedes the keyword on its
-- line and the preceding text does not end in a backslash line
-- continuation; otherwise returns 0 (modifier form such as `x = 1 if y`, or
-- a continued statement).
-- @param text string searched for a trailing line continuation; only
--   `text:sub(1, pos - 1)` is inspected.
-- @param pos position in `text`; presumably where `line` starts -- confirm
--   against the fold code in the `lexer` module.
-- @param line the line containing the keyword.
-- @param s 1-based index of the keyword within `line`.
-- @return 1 if the keyword starts a block, 0 otherwise.
local function disambiguate(text, pos, line, s)
  -- Anything other than whitespace in front of the keyword: modifier form.
  if not line:sub(1, s - 1):match('^%s*$') then return 0 end
  -- Preceding text ends with '\' + optional blanks + newline: continuation.
  if text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') then return 0 end
  return 1
end
-- Fold points, keyed by token type. In each sub-table a value of 1 marks an
-- opening symbol and -1 a closing one; a function value (`disambiguate`, or
-- whatever `l.fold_line_comments('#')` returns) is stored where the decision
-- is made per occurrence. `_patterns` lists the Lua patterns for candidate
-- symbols: lowercase words, brackets, and '#'.
M._foldsymbols = {
  _patterns = {'%l+', '[%(%)%[%]{}]', '#'},
  [l.KEYWORD] = {
    begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
    case = 1,
    -- These can also be trailing modifiers, so they are resolved per use.
    ['if'] = disambiguate, ['while'] = disambiguate,
    ['unless'] = disambiguate, ['until'] = disambiguate,
    ['end'] = -1
  },
  [l.OPERATOR] = {
    ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
  },
  [l.COMMENT] = {
    ['#'] = l.fold_line_comments('#')
  }
}

return M