-- vis: implement :set cursorline
-- [vis.git] / lexers / ruby.lua
-- blob 910b906d64d6616b79bdb0c20705003f76d50155
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
-- Ruby LPeg lexer.
-- Lexer support module and the helpers this file uses from it.
local l = require('lexer')
local token = l.token
local word_match = l.word_match
-- Shorthand for the LPeg pattern constructors (`lpeg` is read as a global).
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-- Module table returned at the end of the file.
local M = {}
M._NAME = 'ruby'
-- Whitespace: one or more space characters.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments.
-- A block comment starts with '=begin' at the start of a line and runs up to
-- (and including) a newline followed by '=end'; the terminator is optional so
-- an unterminated block still matches to the end of the input.
local block_end = l.newline * '=end'
local block_comment = l.starts_line('=begin') * (l.any - block_end)^0 *
                      block_end^-1
-- A line comment starts with '#'.
local line_comment = '#' * l.nonnewline_esc^0
local comment = token(l.COMMENT, block_comment + line_comment)
-- Opening delimiters that pair with a different closing delimiter.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}

-- Match-time pattern for the delimited body of a `%`-literal. The character at
-- the current position is taken as the delimiter; the match fails when that
-- character is alphanumeric or whitespace. Otherwise the new position is the
-- end of the delimited range, or the end of input if the range pattern does
-- not match.
local literal_delimitted = P(function(input, index)
  local opener = input:sub(index, index)
  -- Only non alpha-numerics may act as delimiters.
  if opener:find('[%w\r\n\f\t ]') then return end

  local closer = delimiter_matches[opener]
  local range
  if closer then
    -- Handle nested delimiter/matches in strings.
    range = l.delimited_range(opener..closer, false, false, true)
  else
    range = l.delimited_range(opener)
  end
  return lpeg.match(range, input, index) or #input + 1
end)
-- Strings.
-- Quoted forms: single-quoted, double-quoted and backtick command strings.
local sq_str = l.delimited_range("'")
local dq_str = l.delimited_range('"')
local cmd_str = l.delimited_range('`')
-- Percent literals: %..., %q..., %Q..., %x... and %w..., each followed by a
-- delimited body.
local lit_str = P('%') * S('qQ')^-1 * literal_delimitted
local lit_cmd = P('%x') * literal_delimitted
local lit_array = P('%w') * literal_delimitted
-- Heredocs: `<<ID`, `<<-ID` and `<<~ID`, where ID may be wrapped in matching
-- double quotes, single quotes or backticks. The match-time function runs
-- right after '<<' and returns the position just past the terminating ID, or
-- the end of input when no terminator is found. It returns nothing (failing
-- the match) when '<<' is not followed by a heredoc header.
local heredoc = '<<' * P(function(input, index)
  -- The leading '^' anchors the search at `index`, so a '<<' that does not
  -- start a heredoc fails immediately instead of scanning the rest of the
  -- input for a header further on.
  local start, header_end, indent, _, delimiter = input:find(
    '^([%-~]?)(["\'`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
  if not start then return end

  -- With '-' or '~' the terminator may be indented by spaces or tabs;
  -- otherwise it must directly follow the line break. `delimiter` consists
  -- only of word characters, so it is safe to splice into the pattern.
  local lead = #indent > 0 and '[\n\r\f]+[ \t]*' or '[\n\r\f]+'
  -- Search from the last character of the header so that an empty heredoc
  -- (terminator on the very next line) is still found.
  local _, body_end = input:find(lead..delimiter, header_end)
  return body_end and body_end + 1 or #input + 1
end)
-- Regular expressions.
-- TODO: regex_str fails with `obj.method /patt/` syntax.
local regex_flags = S('iomx')^0
-- A '/'-delimited regex is only accepted when the preceding character is one
-- of the listed operator/punctuation characters.
local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
                  l.delimited_range('/', true, false) * regex_flags
local lit_regex = '%r' * literal_delimitted * regex_flags

-- Every string form, optionally followed by a single 'f'.
local string_forms = sq_str + dq_str + lit_str + heredoc + cmd_str + lit_cmd +
                     lit_array
-- NOTE: this local shadows the global `string` library for the rest of the
-- file; string methods called with ':' are unaffected.
local string = token(l.STRING, string_forms * S('f')^-1) +
               token(l.REGEX, regex_str + lit_regex)
-- Characters allowed inside a word after its first character.
local word_char = l.alnum + S('_!?')

-- Numbers.
local dec_digits = l.digit^1
local bin_digits = S('01')^1
-- Digit groups may be separated by single underscores; decimals may carry an
-- 'r' or 'i' suffix.
local dec = dec_digits * ('_' * dec_digits)^0 * S('ri')^-1
local bin = '0b' * bin_digits * ('_' * bin_digits)^0
local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
-- '?' followed by one non-space character that is not followed by a word
-- character.
local numeric_literal = '?' * (l.any - l.space) * -word_char
local number = token(l.NUMBER,
                     l.float * S('ri')^-1 + integer + numeric_literal)
-- Keywords.
local keyword_list = {
  'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def',
  'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if',
  'in', 'module', 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry',
  'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until',
  'when', 'while', 'yield', '__FILE__', '__LINE__'
}
-- '?' and '!' are passed as additional word characters so that entries such
-- as 'defined?' can match.
local keyword = token(l.KEYWORD, word_match(keyword_list, '?!'))
-- Functions.
local function_list = {
  'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!',
  'chomp', 'chomp!', 'eval', 'exec', 'exit', 'exit!', 'fail', 'fork',
  'format', 'gets', 'global_variables', 'gsub', 'gsub!', 'iterator?',
  'lambda', 'load', 'local_variables', 'loop', 'open', 'p', 'print', 'printf',
  'proc', 'putc', 'puts', 'raise', 'rand', 'readline', 'readlines', 'require',
  'select', 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!', 'syscall',
  'system', 'test', 'trace_var', 'trap', 'untrace_var'
}
-- '?' and '!' are passed as additional word characters. A listed name only
-- counts as a function when it is not immediately followed by '.', ':' or '|'.
local func = token(l.FUNCTION, word_match(function_list, '?!')) * -S('.:|')
-- Identifiers.
-- A word starts with a letter or underscore, followed by word characters.
local word = (l.alpha + '_') * word_char^0
local identifier = token(l.IDENTIFIER, word)

-- Variables.
-- After '$' a global is a word, one of the listed punctuation characters, a
-- single digit, or '-' followed by one of the listed letters.
local punct_global = S('!@L+`\'=~/\\,.;<>_*"$?:')
local dash_global = '-' * S('0FadiIKlpvw')
local global_var = '$' * (word + punct_global + l.digit + dash_global)
local inst_var = '@' * word
local class_var = '@@' * word
local variable = token(l.VARIABLE, global_var + class_var + inst_var)
-- Symbols.
-- Match-time check run right after the leading ':' has been consumed: succeed
-- without consuming input unless the character before that ':' is also ':'.
local function not_after_colon(input, index)
  local before_colon = input:sub(index - 2, index - 2)
  if before_colon == ':' then return end
  return index
end
local symbol_name = word_char^1 + sq_str + dq_str
local symbol = token('symbol', ':' * P(not_after_colon) * symbol_name)

-- Operators.
local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))
-- Rule list as {name, pattern} pairs, kept in the original order. Keywords
-- and functions are listed before the generic identifier rule.
M._rules = {
  {'whitespace', ws},
  {'keyword', keyword},
  {'function', func},
  {'identifier', identifier},
  {'comment', comment},
  {'string', string},
  {'number', number},
  {'variable', variable},
  {'symbol', symbol},
  {'operator', operator},
}
-- Style for the custom 'symbol' token emitted by the symbol rule.
M._tokenstyles = {
  symbol = l.STYLE_CONSTANT
}
-- Fold callback used for 'if', 'while', 'unless' and 'until'.
-- Returns 1 when the keyword is the first non-blank thing on its line and the
-- text before `pos` does not end in a backslash line continuation; returns 0
-- otherwise.
-- @param text The text being folded.
-- @param pos Position in `text`; only `text:sub(1, pos - 1)` is inspected
--   (presumably the start of `line` -- confirm against the fold caller).
-- @param line The line containing the keyword.
-- @param s Index in `line` where the keyword starts.
-- @return 1 or 0.
local function disambiguate(text, pos, line, s)
  local leads_line = line:sub(1, s - 1):match('^%s*$')
  local continued = text:sub(1, pos - 1):match('\\[ \t]*\r?\n$')
  if leads_line and not continued then return 1 end
  return 0
end
-- Fold points, keyed by token type. `_patterns` lists the Lua patterns for
-- the candidate fold symbols. Each symbol maps to 1, -1, or a function:
-- `disambiguate` for the keywords that get it, and the value returned by
-- `l.fold_line_comments('#')` for '#'.
M._foldsymbols = {
  _patterns = {'%l+', '[%(%)%[%]{}]', '=begin', '=end', '#'},
  [l.KEYWORD] = {
    begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
    case = 1,
    ['if'] = disambiguate, ['while'] = disambiguate,
    ['unless'] = disambiguate, ['until'] = disambiguate,
    ['end'] = -1
  },
  [l.OPERATOR] = {
    ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
  },
  [l.COMMENT] = {
    ['=begin'] = 1, ['=end'] = -1, ['#'] = l.fold_line_comments('#')
  }
}

return M