1 -- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
4 local l
= require('lexer')
5 local token
, word_match
= l
.token
, l
.word_match
6 local P
, R
, S
= lpeg
.P
, lpeg
.R
, lpeg
.S
-- Module table for this lexer; `_NAME` carries the lexer's name.
local M = {
  _NAME = 'ruby',
}
-- Whitespace: a run of one or more `l.space` matches, tagged WHITESPACE.
local ws = token(l.WHITESPACE, l.space^1)
-- Comments.
-- Line comment: `#` followed by any number of `l.nonnewline_esc` matches.
local line_comment = P('#') * l.nonnewline_esc^0
-- Block comment: `=begin` at the start of a line, then everything up to (but
-- not including) a newline that is immediately followed by `=end`, then that
-- closing newline + `=end` if present. The closer is optional, so a block that
-- is never terminated still matches.
local block_body = (l.any - l.newline * '=end')^0
local block_close = (l.newline * '=end')^-1
local block_comment = l.starts_line('=begin') * block_body * block_close
-- Either comment form, tagged COMMENT; the block form is tried first.
local comment = token(l.COMMENT, block_comment + line_comment)
-- Maps an opening delimiter of a percent literal to its closing counterpart.
-- Delimiters found here are treated as a balanced pair (so nesting works);
-- any delimiter not listed closes with the same character that opened it.
-- Ruby accepts four paired delimiters: (), [], {} and <>.
local delimiter_matches = {
  ['('] = ')',
  ['['] = ']',
  ['{'] = '}',
  ['<'] = '>',
}
-- Match-time pattern for the delimited body of a percent literal.
-- The character at `index` is taken as the delimiter. Nothing is matched when
-- that character is alphanumeric or one of CR, LF, FF, tab or space.
-- A delimiter that is a key of `delimiter_matches` gets a range built from the
-- opener/closer pair; a range built from the delimiter alone is also
-- constructed with `l.delimited_range(delimiter)` (presumably for delimiters
-- that close themselves -- verify the branch structure).
-- Returns the position reported by `lpeg.match` for that range, or
-- `#input + 1` (one past the end of the input) when the range does not match.
20 local literal_delimitted
= P(function(input
, index
)
21 local delimiter
= input
:sub(index
, index
)
22 if not delimiter
:find('[%w\r\n\f\t ]') then -- only non alpha-numerics
24 if delimiter_matches
[delimiter
] then
25 -- Handle nested delimiter/matches in strings.
26 local s
, e
= delimiter
, delimiter_matches
[delimiter
]
27 patt
= l
.delimited_range(s
..e
, false, false, true)
29 patt
= l
.delimited_range(delimiter
)
31 match_pos
= lpeg
.match(patt
, input
, index
)
32 return match_pos
or #input
+ 1
-- Strings.
-- Plain quoted forms: single-quoted, double-quoted and backtick command
-- strings.
local sq_str = l.delimited_range("'")
local dq_str = l.delimited_range('"')
local cmd_str = l.delimited_range('`')
-- Percent literals: a `%x`, `%w`, `%q`, `%Q` or bare `%` prefix followed by a
-- delimited body.
local lit_cmd = P('%x') * literal_delimitted
local lit_array = P('%w') * literal_delimitted
local lit_str = P('%') * S('qQ')^-1 * literal_delimitted
-- Heredoc: `<<` followed by a match-time function.
-- The function looks for an optional `-`, an optional `"` or backtick, an
-- identifier (the delimiter), the same quote again, and then one or more
-- CR/LF/FF/`;` characters. It only proceeds when that is found exactly at
-- `index`.
-- The terminator is searched for from the end of that header: one or more
-- line-break characters followed by the delimiter, with any number of spaces
-- allowed before the delimiter when the `-` was present.
-- Returns the position just past the terminator, or `#input + 1` (one past
-- the end of the input) when no terminator is found.
43 local heredoc
= '<<' * P(function(input
, index
)
44 local s
, e
, indented
, _
, delimiter
=
45 input
:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index
)
46 if s
== index
and delimiter
then
47 local end_heredoc
= (#indented
> 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
48 local _
, e
= input
:find(end_heredoc
..delimiter
, e
)
49 return e
and e
+ 1 or #input
+ 1
-- Regular expressions.
-- Trailing flag letters accepted after either regex form.
local regex_flags = S('iomx')^0
-- TODO: regex_str fails with `obj.method /patt/` syntax.
-- Slash-delimited regex: requires a `/` at the current position and that
-- `l.last_char_includes` accepts one of the listed operator characters.
local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
                  l.delimited_range('/', true, false) * regex_flags
-- Percent-literal regex: `%r` followed by a delimited body.
local lit_regex = P('%r') * literal_delimitted * regex_flags
-- Every string-like form, optionally followed by a single `f`, is tagged
-- STRING; both regex forms are tagged REGEX.
-- NOTE(review): this local shadows the standard `string` library name for the
-- rest of the file; method calls such as `input:sub(...)` are unaffected.
local string_forms = sq_str + dq_str + lit_str + heredoc + cmd_str + lit_cmd +
                     lit_array
local string = token(l.STRING, string_forms * S('f')^-1) +
               token(l.REGEX, regex_str + lit_regex)
-- A character that may appear inside a word: alphanumeric, `_`, `!` or `?`.
local word_char = l.alnum + S('_!?')
-- Numbers.
-- Decimal integer: digit groups separated by single underscores, optionally
-- followed by one `r` or `i`.
local dec_digits = l.digit^1
local dec = dec_digits * ('_' * dec_digits)^0 * S('ri')^-1
-- Binary integer: `0b` or `0B` followed by groups of binary digits separated
-- by single underscores. Ruby treats the prefix letter case-insensitively, so
-- the upper-case form is accepted too (previously `0B101` lexed as the number
-- `0` followed by an identifier).
local bin_digits = S('01')^1
local bin = '0' * S('bB') * bin_digits * ('_' * bin_digits)^0
-- Integer: optional sign, then the first of binary, hexadecimal, octal or
-- decimal that matches.
local sign = S('+-')^-1
local integer = sign * (bin + l.hex_num + l.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
-- Character literal: `?` plus one non-space character that is not followed by
-- a word character.
local numeric_literal = P('?') * (l.any - l.space) * -word_char
-- Any numeric form, tagged NUMBER. Floats (with an optional `r`/`i` suffix)
-- are tried before integers and character literals.
local float = l.float * S('ri')^-1
local number = token(l.NUMBER, float + integer + numeric_literal)
-- Keywords.
-- The words listed here are passed to `word_match` and tagged KEYWORD.
71 local keyword
= token(l
.KEYWORD
, word_match({
72 'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def',
73 'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if',
74 'in', 'module', 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry',
75 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when',
76 'while', 'yield', '__FILE__', '__LINE__'
-- Functions.
-- The names listed here are passed to `word_match` and tagged FUNCTION. Some
-- entries end in `!` or `?`.
80 local func
= token(l
.FUNCTION
, word_match({
81 'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!', 'chomp',
82 'chomp!', 'eval', 'exec', 'exit', 'exit!', 'fail', 'fork', 'format', 'gets',
83 'global_variables', 'gsub', 'gsub!', 'iterator?', 'lambda', 'load',
84 'local_variables', 'loop', 'open', 'p', 'print', 'printf', 'proc', 'putc',
85 'puts', 'raise', 'rand', 'readline', 'readlines', 'require', 'select',
86 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!', 'syscall', 'system',
87 'test', 'trace_var', 'trap', 'untrace_var'
-- Identifiers.
-- A word starts with a letter or underscore and continues with any number of
-- word characters.
local word_start = l.alpha + '_'
local word = word_start * word_char^0
local identifier = token(l.IDENTIFIER, word)
-- Variables.
-- Global variable: `$` followed by a word, one character from the punctuation
-- set below, a single digit, or a `-` followed by a further pattern.
95 local global_var
= '$' * (word
+ S('!@L+`\'=~/\\,.;<>_*"$?:') + l
.digit
+ '-' *
-- Class variable (`@@name`) and instance variable (`@name`).
local class_var = P('@@') * word
local inst_var = P('@') * word
-- Any variable form, tagged VARIABLE. The class form is tried before the
-- instance form.
local variable = token(l.VARIABLE, global_var + class_var + inst_var)
-- Symbols.
-- Match-time guard that runs right after the leading ':' has been consumed.
-- It succeeds without consuming anything when the character before that ':'
-- is not itself a ':'; otherwise it returns nothing and the match fails.
local function not_after_colon(input, index)
  local before_colon = input:sub(index - 2, index - 2)
  if before_colon ~= ':' then
    return index
  end
end
-- A symbol is ':' followed by a run of word characters or by a single- or
-- double-quoted string, tagged with the custom 'symbol' token name.
local symbol_name = word_char^1 + sq_str + dq_str
local symbol = token('symbol', ':' * P(not_after_colon) * symbol_name)
-- Operators: any single character from the set below, tagged OPERATOR.
local operator_chars = S('!%^&*()[]{}-=+/|:;.,?<>~')
local operator = token(l.OPERATOR, operator_chars)
111 {'keyword', keyword
},
113 {'identifier', identifier
},
114 {'comment', comment
},
117 {'variable', variable
},
119 {'operator', operator
},
123 symbol
= l
.STYLE_CONSTANT
-- Returns 1 when both of the following hold, and 0 otherwise:
--   * everything on `line` before column `s` is whitespace (or empty), and
--   * the text before `pos` does not end with a backslash, optional spaces or
--     tabs, and a line break.
-- It is used as the value for the 'if', 'while', 'unless' and 'until' entries
-- further down.
-- NOTE(review): presumably this keeps statement-modifier forms (`x if y`) and
-- backslash-continued lines from counting as block openers -- confirm.
126 local function disambiguate(text
, pos
, line
, s
)
127 return line
:sub(1, s
- 1):match('^%s*$') and
128 not text
:sub(1, pos
- 1):match('\\[ \t]*\r?\n$') and 1 or 0
132 _patterns
= {'%l+', '[%(%)%[%]{}]', '=begin', '=end', '#'},
134 begin
= 1, class
= 1, def
= 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
136 ['if'] = disambiguate
, ['while'] = disambiguate
,
137 ['unless'] = disambiguate
, ['until'] = disambiguate
,
141 ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
144 ['=begin'] = 1, ['=end'] = -1, ['#'] = l
.fold_line_comments('#')