vis: implement :set cursorline
[vis.git] / lexers / perl.lua
blob a80248c572f6ea814becf99df6a858770dcaa65f
1 -- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
2 -- Perl LPeg lexer.
4 local l = require('lexer')
5 local token, word_match = l.token, l.word_match
6 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
8 local M = {_NAME = 'perl'}
-- Whitespace: a run of one or more `l.space` matches forms one token.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments.
-- Line form: a '#' followed by any number of `l.nonnewline_esc` matches.
local line_comment = '#' * l.nonnewline_esc^0
-- Block form: `l.starts_line('=')` followed by a letter, then everything up to
-- (and including, when present) a newline immediately followed by "=cut".
-- The terminator is optional, so an unterminated block runs to end of input.
local block_end = l.newline * '=cut'
local block_comment = l.starts_line('=') * l.alpha * (l.any - block_end)^0 *
  block_end^-1
-- The block form is tried before the line form.
local comment = token(l.COMMENT, block_comment + line_comment)
-- Maps each opening bracket-style delimiter to the closing delimiter that
-- pairs with it. Characters not listed here have no entry (nil).
local delimiter_matches = {
  ['('] = ')',
  ['['] = ']',
  ['{'] = '}',
  ['<'] = '>',
}
-- Match-time function for quote-like constructs that take ONE delimited
-- section. The character at `index` is taken as the opening delimiter.
-- Returns the position after the delimited range, or #input + 1 when the range
-- pattern does not match. Returns no value (the match fails) when the
-- character at `index` is alphanumeric.
local literal_delimitted = P(function(input, index) -- for single delimiter sets
  local opener = input:sub(index, index)
  -- Only non alpha-numerics may act as delimiters.
  if opener:find('%w') then return end
  local closer = delimiter_matches[opener]
  local range
  if closer then
    -- Bracket-style pair: use the two-character form with the fourth argument
    -- set, to handle nested delimiter/matches in strings.
    range = l.delimited_range(opener..closer, false, false, true)
  else
    -- Any other character opens and closes the range itself.
    range = l.delimited_range(opener)
  end
  return lpeg.match(range, input, index) or #input + 1
end)
-- Match-time function for quote-like constructs that take TWO delimited
-- sections. The character at `index` is taken as the opening delimiter.
-- Returns the position after the second section, or #input + 1 when either
-- section cannot be matched. Returns no value (the match fails) when the
-- character at `index` is alphanumeric or a space.
local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
  local delimiter = input:sub(index, index)
  -- Only consider non-alpha-numerics and non-spaces as delimiters. The
  -- non-spaces are used to ignore operators like "-s".
  if delimiter:find('[%w ]') then return end

  local patt
  local closer = delimiter_matches[delimiter]
  if closer then
    -- Handle nested delimiter/matches in strings.
    patt = l.delimited_range(delimiter..closer, false, false, true)
  else
    patt = l.delimited_range(delimiter)
  end

  -- Both positions are locals: assigning them without `local` would create
  -- globals shared by every caller.
  local first_match_pos = lpeg.match(patt, input, index)
  if not first_match_pos then
    -- lpeg.match returns nil on failure; without this guard the subtraction
    -- below would raise an error. Use the same fallback as the final return.
    return #input + 1
  end
  -- Same-character delimiters: the second section starts on the character
  -- that closed the first one, hence the `- 1`.
  local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
  if not final_match_pos then -- using (), [], {}, or <> notation
    -- Paired delimiters: the second section starts after the first one,
    -- optionally preceded by `l.space` matches.
    final_match_pos = lpeg.match(l.space^0 * patt, input, first_match_pos)
  end
  return final_match_pos or #input + 1
end)
-- Strings.
-- Plain quoted forms: single-quoted, double-quoted and backtick-quoted.
local sq_str, dq_str, cmd_str =
  l.delimited_range("'"), l.delimited_range('"'), l.delimited_range('`')
-- Here-document: "<<" immediately followed by an identifier-like tag and at
-- least one of newline, carriage return, form feed or ';'. The match-time
-- function returns the position after the first later occurrence of one or
-- more line-break characters followed by the tag, or #input + 1 when no such
-- occurrence exists. It returns no value (the match fails) when no tag
-- follows the "<<".
local heredoc = '<<' * P(function(input, index)
  -- The leading '^' anchors the search at `index`, so a "<<" that is not a
  -- here-document fails immediately instead of scanning the rest of the input.
  local _, tag_end, delimiter = input:find('^([%a_][%w_]*)[\n\r\f;]+', index)
  if not delimiter then return end
  -- The tag consists only of [%w_] characters, so it is safe to splice into a
  -- Lua pattern without escaping. The search starts at `tag_end` so that the
  -- last terminator character may itself begin the closing line break.
  local _, close_end = input:find('[\n\r\f]+'..delimiter, tag_end)
  return close_end and close_end + 1 or #input + 1
end)
-- Quote-like operators. Each is a literal prefix followed by one
-- (`literal_delimitted`) or two (`literal_delimitted2`) delimited sections.
local lit_str = 'q' * P('q')^-1 * literal_delimitted -- q or qq
local lit_array = 'qw' * literal_delimitted
local lit_cmd = 'qx' * literal_delimitted
-- tr or y, followed by any number of the flag letters c, d, s.
local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0

-- Flag letters accepted after a bare /.../ regex and after qr.
local regex_flags = S('imosx')^0
-- Bare regex: requires a '/' at the current position (lookahead) and that
-- `l.last_char_includes` accepts the listed characters.
-- NOTE(review): presumably this restricts '/' to positions where an operand
-- is expected, to tell a regex from division — confirm against the lexer
-- module.
local regex_str = #P('/') * l.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
  l.delimited_range('/', true) * regex_flags
local lit_regex = 'qr' * literal_delimitted * regex_flags
local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0

-- String-like alternatives are tokenized as l.STRING, regex-like ones as
-- l.REGEX; the string alternatives are tried first.
local string_forms = sq_str + dq_str + cmd_str + heredoc + lit_str +
  lit_array + lit_cmd + lit_tr
local regex_forms = regex_str + lit_regex + lit_match + lit_sub
local string = token(l.STRING, string_forms) + token(l.REGEX, regex_forms)
-- Numbers: `l.float` is tried before `l.integer`.
local numeric_literal = l.float + l.integer
local number = token(l.NUMBER, numeric_literal)
-- Keywords.
-- Words tokenized as l.KEYWORD: standard filehandles, special block names,
-- module loading, control flow and declarations, compile-time literals, and
-- the word-form logical/comparison operators.
local keyword = token(l.KEYWORD, word_match{
  'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
  'require', 'use',
  'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
  'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless',
  'until', 'while', '__FILE__', '__LINE__', '__PACKAGE__',
  'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
})
-- Functions.
-- Names tokenized as l.FUNCTION.
local function_names = {
  'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller',
  'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir',
  'close', 'connect', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined',
  'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent', 'endnetent',
  'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists',
  'exit', 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
  'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname',
  'gethostent', 'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent',
  'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
  'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid',
  'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
  'glob', 'gmtime', 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl',
  'join', 'keys', 'kill', 'lcfirst', 'lc', 'length', 'link', 'listen',
  'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
  'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop',
  'pos', 'printf', 'print', 'prototype', 'push', 'quotemeta', 'rand', 'readdir',
  'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
  'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek', 'select',
  'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
  'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
  'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
  'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf',
  'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
  'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time',
  'times', 'truncate', 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack',
  'unshift', 'untie', 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray',
  'warn', 'write'
}
-- NOTE(review): the second argument '2' presumably adds the digit to the set
-- of word characters so that 'atan2' can match — confirm against word_match.
local func = token(l.FUNCTION, word_match(function_names, '2'))
-- Identifiers: any `l.word` match.
local identifier = token(l.IDENTIFIER, l.word)

-- Variables.
-- Special variables: a '$' followed by exactly one of the alternatives below,
-- tried in the order listed.
local special_var
do
  local caret_form = '^' * S('ADEFHILMOPSTWX')^-1 -- '^', optionally one letter
  local punct_form = S('\\"[]\'&`+*.,;=%~?@<>(|/!-') -- one punctuation char
  local colon_form = ':' * (l.any - ':') -- ':' followed by any non-':' char
  local dollar_form = P('$') * -l.word -- '$' not followed by a word
  local digits_form = l.digit^1 -- one or more digits
  special_var = '$' *
    (caret_form + punct_form + colon_form + dollar_form + digits_form)
end
-- Plain variables: a sigil ('$#', '$', '@' or '%'), any number of extra '$',
-- then a word; a bare '$#' is accepted as the last alternative.
local sigil = '$#' + S('$@%')
local plain_var = sigil * P('$')^0 * l.word + '$#'
-- Special forms are tried before plain ones.
local variable = token(l.VARIABLE, special_var + plain_var)
-- Operators: any single character from the set below.
local operator_chars = S('-<>+*!~\\=/%&|^.?:;()[]{}')
local operator = token(l.OPERATOR, operator_chars)

-- Markers: either marker word plus everything after it up to the end of the
-- input is emitted as one l.COMMENT token.
local marker_word = word_match{'__DATA__', '__END__'}
local marker = token(l.COMMENT, marker_word * l.any^0)
-- Lexer rules as {name, pattern} pairs.
-- NOTE(review): the lexer framework presumably tries the rules in the order
-- listed, which is why 'keyword' precedes 'function' and 'identifier' —
-- confirm against the lexer module.
M._rules = {
  {'whitespace', ws},
  {'keyword', keyword},
  {'marker', marker},
  {'function', func},
  {'string', string},
  {'identifier', identifier},
  {'comment', comment},
  {'number', number},
  {'variable', variable},
  {'operator', operator},
}
-- Fold points. `_patterns` lists the Lua patterns for candidate fold symbols.
-- For l.OPERATOR tokens, '[' and '{' map to 1 and ']' and '}' map to -1.
-- For l.COMMENT tokens, '#' is handled by l.fold_line_comments('#').
M._foldsymbols = {
  _patterns = {'[%[%]{}]', '#'},
  [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
  [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
}

return M