-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
-- Perl LPeg lexer.

-- Lexer support module, plus local aliases for the helpers and the LPeg
-- pattern constructors used by the definitions in this file.
local l = require('lexer')
local token, word_match = l.token, l.word_match
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

-- Module table for this lexer; `_NAME` identifies it as 'perl'.
local M = {_NAME = 'perl'}
-- Whitespace: one or more `l.space` characters, tagged l.WHITESPACE.
local ws = token(l.WHITESPACE, l.space^1)
-- Comments.
-- Line comment: '#' followed by any run of `l.nonnewline_esc`.
local line_comment = '#' * l.nonnewline_esc^0
-- Block comment: '=' at the start of a line followed by a letter, then
-- everything up to and including a "<newline>=cut" terminator. The terminator
-- is optional, so an unterminated block runs to the end of the input.
local block_comment = l.starts_line('=') * l.alpha *
                      (l.any - l.newline * '=cut')^0 * (l.newline * '=cut')^-1
-- Block comments are tried before line comments.
local comment = token(l.COMMENT, block_comment + line_comment)
-- Maps each opening bracket delimiter to its closing counterpart. A delimiter
-- that is absent from this table has no distinct closing character.
local delimiter_matches = {
  ['('] = ')',
  ['['] = ']',
  ['{'] = '}',
  ['<'] = '>',
}
-- Match-time pattern for literals that take a single delimited section.
-- LPeg calls the function with the subject and the current position; the
-- number it returns becomes the new position, and returning nothing makes the
-- pattern fail at this position.
local literal_delimitted = P(function(input, index) -- for single delimiter sets
  local delimiter = input:sub(index, index)
  if delimiter:find('%w') then return end -- only non alpha-numerics
  local patt
  local closer = delimiter_matches[delimiter]
  if closer then
    -- Handle nested delimiter/matches in strings.
    patt = l.delimited_range(delimiter..closer, false, false, true)
  else
    patt = l.delimited_range(delimiter)
  end
  local match_pos = lpeg.match(patt, input, index)
  -- If the range does not match, consume the rest of the input.
  return match_pos or #input + 1
end)
-- Match-time pattern for literals that take two delimited sections.
-- LPeg calls the function with the subject and the current position; the
-- number it returns becomes the new position, and returning nothing makes the
-- pattern fail at this position.
local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
  local delimiter = input:sub(index, index)
  -- Only consider non-alpha-numerics and non-spaces as delimiters. The
  -- non-spaces are used to ignore operators like "-s".
  if delimiter:find('[%w ]') then return end
  local patt
  local closer = delimiter_matches[delimiter]
  if closer then
    -- Handle nested delimiter/matches in strings.
    patt = l.delimited_range(delimiter..closer, false, false, true)
  else
    patt = l.delimited_range(delimiter)
  end
  local first_match_pos = lpeg.match(patt, input, index)
  -- Without a first section there is nothing to continue from; consume the
  -- rest of the input instead of raising on nil arithmetic below.
  if not first_match_pos then return #input + 1 end
  -- With a shared delimiter the second section starts on the character that
  -- closed the first one, hence the step back by one.
  local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
  if not final_match_pos then -- using (), [], {}, or <> notation
    -- Bracketed sections are separate and may have whitespace between them.
    final_match_pos = lpeg.match(l.space^0 * patt, input, first_match_pos)
  end
  return final_match_pos or #input + 1
end)
-- Strings.
-- Ranges delimited by single quotes, double quotes and backticks.
local sq_str = l.delimited_range("'")
local dq_str = l.delimited_range('"')
local cmd_str = l.delimited_range('`')
-- Heredoc: '<<' immediately followed by an identifier and at least one
-- newline, form feed or ';'. The match extends through the next occurrence of
-- the identifier that follows one or more line breaks, or to the end of the
-- input when no such terminator exists.
local heredoc = '<<' * P(function(input, index)
  -- The '^' anchors the search at `index`, so a later identifier is never
  -- scanned for only to be rejected.
  local _, head_end, delimiter =
    input:find('^([%a_][%w_]*)[\n\r\f;]+', index)
  if not delimiter then return end
  local end_heredoc = '[\n\r\f]+'
  local _, close_end = input:find(end_heredoc..delimiter, head_end)
  if close_end then return close_end + 1 end
  return #input + 1
end)
-- Quote-like literals: a keyword prefix followed by delimited section(s) and,
-- for some, a run of trailing modifier letters.
local lit_str = 'q' * P('q')^-1 * literal_delimitted
local lit_array = 'qw' * literal_delimitted
local lit_cmd = 'qx' * literal_delimitted
local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
-- A bare /.../ is only tried when `l.last_char_includes` accepts the
-- preceding character against the listed operator and bracket characters.
local regex_str = #P('/') * l.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
                  l.delimited_range('/', true) * S('imosx')^0
local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-- NOTE(review): this local shadows Lua's `string` library from here on. The
-- name is kept because code outside this block may refer to it.
local string = token(l.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
                               lit_array + lit_cmd + lit_tr) +
               token(l.REGEX, regex_str + lit_regex + lit_match + lit_sub)
-- Numbers: `l.float` is tried before `l.integer`.
local number = token(l.NUMBER, l.float + l.integer)
-- Keywords.
local keyword = token(l.KEYWORD, word_match{
  'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
  'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
  'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless',
  'until', 'while', '__FILE__', '__LINE__', '__PACKAGE__',
  'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
})
-- Functions.
-- NOTE(review): the list appears to end mid-table in this view; confirm that
-- no trailing entries or extra `word_match` arguments were lost.
local func = token(l.FUNCTION, word_match({
  'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller',
  'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir',
  'close', 'connect', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined',
  'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent', 'endnetent',
  'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists',
  'exit', 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
  'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname',
  'gethostent', 'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent',
  'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
  'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid',
  'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
  'glob', 'gmtime', 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl',
  'join', 'keys', 'kill', 'lcfirst', 'lc', 'length', 'link', 'listen',
  'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
  'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop',
  'pos', 'printf', 'print', 'prototype', 'push', 'quotemeta', 'rand', 'readdir',
  'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
  'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek', 'select',
  'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
  'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
  'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
  'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf',
  'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
  'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time',
  'times', 'truncate', 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack',
  'unshift', 'untie', 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray',
}))
-- Identifiers: any `l.word`, tagged l.IDENTIFIER.
local identifier = token(l.IDENTIFIER, l.word)
-- Variables.
-- Special variables: '$' followed by one of
--   '^' with an optional capital letter from the listed set,
--   a single punctuation character from the listed set,
--   ':' and one character other than ':',
--   '$' that is not followed by a word, or
--   a run of digits.
local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
                           S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
                           ':' * (l.any - ':') +
                           P('$') * -l.word +
                           l.digit^1)
-- Plain variables: a '$#', '$', '@' or '%' sigil, any number of further '$',
-- then a word; a bare '$#' is also accepted.
local plain_var = ('$#' + S('$@%')) * P('$')^0 * l.word + '$#'
-- Special variables are tried before plain ones.
local variable = token(l.VARIABLE, special_var + plain_var)
-- Operators: any single character from the set below, tagged l.OPERATOR.
local operator = token(l.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}'))
-- Markers: '__DATA__' or '__END__' together with everything after it up to
-- the end of the input, tagged l.COMMENT.
local marker = token(l.COMMENT, word_match{'__DATA__', '__END__'} * l.any^0)
144 {'keyword', keyword
},
148 {'identifier', identifier
},
149 {'comment', comment
},
151 {'variable', variable
},
152 {'operator', operator
},
156 _patterns
= {'[%[%]{}]', '#'},
157 [l
.OPERATOR
] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
158 [l
.COMMENT
] = {['#'] = l
.fold_line_comments('#')}