1 -- Copyright 2006-2016 Mitchell mitchell.att.foicica.com. See LICENSE.
4 local l
= require('lexer')
5 local token
, word_match
= l
.token
, l
.word_match
6 local P
, R
, S
, V
= lpeg
.P
, lpeg
.R
, lpeg
.S
, lpeg
.V
-- Lexer module table; its _NAME field carries the lexer's name.
local M = {}
M._NAME = 'xml'

-- Whitespace: one or more l.space matches wrapped in an l.WHITESPACE token.
local ws
do
  local blanks = l.space^1
  ws = token(l.WHITESPACE, blanks)
end
-- Comments and CDATA.
-- Both patterns share one shape: an opening delimiter, any run of input that
-- does not start the closing delimiter, then the closing delimiter itself.
-- The closer is optional (^-1), so an unterminated section still matches.
local comment
do
  local opener, closer = P('<!--'), P('-->')
  comment = token(l.COMMENT, opener * (l.any - closer)^0 * closer^-1)
end

local cdata
do
  local opener, closer = P('<![CDATA['), P(']]>')
  cdata = token('cdata', opener * (l.any - closer)^0 * closer^-1)
end
-- Strings.
-- Single- and double-quoted ranges built by l.delimited_range.
local sq_str = l.delimited_range("'", false, true)
local dq_str = l.delimited_range('"', false, true)

-- A string token is only attempted when the next character is a quote (the
-- #S(...) lookahead) and the l.last_char_includes('=') guard succeeds.
-- NOTE: this local shadows Lua's global `string` library table for the rest
-- of the file; the name is kept because other parts of the file may use it.
local string
do
  local quote_ahead = #S('\'"')
  local quoted = sq_str + dq_str
  string = quote_ahead * l.last_char_includes('=') * token(l.STRING, quoted)
end
23 local in_tag
= P(function(input
, index
)
24 local before
= input
:sub(1, index
- 1)
25 local s
, e
= before
:find('<[^>]-$'), before
:find('>[^<]-$')
26 if s
and e
then return s
> e
and index
or nil end
27 if s
then return index
end
28 return input
:find('^[^<]->', index
) and index
or nil
-- Numbers: a run of digits with an optional trailing '%'.
-- The match is only attempted when a digit is next (lookahead) and the
-- l.last_char_includes('=') guard succeeds; after the token, the in_tag
-- predicate must also accept the position.
local number
do
  local digit_ahead = #l.digit
  local literal = l.digit^1 * P('%')^-1
  number = digit_ahead * l.last_char_includes('=') *
           token(l.NUMBER, literal) * in_tag
end
-- Identifiers.
-- `alpha` matches ASCII letters plus every byte in \127-\255, so names that
-- contain non-ASCII (for example UTF-8 encoded) characters can be lexed.
local alpha = R('az', 'AZ', '\127\255')

-- Punctuation accepted inside (and at the start of) a name.
local name_punct = S('_-:.??')

-- Any character that may continue a name. `alpha` is added alongside
-- l.alnum so high bytes are accepted; everything l.alnum matched before is
-- still matched.
local word_char = l.alnum + alpha + name_punct

-- A name: one starting character followed by any number of word characters.
-- Previously only l.alpha was consulted here and the `alpha` pattern above
-- was left unused; both are now accepted, which is a strict superset of the
-- old behavior.
local identifier = (l.alpha + alpha + name_punct) * word_char^0
-- Namespace suffix: a ':' operator token followed by a 'namespace' token
-- holding an identifier.
local namespace
do
  local colon = token(l.OPERATOR, ':')
  namespace = colon * token('namespace', identifier)
end

-- Elements: '<' with an optional '/', then an identifier, all in a single
-- 'element' token, optionally followed by a namespace suffix.
local element
do
  local tag_start = '<' * P('/')^-1 * identifier
  element = token('element', tag_start) * namespace^-1
end
44 local attribute
= token('attribute', identifier
) * namespace^
-1 *
-- Closing tags: '>' with an optional leading '/', tagged as 'element'.
local close_tag
do
  local tag_end = P('/')^-1 * P('>')
  close_tag = token('element', tag_end)
end

-- Equals: an '=' operator token that must also satisfy the in_tag predicate.
local equals
do
  local eq_sign = token(l.OPERATOR, '=')
  equals = eq_sign * in_tag
end
54 local entity
= token('entity', '&' * word_match
{
55 'lt', 'gt', 'amp', 'apos', 'quot'
-- Doctypes and other markup tags.
-- Sequence: the '<!DOCTYPE' keyword, whitespace, a name, an optional second
-- whitespace-separated name, then everything up to the closing '>'.
-- The text skipped before '>' is consumed without being wrapped in a token.
local doctype
do
  local keyword = token('doctype', P('<!DOCTYPE'))
  local root_name = token('doctype', identifier)
  local second_name = (ws * identifier)^-1
  local remainder = (1 - P('>'))^0
  local terminator = token('doctype', '>')
  doctype = keyword * ws * root_name * second_name * remainder * terminator
end

-- Processing instructions.
-- '<?' followed by anything up to '?>'; the closer is optional (^-1), so an
-- unterminated instruction still matches.
local proc_insn
do
  local opener, closer = P('<?'), P('?>')
  proc_insn = token('proc_insn', opener * (1 - closer)^0 * closer^-1)
end
71 {'proc_insn', proc_insn
},
73 {'close_tag', close_tag
},
74 {'attribute', attribute
},
82 element
= l
.STYLE_KEYWORD
,
83 namespace
= l
.STYLE_CLASS
,
84 attribute
= l
.STYLE_TYPE
,
85 cdata
= l
.STYLE_COMMENT
,
86 entity
= l
.STYLE_OPERATOR
,
87 doctype
= l
.STYLE_COMMENT
,
88 proc_insn
= l
.STYLE_COMMENT
,
89 --markup = l.STYLE_COMMENT
93 _patterns
= {'</?', '/>', '<!%-%-', '%-%->', '<!%[CDATA%[', '%]%]>'},
94 element
= {['<'] = 1, ['/>'] = -1, ['</'] = -1},
95 [l
.COMMENT
] = {['<!--'] = 1, ['-->'] = -1},
96 cdata
= {['<![CDATA['] = 1, [']]>'] = -1}