3 * Copyright (C) 2015 Florian Brosch
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Florian Brosch <flo.brosch@gmail.com>
27 * A cheap scanner used to highlight XML.
29 public class Valadoc
.Highlighter
.XmlScanner
: Object
, Scanner
{
// Tokens that have already been recognised and are waiting to be returned by next ().
30 private Queue
<CodeToken
> token_queue
= new Queue
<CodeToken
> ();
// The text passed to the constructor. The field is unowned.
// NOTE(review): presumably the caller has to keep the string alive while scanning — confirm.
31 private unowned
string content
;
// Current scan position; advanced by next () and by the queue_* helpers.
32 private unowned
string pos
;
// Creates a scanner for the given text and stores it in the `content` field.
// NOTE(review): the initialisation of `pos` is not visible in this excerpt —
// presumably it is set to the start of `content`; confirm against the full file.
35 public XmlScanner (string content
) {
36 this
.content
= content
;
// Returns the next token of the scanned text.
//
// Every recognised construct is pushed onto token_queue by a helper; the method then
// returns through dispatch (), which hands out the plain text found in front of the
// construct first (if there is any) and the queued token(s) on the following calls.
// NOTE(review): several guard lines of this method are not visible in this excerpt;
// the notes below marked "presumably" need to be checked against the full file.
40 public CodeToken
next () {
// Tokens queued by an earlier call are handed out before scanning continues.
41 if (!token_queue
.is_empty ()) {
42 return token_queue
.pop_head ();
// Walk the text character by character until the terminating NUL. `start` marks the
// beginning of the plain text collected during this call.
47 for (start
= pos
; pos
[0] != '\0'; pos
= pos
.next_char ()) {
// Escape sequence. NOTE(review): the condition guarding this branch is not shown —
// presumably a check for '&'.
49 unowned
string begin
= pos
;
50 if (queue_escape ()) {
51 return dispatch (start
, begin
);
// Markup starting with '<': end element, comment, CDATA opener or start element.
53 } else if (pos
[0] == '<') {
// NOTE(review): the guard for the end-element case is not shown — presumably "</".
55 unowned
string end
= pos
;
56 if (queue_end_element ()) {
57 return dispatch (start
, end
);
// "<!--" opens a comment.
59 } else if (pos
[1] == '!' && pos
[2] == '-' && pos
[3] == '-') {
60 unowned
string end
= pos
;
61 if (queue_comment ()) {
62 return dispatch (start
, end
);
// "<![CDATA[" is emitted as a token of its own.
64 } else if (pos
[1] == '!' && pos
[2] == '[' && pos
[3] == 'C' && pos
[4] == 'D' && pos
[5] == 'A' && pos
[6] == 'T' && pos
[7] == 'A' && pos
[8] == '[') {
65 unowned
string end
= pos
;
// NOTE(review): no advance of `pos` past the marker is visible here — confirm it exists.
67 token_queue
.push_tail (new
CodeToken (CodeTokenType
.XML_CDATA
, "<![CDATA["));
68 return dispatch (start
, end
);
// Anything else after '<' is tried as a start element; a '?' right after the '<'
// selects the xml_decl variant of queue_start_element ().
70 unowned
string end
= pos
;
71 if (queue_start_element (start
, pos
[1] == '?')) {
72 return dispatch (start
, end
);
// "]]>" closes a CDATA section and is emitted as a token of its own.
77 } else if (pos
[0] == ']' && pos
[1] == ']' && pos
[2] == '>') {
78 unowned
string end
= pos
;
80 token_queue
.push_tail (new
CodeToken (CodeTokenType
.XML_CDATA
, "]]>"));
81 return dispatch (start
, end
);
// End of input: queue an EOF token with empty text and return whatever plain text is left
// (or the EOF token itself when nothing is left).
85 token_queue
.push_tail (new
CodeToken (CodeTokenType
.EOF
, ""));
86 return dispatch (start
, pos
);
// Tries to scan a start element (or, when xml_decl is true, a construct closed by "?>")
// beginning at `pos` and queues its parts as tokens.
//
// dispatch_start: not referenced in the lines visible in this excerpt.
// xml_decl: when true the element has to be closed by "?>", otherwise by ">" or "/>".
// NOTE(review): the return statements are not visible here — presumably true on success
// and false after the queue has been cleared; confirm against the full file.
89 private bool queue_start_element (string dispatch_start
, bool xml_decl
) {
// The caller must have drained the queue before a new element is scanned.
90 assert (token_queue
.is_empty ());
92 unowned
string element_start
= pos
;
// NOTE(review): the lines that move `pos` past the opening '<' are not shown.
99 skip_optional_spaces (ref pos
);
// Element name; the queue is cleared when no valid name follows.
101 if (skip_id (ref pos
) == false) {
102 token_queue
.clear ();
107 skip_optional_spaces (ref pos
);
// Everything from the element start up to the current position becomes one XML_ELEMENT token.
109 queue_token (element_start
, pos
, CodeTokenType
.XML_ELEMENT
);
// Attributes; tokens queued so far are dropped when they are malformed.
111 if (queue_attributes () == false) {
112 token_queue
.clear ();
117 unowned
string element_end_start
= pos
;
// Accept the closing delimiter that matches the kind of element.
119 if (!xml_decl
&& pos
[0] == '>') {
120 pos
= pos
.offset (1);
121 } else if (!xml_decl
&& pos
[0] == '/' && pos
[1] == '>') {
122 pos
= pos
.offset (2);
123 } else if (xml_decl
&& pos
[0] == '?' && pos
[1] == '>') {
124 pos
= pos
.offset (2);
// NOTE(review): presumably the else branch for a missing delimiter — the queue is cleared.
126 token_queue
.clear ();
// The closing delimiter is queued as a second XML_ELEMENT token.
131 queue_token (element_end_start
, pos
, CodeTokenType
.XML_ELEMENT
);
// Scans a run of attributes starting at `pos` and queues an XML_ATTRIBUTE token and an
// XML_ATTRIBUTE_VALUE token for each of them.
// NOTE(review): the guards around the single-character advances and all return statements
// are not visible in this excerpt; the "presumably" notes below need confirmation.
135 private bool queue_attributes () {
// An attribute starts with an identifier character.
136 while (is_id_char (pos
[0])) {
137 unowned
string begin
= pos
;
// Attribute name.
139 if (skip_id (ref pos
) == false) {
143 skip_optional_spaces (ref pos
);
// Presumably steps over the '=' separating name and value.
146 pos
= pos
.offset (1);
151 skip_optional_spaces (ref pos
);
// Name, separator and surrounding spaces form the XML_ATTRIBUTE token.
153 queue_token (begin
, pos
, CodeTokenType
.XML_ATTRIBUTE
);
// Presumably steps over the opening double quote.
157 pos
= pos
.offset (1);
// Advance to the next double quote or to the end of the input.
162 while (pos
[0] != '"' && pos
[0] != '\0') {
163 pos
= pos
.offset (1);
// Presumably steps over the closing double quote.
167 pos
= pos
.offset (1);
172 skip_optional_spaces (ref pos
);
// NOTE(review): `begin` is used again here; a re-assignment to the start of the value is
// presumably among the lines not shown — confirm.
174 queue_token (begin
, pos
, CodeTokenType
.XML_ATTRIBUTE_VALUE
);
// Tries to scan an end element beginning at `pos` and queues it as one XML_ELEMENT token.
// NOTE(review): the failure paths and return statements are not visible in this excerpt.
180 private bool queue_end_element () {
181 unowned
string start
= pos
;
// Skip the two-character opener (presumably "</", checked by the caller).
182 pos
= pos
.offset (2);
184 skip_optional_spaces (ref pos
);
// Element name.
186 if (skip_id (ref pos
) == false) {
191 skip_optional_spaces (ref pos
);
// Presumably steps over the closing '>'; the guard is not shown.
194 pos
= pos
.offset (1);
200 queue_token (start
, pos
, CodeTokenType
.XML_ELEMENT
);
// Tries to scan an escape sequence beginning at `pos` and queues it as one XML_ESCAPE token.
// NOTE(review): the failure paths and return statements are not visible in this excerpt.
204 private bool queue_escape () {
205 unowned
string start
= pos
;
// Skip the introducing character (presumably '&').
206 pos
= pos
.offset (1);
// Name of the escape, scanned as an identifier.
208 if (skip_id (ref pos
) == false) {
// Presumably steps over the terminating ';'; the guard is not shown.
214 pos
= pos
.offset (1);
220 queue_token (start
, pos
, CodeTokenType
.XML_ESCAPE
);
// Tries to scan a comment beginning at `pos` and queues it as one XML_COMMENT token.
// NOTE(review): the failure path and return statements are not visible in this excerpt.
224 private bool queue_comment () {
225 unowned
string start
= pos
;
// Skip the four-character opener "<!--" that the caller matched.
226 pos
= pos
.offset (4);
// NOTE(review): the loop stops at the first '>' rather than at "-->", so a '>' inside the
// comment text would appear to end the token early — worth confirming and fixing.
228 while (pos
[0] != '>' && pos
[0] != '\0') {
229 pos
= pos
.offset (1);
// Presumably steps over the closing '>'; the guard is not shown.
233 pos
= pos
.offset (1);
239 queue_token (start
, pos
, CodeTokenType
.XML_COMMENT
);
// Advances `pos` over an identifier that may consist of several segments separated by ':'.
// NOTE(review): the declaration and updates of `has_id` and the return statement are not
// visible in this excerpt — presumably the result tells whether an identifier was found.
243 private static bool skip_id (ref unowned
string pos
) {
244 bool has_next_segment
= true;
// One iteration per ':'-separated segment.
247 while (has_next_segment
) {
// Consume the identifier characters of the current segment.
250 while (is_id_char (pos
[0])) {
251 pos
= pos
.offset (1);
// A ':' following a segment starts another segment; anything else ends the identifier.
255 if (pos
[0] == ':' && has_id
) {
256 has_next_segment
= true;
257 pos
= pos
.offset (1);
259 has_next_segment
= false;
// Advances `pos` past any run of whitespace characters (as classified by char.isspace ()).
// NOTE(review): the update of `skipped` and the return statement are not visible in this
// excerpt — presumably the result tells whether at least one character was skipped.
266 private static bool skip_optional_spaces (ref unowned
string pos
) {
267 bool skipped
= false;
269 while (pos
[0].isspace ()) {
270 pos
= pos
.offset (1);
/**
 * Picks the token to return after a construct has been queued.
 *
 * When //start// and //end// point to the same position there is no plain text in
 * front of the queued construct, so the head of the queue is returned. Otherwise the
 * text between the two positions is returned as a PLAIN token and the queued tokens
 * stay in the queue.
 *
 * @param start beginning of the not yet returned plain text
 * @param end position at which the queued construct begins
 * @return the token to hand out
 */
private CodeToken dispatch (string start, string end) {
    assert (!token_queue.is_empty ());

    // Pointer comparison: identical positions mean there is no plain text to emit.
    bool has_plain_text = ((char*) start) != ((char*) end);
    if (!has_plain_text) {
        return token_queue.pop_head ();
    }

    string plain_text = start.substring (0, start.pointer_to_offset (end));
    return new CodeToken (CodeTokenType.PLAIN, plain_text);
}
/**
 * Appends a token covering the text between //start// and //end// to the queue.
 *
 * @param start first character of the token text
 * @param end position right after the last character of the token text
 * @param token_type type assigned to the new token
 */
private void queue_token (string start, string end, CodeTokenType token_type) {
    string token_text = start.substring (0, start.pointer_to_offset (end));
    var token = new CodeToken (token_type, token_text);
    token_queue.push_tail (token);
}
/**
 * Tells whether //c// may be part of an identifier as recognised by this scanner.
 *
 * @param c the character to classify
 * @return true for '_', '-' and every character accepted by char.isalnum ()
 */
private static inline bool is_id_char (char c) {
    switch (c) {
    case '_':
    case '-':
        return true;
    default:
        return c.isalnum ();
    }
}
299 internal static bool is_xml (string source
) {
300 unowned
string pos
= source
;
302 skip_optional_spaces (ref pos
);
306 if (pos
.has_prefix ("<!--")) {
311 if (pos
.has_prefix ("<![CDATA[")) {
317 bool proc_instr
= false;
318 pos
= pos
.offset (1);
321 pos
= pos
.offset (1);
326 if (skip_id (ref pos
) == false) {
330 skip_optional_spaces (ref pos
);
332 while (skip_id (ref pos
)) {
334 pos
= pos
.offset (1);
339 skip_optional_spaces (ref pos
);
342 pos
= pos
.offset (1);
347 while (pos
[0] != '\0' && pos
[0] != '\n' && pos
[0] != '"') {
348 pos
= pos
.offset (1);
352 pos
= pos
.offset (1);
357 skip_optional_spaces (ref pos
);
360 if (proc_instr
&& pos
[0] == '?' && pos
[1] == '>') {
364 if (!proc_instr
&& (pos
[0] == '>' || (pos
[0] == '/' && pos
[1] == '>'))) {