gio-unix-2.0: Add DesktopAppInfo.launch_uris_as_manager_with_fds()
[vala-gnome.git] / libvaladoc / highlighter / xmlscanner.vala
blob8c048b9b4a43d18fb0d9cce4716dbb578ee6ff9a
/* xmlscanner.vala
 *
 * Copyright (C) 2015 Florian Brosch
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Author:
 * 	Florian Brosch <flo.brosch@gmail.com>
 */
23 using GLib;
/**
 * A cheap scanner used to highlight XML.
 */
29 public class Valadoc.Highlighter.XmlScanner : Object, Scanner {
30 private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
31 private unowned string content;
32 private unowned string pos;
/**
 * Creates a scanner positioned at the beginning of the given text.
 *
 * The string is stored in unowned fields, i.e. it is neither copied nor
 * referenced by the scanner; presumably the caller has to keep it alive
 * for as long as the scanner is in use.
 *
 * @param content the XML text to tokenize
 */
public XmlScanner (string content) {
	// The read cursor starts on the first byte of the input.
	this.pos = content;
	this.content = content;
}
/**
 * Returns the next token of the input.
 *
 * Tokens left in the queue by a previous call are handed out first.
 * Otherwise the input is scanned until a markup construct (escape,
 * end tag, comment, CDATA delimiter or start tag) is recognized or the
 * end of the string is reached. Text that precedes the recognized
 * construct is returned as a plain token; the construct's own tokens
 * stay queued for the following calls.
 *
 * @return the next token; an EOF token once the input is exhausted
 */
public CodeToken next () {
	if (!token_queue.is_empty ()) {
		return token_queue.pop_head ();
	}

	// First byte of the not yet reported plain text.
	unowned string plain_begin = pos;

	while (pos[0] != '\0') {
		// Where the candidate construct starts, i.e. where the plain text ends.
		unowned string candidate = pos;
		bool matched = false;

		if (pos[0] == '&') {
			matched = queue_escape ();
		} else if (pos[0] == '<') {
			if (pos[1] == '/') {
				matched = queue_end_element ();
			} else if (pos.has_prefix ("<!--")) {
				matched = queue_comment ();
			} else if (pos.has_prefix ("<![CDATA[")) {
				pos = pos.offset (9);
				token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "<![CDATA["));
				matched = true;
			} else {
				matched = queue_start_element (plain_begin, pos[1] == '?');
			}
		} else if (pos.has_prefix ("]]>")) {
			pos = pos.offset (3);
			token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "]]>"));
			matched = true;
		}

		if (matched) {
			return dispatch (plain_begin, candidate);
		}

		// Nothing recognized here (the queue_* helpers restore pos on
		// failure): treat the character as plain text and move on.
		pos = pos.next_char ();
	}

	token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
	return dispatch (plain_begin, pos);
}
/**
 * Tries to scan a start tag ("<name attr="v">", "<name/>") or, when
 * xml_decl is set, a "<?name ... ?>" construct at the current position
 * and queues its tokens.
 *
 * On failure the token queue is cleared and pos is moved back to where
 * it was on entry.
 *
 * @param dispatch_start not used by this method
 * @param xml_decl true if the construct starts with "<?" and therefore
 *                 has to end with "?>"
 * @return true if the whole construct was recognized and queued
 */
private bool queue_start_element (string dispatch_start, bool xml_decl) {
	assert (token_queue.is_empty ());

	unowned string tag_begin = pos;

	// Step over "<" or "<?".
	pos = pos.offset (xml_decl ? 2 : 1);
	skip_optional_spaces (ref pos);

	if (!skip_id (ref pos)) {
		token_queue.clear ();
		pos = tag_begin;
		return false;
	}

	// The element token spans the opening bracket, the name and the
	// whitespace that follows it.
	skip_optional_spaces (ref pos);
	queue_token (tag_begin, pos, CodeTokenType.XML_ELEMENT);

	if (!queue_attributes ()) {
		token_queue.clear ();
		pos = tag_begin;
		return false;
	}

	// Length of the closing delimiter; 0 means none was found.
	int closer_length = 0;
	if (xml_decl) {
		if (pos[0] == '?' && pos[1] == '>') {
			closer_length = 2;
		}
	} else if (pos[0] == '>') {
		closer_length = 1;
	} else if (pos[0] == '/' && pos[1] == '>') {
		closer_length = 2;
	}

	if (closer_length == 0) {
		token_queue.clear ();
		pos = tag_begin;
		return false;
	}

	unowned string closer_begin = pos;
	pos = pos.offset (closer_length);
	queue_token (closer_begin, pos, CodeTokenType.XML_ELEMENT);
	return true;
}
/**
 * Scans a possibly empty sequence of name="value" pairs at the current
 * position and queues an attribute token and an attribute-value token
 * for each of them.
 *
 * Values may be delimited by double or by single quotes; the closing
 * quote has to be the same character as the opening one.
 *
 * On failure pos and the token queue are left in their intermediate
 * state; queue_start_element () clears the queue and restores pos in
 * that case.
 *
 * @return false if an attribute is malformed (missing '=', missing or
 *         unterminated quoted value), true otherwise
 */
private bool queue_attributes () {
	while (is_id_char (pos[0])) {
		unowned string begin = pos;

		if (!skip_id (ref pos)) {
			return false;
		}

		skip_optional_spaces (ref pos);

		if (pos[0] != '=') {
			return false;
		}
		pos = pos.offset (1);

		skip_optional_spaces (ref pos);

		// The attribute token covers the name, the '=' and the
		// whitespace around it.
		queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE);
		begin = pos;

		// XML allows both "..." and '...' as value delimiters.
		char quote = pos[0];
		if (quote != '"' && quote != '\'') {
			return false;
		}
		pos = pos.offset (1);

		while (pos[0] != quote && pos[0] != '\0') {
			pos = pos.offset (1);
		}

		if (pos[0] != quote) {
			// Reached the end of the input inside the value.
			return false;
		}
		pos = pos.offset (1);

		skip_optional_spaces (ref pos);

		queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE_VALUE);
	}

	return true;
}
/**
 * Tries to scan an end tag ("</name>") at the current position and
 * queues it as a single element token.
 *
 * On failure pos is moved back to where it was on entry and nothing is
 * queued.
 *
 * @return true if an end tag was recognized and queued
 */
private bool queue_end_element () {
	unowned string tag_begin = pos;

	// Step over the leading "</".
	pos = pos.offset (2);
	skip_optional_spaces (ref pos);

	bool well_formed = skip_id (ref pos);
	if (well_formed) {
		skip_optional_spaces (ref pos);
		well_formed = (pos[0] == '>');
	}

	if (!well_formed) {
		pos = tag_begin;
		return false;
	}

	pos = pos.offset (1);
	queue_token (tag_begin, pos, CodeTokenType.XML_ELEMENT);
	return true;
}
/**
 * Tries to scan an entity or character reference at the current
 * position and queues it as a single escape token.
 *
 * Recognized forms are named references ("&amp;"), decimal character
 * references ("&#60;") and hexadecimal character references ("&#x3C;").
 *
 * On failure pos is moved back to where it was on entry and nothing is
 * queued.
 *
 * @return true if a reference was recognized and queued
 */
private bool queue_escape () {
	unowned string start = pos;

	// Step over the '&'.
	pos = pos.offset (1);

	bool has_body;
	if (pos[0] == '#') {
		// Numeric character reference: "&#" digits ";" or "&#x" hex-digits ";".
		pos = pos.offset (1);

		bool hex = (pos[0] == 'x');
		if (hex) {
			pos = pos.offset (1);
		}

		has_body = false;
		while (hex ? pos[0].isxdigit () : pos[0].isdigit ()) {
			pos = pos.offset (1);
			has_body = true;
		}
	} else {
		has_body = skip_id (ref pos);
	}

	if (!has_body || pos[0] != ';') {
		pos = start;
		return false;
	}

	pos = pos.offset (1);
	queue_token (start, pos, CodeTokenType.XML_ESCAPE);
	return true;
}
/**
 * Tries to scan a comment ("<!-- ... -->") at the current position and
 * queues it as a single comment token.
 *
 * The comment extends up to and including the first "-->" that follows
 * the opening "<!--"; a lone '>' inside the comment does not end it.
 *
 * On failure (no terminator before the end of the input) pos is moved
 * back to where it was on entry and nothing is queued.
 *
 * @return true if a terminated comment was recognized and queued
 */
private bool queue_comment () {
	unowned string start = pos;

	// Step over the opening "<!--".
	pos = pos.offset (4);

	// Byte index of the terminator relative to pos, or -1 if there is none.
	int terminator = pos.index_of ("-->");
	if (terminator < 0) {
		pos = start;
		return false;
	}

	pos = pos.offset (terminator + 3);
	queue_token (start, pos, CodeTokenType.XML_COMMENT);
	return true;
}
/**
 * Advances pos over an identifier made of one or more segments of
 * id characters separated by ':'.
 *
 * pos is advanced even if the method returns false, e.g. over "a:" when
 * no further segment follows the colon.
 *
 * @param pos cursor into the scanned text, moved past the consumed bytes
 * @return true if the last scanned segment was not empty
 */
private static bool skip_id (ref unowned string pos) {
	bool segment_found;
	bool more_segments;

	do {
		segment_found = false;

		while (is_id_char (pos[0])) {
			pos = pos.offset (1);
			segment_found = true;
		}

		// A colon only continues the identifier if it follows a
		// non-empty segment.
		more_segments = segment_found && pos[0] == ':';
		if (more_segments) {
			pos = pos.offset (1);
		}
	} while (more_segments);

	return segment_found;
}
/**
 * Advances pos over any run of whitespace characters.
 *
 * @param pos cursor into the scanned text, moved to the first
 *            non-whitespace byte
 * @return true if at least one character was skipped
 */
private static bool skip_optional_spaces (ref unowned string pos) {
	unowned string origin = pos;

	while (pos[0].isspace ()) {
		pos = pos.offset (1);
	}

	// The cursor moved if and only if something was skipped.
	return ((char*) pos) != ((char*) origin);
}
/**
 * Produces the token to return from next () after tokens were queued.
 *
 * If start and end point to the same position, no plain text precedes
 * the queued tokens and the head of the queue is returned. Otherwise
 * the text between start and end is returned as a plain token and the
 * queued tokens are left for the following calls.
 *
 * @param start beginning of the pending plain text
 * @param end position right after the pending plain text; has to point
 *            into the same string as start, at or behind it
 * @return the next token to hand out
 */
private CodeToken dispatch (string start, string end) {
	assert (token_queue.is_empty () == false);

	if (((char*) start) == ((char*) end)) {
		return token_queue.pop_head ();
	}

	// substring () works on byte offsets, so the length has to be the
	// distance in bytes. pointer_to_offset () would yield the number of
	// UTF-8 characters and truncate text containing multi-byte characters.
	long length = (long) (((char*) end) - ((char*) start));
	string content = start.substring (0, length);
	return new CodeToken (CodeTokenType.PLAIN, content);
}
/**
 * Appends a token holding the text between start and end to the queue.
 *
 * @param start beginning of the token text
 * @param end position right after the token text; has to point into
 *            the same string as start, at or behind it
 * @param token_type type of the queued token
 */
private void queue_token (string start, string end, CodeTokenType token_type) {
	// substring () works on byte offsets, so the length has to be the
	// distance in bytes. pointer_to_offset () would yield the number of
	// UTF-8 characters and cut off tokens containing multi-byte
	// characters (e.g. non-ASCII attribute values).
	long length = (long) (((char*) end) - ((char*) start));
	string content = start.substring (0, length);
	token_queue.push_tail (new CodeToken (token_type, content));
}
/**
 * Tells whether c may be part of an identifier segment as scanned by
 * this class: an alphanumeric character, '_' or '-'.
 *
 * @param c the character to test
 * @return true if c is an id character
 */
private static inline bool is_id_char (char c) {
	switch (c) {
	case '_':
	case '-':
		return true;
	default:
		return c.isalnum ();
	}
}
/**
 * Heuristically decides whether source looks like XML.
 *
 * After optional leading whitespace the text has to begin with a
 * comment opener ("<!--"), a CDATA opener ("<![CDATA[") or a complete
 * start tag / "<?name ... ?>" construct including its attributes.
 * Attribute values may use double or single quotes, may not span
 * lines, and whitespace is accepted on both sides of the '='.
 *
 * @param source the text to inspect
 * @return true if the beginning of source looks like XML
 */
internal static bool is_xml (string source) {
	unowned string pos = source;

	skip_optional_spaces (ref pos);

	if (pos[0] != '<') {
		return false;
	}

	// Comment or CDATA section:
	if (pos.has_prefix ("<!--") || pos.has_prefix ("<![CDATA[")) {
		return true;
	}

	// Start tag or "<?...?>" construct:
	pos = pos.offset (1);

	bool proc_instr = (pos[0] == '?');
	if (proc_instr) {
		pos = pos.offset (1);
	}

	// Element name:
	if (!skip_id (ref pos)) {
		return false;
	}

	skip_optional_spaces (ref pos);

	// Attributes:
	while (skip_id (ref pos)) {
		// Whitespace before '=' is legal and also accepted by the
		// scanner's attribute parsing.
		skip_optional_spaces (ref pos);

		if (pos[0] != '=') {
			return false;
		}
		pos = pos.offset (1);

		skip_optional_spaces (ref pos);

		// XML allows both "..." and '...' as value delimiters.
		char quote = pos[0];
		if (quote != '"' && quote != '\'') {
			return false;
		}
		pos = pos.offset (1);

		while (pos[0] != '\0' && pos[0] != '\n' && pos[0] != quote) {
			pos = pos.offset (1);
		}

		if (pos[0] != quote) {
			// Value is unterminated or continues on the next line.
			return false;
		}
		pos = pos.offset (1);

		skip_optional_spaces (ref pos);
	}

	// Closing delimiter:
	if (proc_instr) {
		return pos[0] == '?' && pos[1] == '>';
	}

	return pos[0] == '>' || (pos[0] == '/' && pos[1] == '>');
}