gio-unix-2.0: Add DesktopAppInfo.launch_uris_as_manager_with_fds()
[vala-gnome.git] / libvaladoc / highlighter / codescanner.vala
blob6d65c2eb73c71e72123ee6b24a9c6e5b8f34a128
1 /* codescanner.vala
3 * Copyright (C) 2015 Florian Brosch
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 * Author:
20 * Florian Brosch <flo.brosch@gmail.com>
23 using GLib;
/**
 * A cheap scanner used to highlight C and Vala source code.
 *
 * Tokens are produced one at a time by next (); which constructs are
 * recognised is controlled by the feature switches passed to the
 * constructor.
 */
public class Valadoc.Highlighter.CodeScanner : Object, Scanner {
	// Looked up with each scanned word to decide whether it is a keyword.
	private Vala.HashMap<string, CodeTokenType?> keywords;

	// Treat @"..." as a string literal and look for $ expressions inside it.
	private bool enable_string_templates;
	// Recognise triple-quoted strings as a single literal.
	private bool enabel_verbatim_string;
	// Recognise additional preprocessor directives besides the conditional ones.
	private bool enable_preprocessor_define;
	// Recognise "#include" lines and split them into directive and path.
	private bool enable_preprocessor_include;
	// Allow '@' as the first character of a word.
	private bool enable_keyword_escape;

	// Tokens that have been recognised but not yet handed out.
	private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
	// The text being scanned; held without taking ownership.
	private unowned string content;
	// Current scan position inside content.
	private unowned string pos;
/**
 * Creates a scanner that starts at the beginning of //content//.
 *
 * The text is stored in an unowned field, i.e. it is not copied.
 *
 * @param content the source code to scan
 * @param enable_string_templates whether @"..." strings are recognised
 * @param enabel_verbatim_string whether triple-quoted strings are recognised
 * @param enable_preprocessor_define whether additional preprocessor directives are recognised
 * @param enable_preprocessor_include whether "#include" lines are recognised
 * @param enable_keyword_escape whether '@' may start a word
 * @param keywords maps words to the token type they are reported with
 */
public CodeScanner (string content, bool enable_string_templates, bool enabel_verbatim_string,
		bool enable_preprocessor_define, bool enable_preprocessor_include, bool enable_keyword_escape,
		Vala.HashMap<string, CodeTokenType?> keywords)
{
	this.keywords = keywords;

	this.enable_keyword_escape = enable_keyword_escape;
	this.enable_preprocessor_include = enable_preprocessor_include;
	this.enable_preprocessor_define = enable_preprocessor_define;
	this.enabel_verbatim_string = enabel_verbatim_string;
	this.enable_string_templates = enable_string_templates;

	// Scanning begins at the very first byte of the input.
	this.content = content;
	this.pos = content;
}
/**
 * Returns the next token of the input.
 *
 * Tokens queued by an earlier call are returned first. Otherwise the
 * input is walked until a highlightable construct (preprocessor line,
 * character/string literal, number, comment or keyword) is found. Any
 * text in front of that construct is returned as a PLAIN token and the
 * construct itself stays queued for the following calls. At the end of
 * the input an EOF token is queued.
 *
 * @return the next token
 */
public CodeToken next () {
	if (!token_queue.is_empty ()) {
		return token_queue.pop_head ();
	}

	unowned string start;

	for (start = pos; pos[0] != '\0'; pos = pos.next_char ()) {
		// Preprocessor directives are only looked for at the start of the
		// input or right after a line break.
		if (((char*) pos) == ((char*) content) || pos[0] == '\n') {
			unowned string line_start = pos;

			while (pos[0] == ' ' || pos[0] == '\t' || pos[0] == '\n') {
				pos = pos.offset (1);
			}

			if (pos[0] == '\0') {
				break;
			} else if (enable_preprocessor_include && pos.has_prefix ("#include")) {
				unowned string end = pos;
				if (queue_c_include ()) {
					return dispatch (start, end);
				} else {
					// Not a well-formed include: rescan the line as ordinary code.
					pos = line_start;
					continue;
				}
			} else if (pos.has_prefix ("#if") || pos.has_prefix ("#else") || pos.has_prefix ("#elif") || pos.has_prefix ("#endif")
				|| (enable_preprocessor_define && pos.has_prefix ("#define"))) {
				// "#if" already covers "#ifdef" and "#ifndef". The directive is
				// spelled "#define"; testing for "#defined" never matched it.

				unowned string end = pos;
				queue_until ('\n', CodeTokenType.PREPROCESSOR);
				return dispatch (start, end);
			}
		}

		if (pos[0] == '\'') {
			unowned string end = pos;
			queue_string_literal ("\'");
			return dispatch (start, end);
		}

		if (pos[0] == '"' || (enable_string_templates && pos[0] == '@' && pos[1] == '"')) {
			unowned string end = pos;
			if (enabel_verbatim_string && (pos.has_prefix ("\"\"\"") || (enable_string_templates && pos.has_prefix ("@\"\"\"")))) {
				queue_string_literal ("\"\"\"");
			} else {
				queue_string_literal ("\"");
			}
			return dispatch (start, end);
		}

		// A digit only starts a number when it is not the tail of a word;
		// otherwise the "1" of "foo1" would be reported as a literal.
		if (pos[0] >= '0' && pos[0] <= '9'
			&& (((char*) pos) == ((char*) content) || !isidchar (pos[-1]))) {
			unowned string end = pos;
			queue_numeric_literal ();
			return dispatch (start, end);
		}

		if (pos.has_prefix ("/*")) {
			unowned string end = pos;
			queue_multiline_comment ();
			return dispatch (start, end);
		}

		if (pos.has_prefix ("//")) {
			unowned string end = pos;
			queue_until ('\n', CodeTokenType.COMMENT);
			return dispatch (start, end);
		}

		// Keywords are only looked up at the first character of a word.
		if ((((char*) pos) == ((char*) content) || !isidstartchar (pos[-1])) && isidstartchar (pos[0])) {
			unowned string end = pos;
			if (queue_keyword ()) {
				return dispatch (start, end);
			} else {
				continue;
			}
		}
	}

	token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
	return dispatch (start, pos);
}
/**
 * Tries to scan an "#include" line starting at the current position.
 *
 * On success the directive is queued as PREPROCESSOR tokens with the
 * quoted or angle-bracketed path, if there is one, as a LITERAL token in
 * between. On failure the position is restored, the token queue is
 * cleared and nothing is queued.
 *
 * @return true if the line was queued, false if it is not a well-formed include
 */
private bool queue_c_include () {
	unowned string directive_start = pos;
	unowned string segment_start = pos;

	// Step over "#include" (8 bytes) and the blanks after it.
	pos = pos.offset (8);
	while (pos[0] == ' ' || pos[0] == '\t') {
		pos = pos.offset (1);
	}

	// '\0' means that no path delimiter was found.
	char closing = '\0';
	switch (pos[0]) {
	case '"':
		closing = '"';
		break;
	case '<':
		closing = '>';
		break;
	}

	if (closing != '\0') {
		queue_token (segment_start, pos, CodeTokenType.PREPROCESSOR);

		unowned string path_start = pos;
		pos = pos.offset (1);
		while (pos[0] != closing && pos[0] != '\n' && pos[0] != '\0') {
			pos = pos.offset (1);
		}

		if (pos[0] != closing) {
			// The path is not terminated on this line.
			pos = directive_start;
			token_queue.clear ();
			return false;
		}

		pos = pos.offset (1);
		queue_token (path_start, pos, CodeTokenType.LITERAL);
		segment_start = pos;
	}

	while (pos[0] == ' ' || pos[0] == '\t') {
		pos = pos.offset (1);
	}

	// Only blanks may follow up to the end of the line.
	if (pos[0] != '\n' && pos[0] != '\0') {
		pos = directive_start;
		token_queue.clear ();
		return false;
	}

	queue_token (segment_start, pos, CodeTokenType.PREPROCESSOR);
	return true;
}
/**
 * Scans the word at the current position and queues it if it is a
 * known keyword.
 *
 * A leading '@' is taken as part of the word. When the word is not
 * found in the keyword map the position is restored.
 *
 * @return true if a token was queued
 */
private bool queue_keyword () {
	unowned string word_start = pos;

	if (pos[0] == '@') {
		pos = pos.offset (1);
	}

	while (isidchar (pos[0])) {
		pos = pos.offset (1);
	}

	string candidate = word_start.substring (0, word_start.pointer_to_offset (pos));
	CodeTokenType? candidate_type = keywords.get (candidate);
	if (candidate_type != null) {
		token_queue.push_tail (new CodeToken (candidate_type, candidate));
		return true;
	}

	pos = word_start;
	return false;
}
/**
 * Queues the block comment that starts at the current position as a
 * COMMENT token.
 *
 * An unterminated comment extends to the end of the input.
 */
private void queue_multiline_comment () {
	unowned string comment_start = pos;

	// Step over the two-byte opening marker.
	pos = pos.offset (2);

	while (pos[0] != '\0' && (pos[0] != '*' || pos[1] != '/')) {
		pos = pos.offset (1);
	}

	// Include the closing marker if the comment was terminated.
	if (pos[0] != '\0') {
		pos = pos.offset (2);
	}

	queue_token (comment_start, pos, CodeTokenType.COMMENT);
}
/**
 * Queues everything from the current position up to //end_char// or the
 * end of the input as one token.
 *
 * The terminating character becomes part of the token unless it is a
 * line break.
 *
 * @param end_char the character that ends the token
 * @param token_type the type the token is queued with
 */
private void queue_until (char end_char, CodeTokenType token_type) {
	unowned string token_start = pos;

	// The first byte always belongs to the token.
	do {
		pos = pos.offset (1);
	} while (pos[0] != end_char && pos[0] != '\0');

	if (pos[0] != '\0' && pos[0] != '\n') {
		pos = pos.offset (1);
	}

	queue_token (token_start, pos, token_type);
}
/**
 * Queues the string or character literal at the current position.
 *
 * The literal is split into LITERAL tokens with an ESCAPE token for
 * every printf format, backslash escape and, for literals starting with
 * '@', template expression found inside it.
 *
 * @param end_chars the delimiter that opens and closes the literal
 */
private void queue_string_literal (string end_chars) {
	unowned string piece_start = pos;
	bool is_template = (pos[0] == '@');

	// Step over the opening delimiter, plus the '@' of a template.
	int opening_length = end_chars.length;
	if (is_template) {
		opening_length++;
	}
	pos = pos.offset (opening_length);

	while (pos[0] != '\0' && !pos.has_prefix (end_chars)) {
		long skip = 0;
		bool is_escape;

		switch (pos[0]) {
		case '%':
			is_escape = has_printf_format_prefix (out skip);
			break;
		case '\\':
			is_escape = has_escape_prefix (out skip);
			break;
		case '$':
			is_escape = is_template && has_template_literal_prefix (out skip);
			break;
		default:
			is_escape = false;
			break;
		}

		if (!is_escape) {
			pos = pos.offset (1);
			continue;
		}

		// Close the literal piece in front of the escape, then queue the
		// escape itself and start a new piece behind it.
		queue_token (piece_start, pos, CodeTokenType.LITERAL);

		unowned string escape_start = pos;
		pos = pos.offset (skip);
		queue_token (escape_start, pos, CodeTokenType.ESCAPE);
		piece_start = pos;
	}

	// Include the closing delimiter unless the input ended first.
	if (pos[0] != '\0') {
		pos = pos.offset (end_chars.length);
	}

	queue_token (piece_start, pos, CodeTokenType.LITERAL);
}
/**
 * Checks whether a template expression starts at the current position,
 * which is expected to be a '$'.
 *
 * Two forms are accepted: '$' followed by identifier characters, and
 * '$' followed by a parenthesised expression with balanced parentheses.
 *
 * @param skip length of the expression including the '$', or 0
 * @return true if a template expression was found
 */
private bool has_template_literal_prefix (out long skip) {
	char first = pos[1];

	if (isidchar (first)) {
		// pos[1] is already known to belong to the name.
		skip = 1;
		do {
			skip++;
		} while (isidchar (pos[skip]));
		return true;
	}

	if (first != '(') {
		skip = 0;
		return false;
	}

	int depth = 1;
	for (skip = 2; depth > 0; skip++) {
		char c = pos[skip];
		if (c == '\0') {
			// The input ended before the parentheses were balanced.
			skip = 0;
			return false;
		}

		if (c == '(') {
			depth++;
		} else if (c == ')') {
			depth--;
		}
	}

	return true;
}
/**
 * Checks whether a backslash escape sequence starts at the current
 * position, which is expected to be a backslash.
 *
 * Recognised are the single-character escapes, "\x" followed by one or
 * more hexadecimal digits, and a backslash followed by one to three
 * decimal digits.
 *
 * @param skip length of the sequence including the backslash, or 0
 * @return true if an escape sequence was found
 */
private bool has_escape_prefix (out long skip) {
	switch (pos[1]) {
	case 'a':
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
	case 'v':
	case '\\':
	case '\'':
	case '\"':
	case '?':
		skip = 2;
		return true;

	case 'x':
		if (!pos[2].isxdigit ()) {
			skip = 0;
			return false;
		}

		// Count every hex digit exactly once. Stepping twice per digit
		// made the sequence at least one byte too long, which swallowed
		// the character after it (usually the closing quote).
		skip = 3;
		while (pos[skip].isxdigit ()) {
			skip++;
		}
		return true;

	default:
		if (pos[1].isdigit ()) {
			skip = 2;

			if (pos[2].isdigit ()) {
				skip++;

				if (pos[3].isdigit ()) {
					skip++;
				}
			}

			return true;
		}

		skip = 0;
		return false;
	}
}
/**
 * Checks whether a printf format specification starts at the current
 * position, which is expected to be a '%'.
 *
 * The accepted shape is
 * %[flags][min width][precision][length modifier][conversion specifier];
 * "%%" is accepted as well. The scanner position is not changed.
 *
 * @param skip length of the specification including the '%', or 0
 * @return true if a format specification was found
 */
private bool has_printf_format_prefix (out long skip) {
	// Work on a local cursor so that this.pos stays untouched.
	unowned string cursor = this.pos;
	unowned string start = cursor;

	// '%'
	cursor = cursor.offset (1);

	if (cursor[0] == '%') {
		skip = 2;
		return true;
	}

	// flags: compared one by one so that '#' is accepted too (a
	// "position > 0" lookup in "#0+- " rejects it) and NUL never is
	while (cursor[0] == '#' || cursor[0] == '0' || cursor[0] == '+'
		|| cursor[0] == '-' || cursor[0] == ' ') {
		cursor = cursor.offset (1);
	}

	// min width:
	while (cursor[0].isdigit ()) {
		cursor = cursor.offset (1);
	}

	// precision
	if (cursor[0] == '.' && cursor[1].isdigit ()) {
		cursor = cursor.offset (2);
		while (cursor[0].isdigit ()) {
			cursor = cursor.offset (1);
		}
	}

	// length:
	switch (cursor[0]) {
	case 'h':
		cursor = cursor.offset (1);
		if (cursor[0] == 'h') {
			cursor = cursor.offset (1);
		}
		break;

	case 'l':
		cursor = cursor.offset (1);
		if (cursor[0] == 'l') {
			cursor = cursor.offset (1);
		}
		break;

	case 'j':
	case 'z':
	case 't':
	case 'L':
		cursor = cursor.offset (1);
		break;
	}

	// conversion specifier:
	switch (cursor[0]) {
	case 'd':
	case 'i':
	case 'u':
	case 'o':
	case 'x':
	case 'X':
	case 'f':
	case 'F':
	case 'e':
	case 'E':
	case 'g':
	case 'G':
	case 'a':
	case 'A':
	case 'c':
	case 's':
	case 'p':
	case 'n':
		cursor = cursor.offset (1);
		break;

	default:
		skip = 0;
		return false;
	}

	// Everything matched above is a single-byte character, so the byte
	// distance is the length of the specification.
	skip = (long) ((char*) cursor - (char*) start);
	return true;
}
/**
 * Classification of a numeric literal while it is being scanned.
 */
private enum NumericType {
	// The initial classification of every literal.
	INTEGER,
	// Set when a fraction, an exponent or an f/F/d/D suffix is seen.
	REAL,
	// Set when a letter or digit directly follows the scanned number.
	NONE
}
/**
 * Queues the number at the current position.
 *
 * Scans an integer part (decimal or "0x" hexadecimal), an optional
 * fraction, an optional exponent and an optional type suffix. The text
 * is queued as LITERAL, or as PLAIN when a letter or digit directly
 * follows the scanned number.
 *
 * Based on libvala.
 */
private void queue_numeric_literal () {
	unowned string literal_start = pos;
	NumericType kind = NumericType.INTEGER;

	// integer part
	bool is_hex = pos[0] == '0' && pos[1] == 'x' && pos[2].isxdigit ();
	if (is_hex) {
		pos = pos.offset (2);
		while (pos[0].isxdigit ()) {
			pos = pos.offset (1);
		}
	} else {
		while (pos[0].isdigit ()) {
			pos = pos.offset (1);
		}
	}

	// fractional part: the dot is only taken when a digit follows it
	if (pos[0] == '.' && pos[1].isdigit ()) {
		kind = NumericType.REAL;
		do {
			pos = pos.offset (1);
		} while (pos[0].isdigit ());
	}

	// exponent part
	if (pos[0] == 'e' || pos[0] == 'E') {
		kind = NumericType.REAL;
		pos = pos.offset (1);
		if (pos[0] == '+' || pos[0] == '-') {
			pos = pos.offset (1);
		}
		while (pos[0].isdigit ()) {
			pos = pos.offset (1);
		}
	}

	// type suffix
	char suffix = pos[0];
	if (suffix == 'l' || suffix == 'L') {
		// l, ll
		if (kind == NumericType.INTEGER) {
			pos = pos.offset (1);
			if (pos[0] == 'l' || pos[0] == 'L') {
				pos = pos.offset (1);
			}
		}
	} else if (suffix == 'u' || suffix == 'U') {
		// u, ul, ull
		if (kind == NumericType.INTEGER) {
			pos = pos.offset (1);
			if (pos[0] == 'l' || pos[0] == 'L') {
				pos = pos.offset (1);
				if (pos[0] == 'l' || pos[0] == 'L') {
					pos = pos.offset (1);
				}
			}
		}
	} else if (suffix == 'f' || suffix == 'F' || suffix == 'd' || suffix == 'D') {
		kind = NumericType.REAL;
		pos = pos.offset (1);
	}

	// A letter or digit right behind the number means it is not a literal.
	if (pos[0].isalnum ()) {
		kind = NumericType.NONE;
	}

	CodeTokenType token_type = (kind == NumericType.NONE)
		? CodeTokenType.PLAIN
		: CodeTokenType.LITERAL;
	queue_token (literal_start, pos, token_type);
}
/**
 * Produces the token to return from next () once something was queued.
 *
 * If there is text between //start// and //end// it is returned as a
 * PLAIN token and the queued tokens stay queued. Otherwise the head of
 * the queue is returned.
 *
 * @param start beginning of the text that was skipped
 * @param end position where the first queued token begins
 * @return the token to hand out
 */
private CodeToken dispatch (string start, string end) {
	assert (token_queue.is_empty () == false);

	if (((char*) start) == ((char*) end)) {
		return token_queue.pop_head ();
	}

	// substring () takes a byte count; a character count would cut the
	// text short as soon as it contains a multi-byte character.
	long byte_length = (long) ((char*) end - (char*) start);
	string plain_text = start.substring (0, byte_length);
	return new CodeToken (CodeTokenType.PLAIN, plain_text);
}
/**
 * Appends the text between //start// and //end// to the token queue.
 *
 * @param start beginning of the token text
 * @param end position right behind the token text
 * @param token_type the type the token is queued with
 */
private void queue_token (string start, string end, CodeTokenType token_type) {
	// substring () takes a byte count; a character count would cut the
	// token short as soon as it contains a multi-byte character.
	long byte_length = (long) ((char*) end - (char*) start);
	string text = start.substring (0, byte_length);
	token_queue.push_tail (new CodeToken (token_type, text));
}
/**
 * Tells whether //c// can be part of a word: a letter, a digit or an
 * underscore.
 */
private inline bool isidchar (char c) {
	return c == '_' || c.isalnum ();
}
/**
 * Tells whether //c// can begin a word: a letter, a digit, an
 * underscore or, if keyword escaping is enabled, '@'.
 */
private inline bool isidstartchar (char c) {
	if (c == '@') {
		return enable_keyword_escape;
	}

	return isidchar (c);
}