3 * Copyright (C) 2015 Florian Brosch
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Florian Brosch <flo.brosch@gmail.com>
27 * A cheap scanner used to highlight C and Vala source code.
29 public class Valadoc
.Highlighter
.CodeScanner
: Object
, Scanner
{
/** Maps a word to the token type it is highlighted with; consulted by queue_keyword (). */
private Vala.HashMap<string, CodeTokenType?> keywords;

// Feature switches, each copied from the constructor argument of the same name.
private bool enable_string_templates;
// The spelling "enabel" is kept on purpose: the constructor and next () use this exact name.
private bool enabel_verbatim_string;
private bool enable_preprocessor_define;
private bool enable_preprocessor_include;
private bool enable_keyword_escape;

/** Tokens that were produced ahead of time; next () hands them out from the head. */
private Queue<CodeToken> token_queue = new Queue<CodeToken> ();

/** The text being scanned; held as an unowned reference. */
private unowned string content;

/** Current scan position; compared against content by address in next (). */
private unowned string pos;
/**
 * Creates a scanner for the given content.
 *
 * Every argument is stored in the field of the same name. The feature
 * switches are read by next () and isidstartchar (); keywords is the
 * lookup table used by queue_keyword ().
 *
 * NOTE(review): no assignment to the pos field is visible in this excerpt;
 * presumably it is initialised to content in a line not shown - confirm.
 */
43 public CodeScanner (string content
, bool enable_string_templates
, bool enabel_verbatim_string
,
44 bool enable_preprocessor_define
, bool enable_preprocessor_include
, bool enable_keyword_escape
,
45 Vala
.HashMap
<string, CodeTokenType?
> keywords
)
// content is kept as an unowned reference (see the field declaration).
47 this
.content
= content
;
50 this
.enable_string_templates
= enable_string_templates
;
51 this
.enabel_verbatim_string
= enabel_verbatim_string
;
52 this
.enable_preprocessor_define
= enable_preprocessor_define
;
53 this
.enable_preprocessor_include
= enable_preprocessor_include
;
54 this
.enable_keyword_escape
= enable_keyword_escape
;
56 this
.keywords
= keywords
;
/**
 * Returns the next token.
 *
 * Tokens left in the queue by an earlier call are returned first.
 * Otherwise the text is scanned from pos until a recognised construct
 * (preprocessor line, quoted literal, number, comment or keyword) is
 * found; the matching queue_* helper queues its tokens and
 * dispatch (start, end) decides what is returned. When the end of the
 * text is reached an EOF token with empty content is queued.
 */
59 public CodeToken
next () {
// Tokens queued by an earlier call are handed out first.
60 if (!token_queue
.is_empty ()) {
61 return token_queue
.pop_head ();
// start remembers where scanning began; it is later passed to dispatch ().
67 for (start
= pos
; pos
[0] != '\0'; pos
= pos
.next_char ()) {
// At the first character of the content or at a line break: whitespace is
// skipped and preprocessor directives are looked for.
68 if (((char*) pos
) == ((char*) content
) || pos
[0] == '\n') {
69 unowned
string line_start
= pos
;
71 while (pos
[0] == ' ' || pos
[0] == '\t' || pos
[0] == '\n') {
77 } else if (enable_preprocessor_include
&& pos
.has_prefix ("#include")) {
78 unowned
string end
= pos
;
79 if (queue_c_include ()) {
80 return dispatch (start
, end
);
85 } else if (pos
.has_prefix ("#if") || pos
.has_prefix ("#else") || pos
.has_prefix ("#elif") || pos
.has_prefix ("#endif")
// NOTE(review): "#ifdef" is already matched by the "#if" prefix above, and
// "#defined" does not match a "#define" line; "#define" was possibly
// intended here - confirm.
86 || (enable_preprocessor_define
&& (pos
.has_prefix ("#defined") || pos
.has_prefix ("#ifdef")))) {
88 unowned
string end
= pos
;
// The directive token runs up to the line break.
89 queue_until ('\n', CodeTokenType
.PREPROCESSOR
);
90 return dispatch (start
, end
);
// Literal delimited by a single quote: the quote is passed as terminator.
95 unowned
string end
= pos
;
96 queue_string_literal ("\'");
97 return dispatch (start
, end
);
// Double-quoted string, optionally prefixed with '@' when templates are enabled.
100 if (pos
[0] == '"' || (enable_string_templates
&& pos
[0] == '@' && pos
[1] == '"')) {
101 unowned
string end
= pos
;
// Three double quotes are used as terminator for verbatim strings when that
// feature is enabled; a single double quote is used in the other call below.
102 if (enabel_verbatim_string
&& (pos
.has_prefix ("\"\"\"") || (enable_string_templates
&& pos
.has_prefix ("@\"\"\"")))) {
103 queue_string_literal ("\"\"\"");
105 queue_string_literal ("\"");
107 return dispatch (start
, end
);
// A decimal digit starts a numeric literal.
110 if (pos
[0] >= '0' && pos
[0] <= '9') {
111 unowned
string end
= pos
;
112 queue_numeric_literal ();
113 return dispatch (start
, end
);
// Block comment.
116 if (pos
.has_prefix ("/*")) {
117 unowned
string end
= pos
;
118 queue_multiline_comment ();
119 return dispatch (start
, end
);
// Line comment: runs up to the line break.
122 if (pos
.has_prefix ("//")) {
123 unowned
string end
= pos
;
124 queue_until ('\n', CodeTokenType
.COMMENT
);
125 return dispatch (start
, end
);
// An identifier start character that does not follow another such character:
// try to match a keyword.
128 if ((((char*) pos
) == ((char*) content
) || !isidstartchar (pos
[-1])) && isidstartchar (pos
[0])) {
129 unowned
string end
= pos
;
130 if (queue_keyword ()) {
131 return dispatch (start
, end
);
// End of the text: queue EOF; dispatch () still returns the text between
// start and pos first when there is any.
138 token_queue
.push_tail (new
CodeToken (CodeTokenType
.EOF
, ""));
139 return dispatch (start
, pos
);
/**
 * Tries to queue the tokens of an include directive that starts at pos.
 *
 * The visible code skips the directive name, skips blanks, queues the
 * directive as a PREPROCESSOR token and the delimited file name as a
 * LITERAL token, and queues trailing blanks up to the line end as
 * another PREPROCESSOR token. On the paths that call token_queue.clear ()
 * everything queued so far is discarded.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * next () dispatches only when true is returned - confirm the exact
 * conditions.
 */
142 private bool queue_c_include () {
143 unowned
string include_start
= pos
;
144 unowned
string start
= pos
;
// 8 is the length of "#include", the prefix next () checked before calling.
145 pos
= pos
.offset (8);
147 while (pos
[0] == ' ' || pos
[0] == '\t') {
148 pos
= pos
.offset (1);
// Closing delimiter of the file name; stays null when no opening delimiter is found.
151 char? end_char
= null;
154 } else if (pos
[0] == '<') {
158 if (end_char
!= null) {
159 queue_token (start
, pos
, CodeTokenType
.PREPROCESSOR
);
161 unowned
string literal_start
= pos
;
162 pos
= pos
.offset (1);
// The file name ends at the closing delimiter, the line break or the end of the text.
164 while (pos
[0] != end_char
&& pos
[0] != '\n' && pos
[0] != '\0') {
165 pos
= pos
.offset (1);
168 if (pos
[0] == end_char
) {
169 pos
= pos
.offset (1);
171 queue_token (literal_start
, pos
, CodeTokenType
.LITERAL
);
175 token_queue
.clear ();
180 while (pos
[0] == ' ' || pos
[0] == '\t') {
181 pos
= pos
.offset (1);
184 if (pos
[0] == '\n' || pos
[0] == '\0') {
185 queue_token (start
, pos
, CodeTokenType
.PREPROCESSOR
);
189 token_queue
.clear ();
/**
 * Reads the word that starts at pos and looks it up in keywords.
 *
 * When the word has an entry, a token of the mapped type with the word
 * as content is queued.
 *
 * NOTE(review): the return statements and the handling of an unknown
 * word are not visible in this excerpt; next () dispatches only when
 * true is returned - confirm.
 */
194 private bool queue_keyword () {
195 unowned
string start
= pos
;
197 pos
= pos
.offset (1);
// Advance over the remaining identifier characters.
199 while (isidchar (pos
[0])) {
200 pos
= pos
.offset (1);
// NOTE(review): pointer_to_offset () counts characters while substring ()
// takes bytes; the two agree only while the word is single-byte text.
203 long length
= start
.pointer_to_offset (pos
);
204 string word
= start
.substring (0, length
);
205 CodeTokenType? token_type
= keywords
.get (word
);
206 if (token_type
== null) {
211 token_queue
.push_tail (new
CodeToken (token_type
, word
));
/**
 * Queues a COMMENT token for the block comment that begins at pos.
 *
 * The two-character opening marker is skipped, then pos advances to just
 * behind the closing marker. A comment that is never closed extends to
 * the end of the text.
 */
private void queue_multiline_comment () {
	unowned string comment_begin = pos;

	// Step over the opening marker first so that its '*' cannot be taken
	// for the beginning of the closing marker.
	pos = pos.offset (2);

	while (pos[0] != '\0') {
		if (pos[0] == '*' && pos[1] == '/') {
			// The closing marker belongs to the comment token.
			pos = pos.offset (2);
			break;
		}

		pos = pos.offset (1);
	}

	queue_token (comment_begin, pos, CodeTokenType.COMMENT);
}
/**
 * Queues one token that starts at pos and runs up to end_char.
 *
 * The character at pos always belongs to the token. The terminating
 * character is included as well, unless it is a line break; when the
 * text ends first, the token extends to the end of the text.
 *
 * @param end_char character that terminates the token
 * @param token_type type of the queued token
 */
private void queue_until (char end_char, CodeTokenType token_type) {
	unowned string token_begin = pos;

	// The first character is consumed without being compared to end_char.
	pos = pos.offset (1);

	while (true) {
		char current = pos[0];
		if (current == end_char || current == '\0') {
			break;
		}

		pos = pos.offset (1);
	}

	// A line break (or the end of the text) is left for the caller.
	bool stopped_at_line_or_text_end = pos[0] == '\n' || pos[0] == '\0';
	if (!stopped_at_line_or_text_end) {
		pos = pos.offset (1);
	}

	queue_token (token_begin, pos, token_type);
}
/**
 * Queues the tokens of a quoted literal that starts at pos.
 *
 * The literal ends at the next occurrence of end_chars or at the end of
 * the text. Printf format specifiers, backslash escapes and - when
 * is_template is set - template expressions inside the literal are
 * queued as ESCAPE tokens; the text queued around them is of type
 * LITERAL.
 *
 * NOTE(review): the lines that set is_template, declare skip and reset
 * start are not visible in this excerpt.
 *
 * @param end_chars the delimiter that terminates the literal
 */
245 private void queue_string_literal (string end_chars
) {
246 unowned
string start
= pos
;
247 bool is_template
= false;
// Skip the opening delimiter; the first variant skips one extra character
// (presumably the '@' template prefix - confirm).
250 pos
= pos
.offset (end_chars
.length
+ 1);
253 pos
= pos
.offset (end_chars
.length
);
256 while (!pos
.has_prefix (end_chars
) && pos
[0] != '\0') {
// Each helper reports through skip how many characters its match covers.
259 if ((pos
[0] == '%' && has_printf_format_prefix (out skip
))
260 || (pos
[0] == '\\' && has_escape_prefix (out skip
))
261 || (is_template
&& pos
[0] == '$' && has_template_literal_prefix (out skip
)))
// Emit the literal text collected so far, then the special sequence.
263 queue_token (start
, pos
, CodeTokenType
.LITERAL
);
265 unowned
string sub_start
= pos
;
266 pos
= pos
.offset (skip
);
267 queue_token (sub_start
, pos
, CodeTokenType
.ESCAPE
);
270 pos
= pos
.offset (1);
// Include the closing delimiter unless the text ended first.
274 if (pos
[0] != '\0') {
275 pos
= pos
.offset (end_chars
.length
);
278 queue_token (start
, pos
, CodeTokenType
.LITERAL
);
/**
 * Checks for a template expression at pos.
 *
 * queue_string_literal () calls this only when pos[0] is '$' and, on
 * success, advances pos by skip. The visible code tests whether
 * identifier characters follow the '$'.
 *
 * NOTE(review): most of the body, including all return statements, is
 * not visible in this excerpt.
 *
 * @param skip length of the matched sequence
 */
281 private bool has_template_literal_prefix (out long skip
) {
282 if (isidchar (pos
[1])) {
284 while (isidchar (pos
[skip
])) {
/**
 * Checks for an escape sequence at pos.
 *
 * queue_string_literal () calls this only when pos[0] is a backslash and,
 * on success, advances pos by skip. The visible code handles a run of
 * hexadecimal digits starting at pos[2] and up to three decimal digits
 * starting at pos[1].
 *
 * NOTE(review): most of the body, including all return statements, is
 * not visible in this excerpt.
 *
 * @param skip length of the matched sequence
 */
315 private bool has_escape_prefix (out long skip
) {
332 if (pos
[2].isxdigit ()) {
// skip ends up as the index of the first character that is not a hex digit.
333 for (skip
= 2; pos
[skip
].isxdigit (); skip
++) {
345 if (pos
[1].isdigit ()) {
348 if (pos
[2].isdigit ()) {
351 if (pos
[3].isdigit ()) {
/**
 * Checks for a printf format specifier at the scanner position.
 *
 * The check runs on a local copy of the position, so the scanner itself
 * is not advanced; the length of the match is reported through skip.
 *
 * NOTE(review): large parts of the body, including all return
 * statements, are not visible in this excerpt.
 *
 * @param skip offset from the start of the specifier to its end
 */
364 private bool has_printf_format_prefix (out long skip
) {
365 // %[flag][min width][precision][length modifier][conversion specifier]
// The local pos shadows the field of the same name.
366 unowned
string pos
= this
.pos
;
367 unowned
string start
= pos
;
370 pos
= pos
.offset (1);
373 pos
= pos
.offset (1);
// NOTE(review): index_of_char () yields 0 for '#', the first character of
// the flag list, so "> 0" does not accept '#'; ">= 0" may have been
// intended - confirm (and check the behaviour for '\0' before changing it).
380 while ("#0+- ".index_of_char (pos
[0]) > 0) {
381 pos
= pos
.offset (1);
// Digits that follow the flags.
385 while (pos
[0].isdigit ()) {
386 pos
= pos
.offset (1);
// A '.' followed by at least one digit.
390 if (pos
[0] == '.' && pos
[1].isdigit ()) {
391 pos
= pos
.offset (2);
392 while (pos
[0].isdigit ()) {
393 pos
= pos
.offset (1);
400 pos
= pos
.offset (1);
402 pos
= pos
.offset (1);
407 pos
= pos
.offset (1);
409 pos
= pos
.offset (1);
417 pos
= pos
.offset (1);
421 // conversion specifier:
441 pos
= pos
.offset (1);
// Distance between the start of the specifier and the local position.
449 skip
= start
.pointer_to_offset (pos
);
/**
 * Classification of a numeric literal, used by queue_numeric_literal ().
 *
 * That method refers to the values INTEGER, REAL and NONE; the member
 * list itself is not visible in this excerpt.
 */
453 private enum NumericType
{
/**
 * Queues one token for the numeric literal that starts at pos.
 *
 * The visible code recognises a hexadecimal form, decimal digits, a
 * fractional part, an exponent and 'l'/'L' suffixes. When an
 * alphanumeric character directly follows, the type becomes NONE and
 * the text is queued as PLAIN instead of LITERAL.
 *
 * NOTE(review): several conditions (for example the ones guarding the
 * suffix handling) are not visible in this excerpt.
 */
460 private void queue_numeric_literal () {
461 NumericType numeric_type
= NumericType
.INTEGER
;
462 unowned
string start
= pos
;
466 if (pos
[0] == '0' && pos
[1] == 'x' && pos
[2].isxdigit ()) {
467 // hexadecimal integer literal
468 pos
= pos
.offset (2);
469 while (pos
[0].isxdigit ()) {
470 pos
= pos
.offset (1);
// Decimal digits.
474 while (pos
[0].isdigit ()) {
475 pos
= pos
.offset (1);
// Fractional part: only taken when a digit follows the '.'.
481 if (pos
[0] == '.' && pos
[1].isdigit ()) {
482 numeric_type
= NumericType
.REAL
;
483 pos
= pos
.offset (1);
484 while (pos
[0].isdigit ()) {
485 pos
= pos
.offset (1);
// Exponent with an optional sign.
491 if (pos
[0] == 'e' || pos
[0] == 'E') {
492 numeric_type
= NumericType
.REAL
;
493 pos
= pos
.offset (1);
494 if (pos
[0] == '+' || pos
[0] == '-') {
495 pos
= pos
.offset (1);
497 while (pos
[0].isdigit ()) {
498 pos
= pos
.offset (1);
// Suffix handling; these branches apply to integers only.
507 if (numeric_type
== NumericType
.INTEGER
) {
508 pos
= pos
.offset (1);
509 if (pos
[0] == 'l' || pos
[0] == 'L') {
510 pos
= pos
.offset (1);
517 if (numeric_type
== NumericType
.INTEGER
) {
518 pos
= pos
.offset (1);
519 if (pos
[0] == 'l' || pos
[0] == 'L') {
520 pos
= pos
.offset (1);
521 if (pos
[0] == 'l' || pos
[0] == 'L') {
522 pos
= pos
.offset (1);
532 numeric_type
= NumericType
.REAL
;
533 pos
= pos
.offset (1);
// Anything alphanumeric directly behind the number disqualifies it as a literal.
537 if (pos
[0].isalnum ()) {
538 numeric_type
= NumericType
.NONE
;
541 queue_token (start
, pos
, (numeric_type
!= NumericType
.NONE
)
542 ? CodeTokenType
.LITERAL
543 : CodeTokenType
.PLAIN
);
/**
 * Picks the token to return once at least one token has been queued.
 *
 * When there is text between start and end, that text is returned as a
 * PLAIN token and the queued tokens stay queued for later calls;
 * otherwise the head of the queue is returned.
 *
 * @param start beginning of the text that precedes the queued tokens
 * @param end end of that text; equal to start when there is none
 * @return the token to hand out next
 */
private CodeToken dispatch (string start, string end) {
	assert (token_queue.is_empty () == false);

	if (((char*) start) == ((char*) end)) {
		return token_queue.pop_head ();
	}

	// substring () expects a length in bytes, whereas pointer_to_offset ()
	// counts UTF-8 characters. Measure the distance in bytes so that text
	// containing multi-byte characters is not cut short.
	long length = (long) (((char*) end) - ((char*) start));
	string content = start.substring (0, length);
	return new CodeToken (CodeTokenType.PLAIN, content);
}
/**
 * Appends a token holding the text between start and end to the queue.
 *
 * @param start first byte of the token text
 * @param end position just behind the token text
 * @param token_type type of the queued token
 */
private void queue_token (string start, string end, CodeTokenType token_type) {
	// substring () expects a length in bytes, whereas pointer_to_offset ()
	// counts UTF-8 characters. The callers advance byte by byte, so the
	// length has to be measured in bytes as well; otherwise tokens that
	// contain multi-byte characters lose their tail.
	long length = (long) (((char*) end) - ((char*) start));
	string content = start.substring (0, length);
	token_queue.push_tail (new CodeToken (token_type, content));
}
/**
 * Tells whether c may appear inside an identifier.
 *
 * @param c the character to test
 * @return true for an underscore and for any alphanumeric character
 */
private inline bool isidchar (char c) {
	if (c == '_') {
		return true;
	}

	return c.isalnum ();
}
/**
 * Tells whether c is accepted as the first character of a word in next ().
 *
 * @param c the character to test
 * @return true for an underscore, for any alphanumeric character and,
 *         when enable_keyword_escape is set, for '@'
 */
private inline bool isidstartchar (char c) {
	if (c == '_' || c.isalnum ()) {
		return true;
	}

	// '@' only counts when keyword escaping is switched on.
	return c == '@' && enable_keyword_escape;
}