utf8: add unit test for g_utf8_make_valid
[glib.git] / glib / gscanner.c
blob52b770a61fdf89f1a949bb38ea0c9f5c949ef71a
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * GScanner: Flexible lexical scanner for general purpose.
5 * Copyright (C) 1997, 1998 Tim Janik
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
23 * file for a list of people on the GLib Team. See the ChangeLog
24 * files for a list of changes. These files are distributed with
25 * GLib at ftp://ftp.gtk.org/pub/gtk/.
29 * MT safe
32 #include "config.h"
34 #include <errno.h>
35 #include <stdlib.h>
36 #include <stdarg.h>
37 #include <string.h>
38 #include <stdio.h>
40 #include "gscanner.h"
42 #include "gprintfint.h"
43 #include "gstrfuncs.h"
44 #include "gstring.h"
45 #include "gtestutils.h"
47 #ifdef G_OS_UNIX
48 #include <unistd.h>
49 #endif
50 #ifdef G_OS_WIN32
51 #include <io.h>
52 #endif
55 /**
56 * SECTION:scanner
57 * @title: Lexical Scanner
58 * @short_description: a general purpose lexical scanner
60 * The #GScanner and its associated functions provide a
61 * general purpose lexical scanner.
64 /**
65 * GScannerMsgFunc:
66 * @scanner: a #GScanner
67 * @message: the message
68 * @error: %TRUE if the message signals an error,
69 * %FALSE if it signals a warning.
71 * Specifies the type of the message handler function.
74 /**
75 * G_CSET_a_2_z:
77 * The set of lowercase ASCII alphabet characters.
78 * Used for specifying valid identifier characters
79 * in #GScannerConfig.
82 /**
83 * G_CSET_A_2_Z:
85 * The set of uppercase ASCII alphabet characters.
86 * Used for specifying valid identifier characters
87 * in #GScannerConfig.
90 /**
91 * G_CSET_DIGITS:
93 * The set of ASCII digits.
94 * Used for specifying valid identifier characters
95 * in #GScannerConfig.
98 /**
99 * G_CSET_LATINC:
101 * The set of uppercase ISO 8859-1 alphabet characters
102 * which are not ASCII characters.
103 * Used for specifying valid identifier characters
104 * in #GScannerConfig.
108 * G_CSET_LATINS:
110 * The set of lowercase ISO 8859-1 alphabet characters
111 * which are not ASCII characters.
112 * Used for specifying valid identifier characters
113 * in #GScannerConfig.
117 * GTokenType:
118 * @G_TOKEN_EOF: the end of the file
119 * @G_TOKEN_LEFT_PAREN: a '(' character
120 * @G_TOKEN_LEFT_CURLY: a '{' character
121 * @G_TOKEN_LEFT_BRACE: a '[' character
122 * @G_TOKEN_RIGHT_CURLY: a '}' character
123 * @G_TOKEN_RIGHT_PAREN: a ')' character
124 * @G_TOKEN_RIGHT_BRACE: a ']' character
125 * @G_TOKEN_EQUAL_SIGN: a '=' character
126 * @G_TOKEN_COMMA: a ',' character
127 * @G_TOKEN_NONE: not a token
128 * @G_TOKEN_ERROR: an error occurred
129 * @G_TOKEN_CHAR: a character
130 * @G_TOKEN_BINARY: a binary integer
131 * @G_TOKEN_OCTAL: an octal integer
132 * @G_TOKEN_INT: an integer
133 * @G_TOKEN_HEX: a hex integer
134 * @G_TOKEN_FLOAT: a floating point number
135 * @G_TOKEN_STRING: a string
136 * @G_TOKEN_SYMBOL: a symbol
137 * @G_TOKEN_IDENTIFIER: an identifier
138 * @G_TOKEN_IDENTIFIER_NULL: a null identifier
139 * @G_TOKEN_COMMENT_SINGLE: one line comment
140 * @G_TOKEN_COMMENT_MULTI: multi line comment
142 * The possible types of token returned from each
143 * g_scanner_get_next_token() call.
147 * GTokenValue:
148 * @v_symbol: token symbol value
149 * @v_identifier: token identifier value
150 * @v_binary: token binary integer value
151 * @v_octal: octal integer value
152 * @v_int: integer value
153 * @v_int64: 64-bit integer value
154 * @v_float: floating point value
155 * @v_hex: hex integer value
156 * @v_string: string value
157 * @v_comment: comment value
158 * @v_char: character value
159 * @v_error: error value
161 * A union holding the value of the token.
165 * GErrorType:
166 * @G_ERR_UNKNOWN: unknown error
167 * @G_ERR_UNEXP_EOF: unexpected end of file
168 * @G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant
169 * @G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment
170 * @G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number
171 * @G_ERR_DIGIT_RADIX: digit beyond radix in a number
172 * @G_ERR_FLOAT_RADIX: non-decimal floating point number
173 * @G_ERR_FLOAT_MALFORMED: malformed floating point number
175 * The possible errors, used in the @v_error field
176 * of #GTokenValue, when the token is a %G_TOKEN_ERROR.
180 * GScanner:
181 * @user_data: unused
182 * @max_parse_errors: unused
183 * @parse_errors: g_scanner_error() increments this field
184 * @input_name: name of input stream, featured by the default message handler
185 * @qdata: quarked data
186 * @config: link into the scanner configuration
187 * @token: token parsed by the last g_scanner_get_next_token()
188 * @value: value of the last token from g_scanner_get_next_token()
189 * @line: line number of the last token from g_scanner_get_next_token()
190 * @position: char number of the last token from g_scanner_get_next_token()
191 * @next_token: token parsed by the last g_scanner_peek_next_token()
192 * @next_value: value of the last token from g_scanner_peek_next_token()
193 * @next_line: line number of the last token from g_scanner_peek_next_token()
194 * @next_position: char number of the last token from g_scanner_peek_next_token()
195 * @msg_handler: handler function for _warn and _error
197 * The data structure representing a lexical scanner.
199 * You should set @input_name after creating the scanner, since
200 * it is used by the default message handler when displaying
201 * warnings and errors. If you are scanning a file, the filename
202 * would be a good choice.
204 * The @user_data and @max_parse_errors fields are not used.
205 * If you need to associate extra data with the scanner you
206 * can place them here.
208 * If you want to use your own message handler you can set the
209 * @msg_handler field. The type of the message handler function
210 * is declared by #GScannerMsgFunc.
214 * GScannerConfig:
215 * @cset_skip_characters: specifies which characters should be skipped
216 * by the scanner (the default is the whitespace characters: space,
217 * tab, carriage-return and line-feed).
218 * @cset_identifier_first: specifies the characters which can start
219 * identifiers (the default is #G_CSET_a_2_z, "_", and #G_CSET_A_2_Z).
220 * @cset_identifier_nth: specifies the characters which can be used
221 * in identifiers, after the first character (the default is
222 * #G_CSET_a_2_z, "_0123456789", #G_CSET_A_2_Z, #G_CSET_LATINS,
223 * #G_CSET_LATINC).
224 * @cpair_comment_single: specifies the characters at the start and
225 * end of single-line comments. The default is "#\n" which means
226 * that single-line comments start with a '#' and continue until
227 * a '\n' (end of line).
228 * @case_sensitive: specifies if symbols are case sensitive (the
229 * default is %FALSE).
230 * @skip_comment_multi: specifies if multi-line comments are skipped
231 * and not returned as tokens (the default is %TRUE).
232 * @skip_comment_single: specifies if single-line comments are skipped
233 * and not returned as tokens (the default is %TRUE).
234 * @scan_comment_multi: specifies if multi-line comments are recognized
235 * (the default is %TRUE).
236 * @scan_identifier: specifies if identifiers are recognized (the
237 * default is %TRUE).
238 * @scan_identifier_1char: specifies if single-character
239 * identifiers are recognized (the default is %FALSE).
240 * @scan_identifier_NULL: specifies if %NULL is reported as
241 * %G_TOKEN_IDENTIFIER_NULL (the default is %FALSE).
242 * @scan_symbols: specifies if symbols are recognized (the default
243 * is %TRUE).
244 * @scan_binary: specifies if binary numbers are recognized (the
245 * default is %FALSE).
246 * @scan_octal: specifies if octal numbers are recognized (the
247 * default is %TRUE).
248 * @scan_float: specifies if floating point numbers are recognized
249 * (the default is %TRUE).
250 * @scan_hex: specifies if hexadecimal numbers are recognized (the
251 * default is %TRUE).
252 * @scan_hex_dollar: specifies if '$' is recognized as a prefix for
253 * hexadecimal numbers (the default is %FALSE).
254 * @scan_string_sq: specifies if strings can be enclosed in single
255 * quotes (the default is %TRUE).
256 * @scan_string_dq: specifies if strings can be enclosed in double
257 * quotes (the default is %TRUE).
258 * @numbers_2_int: specifies if binary, octal and hexadecimal numbers
259 * are reported as #G_TOKEN_INT (the default is %TRUE).
260 * @int_2_float: specifies if all numbers are reported as %G_TOKEN_FLOAT
261 * (the default is %FALSE).
262 * @identifier_2_string: specifies if identifiers are reported as strings
263 * (the default is %FALSE).
264 * @char_2_token: specifies if characters are reported by setting
265 * `token = ch` or as %G_TOKEN_CHAR (the default is %TRUE).
266 * @symbol_2_token: specifies if symbols are reported by setting
267 * `token = v_symbol` or as %G_TOKEN_SYMBOL (the default is %FALSE).
268 * @scope_0_fallback: specifies if a symbol is searched for in the
269 * default scope in addition to the current scope (the default is %FALSE).
270 * @store_int64: use value.v_int64 rather than v_int
272 * Specifies the #GScanner parser configuration. Most settings can
273 * be changed during the parsing phase and will affect the lexical
274 * parsing of the next unpeeked token.
/* --- defines --- */

/* Lower-cases a single byte.  Bytes in 'A'..'Z', 192..214 and 216..222
 * get the distance to their lowercase range OR-ed in; every other byte is
 * returned unchanged.  The argument is expanded several times, so it must
 * not have side effects.
 */
#define to_lower(c) ( \
  (guchar) ( \
    ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) | \
    ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) | \
    ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) | \
    ((guchar)(c)) \
  ) \
)

/* Number of bytes requested from read() per refill when scanning a file. */
#define READ_BUFFER_SIZE (4000)
289 /* --- typedefs --- */
290 typedef struct _GScannerKey GScannerKey;
292 struct _GScannerKey
294 guint scope_id;
295 gchar *symbol;
296 gpointer value;
300 /* --- variables --- */
301 static const GScannerConfig g_scanner_config_template =
304 " \t\r\n"
305 ) /* cset_skip_characters */,
307 G_CSET_a_2_z
309 G_CSET_A_2_Z
310 ) /* cset_identifier_first */,
312 G_CSET_a_2_z
314 G_CSET_A_2_Z
315 G_CSET_DIGITS
316 G_CSET_LATINS
317 G_CSET_LATINC
318 ) /* cset_identifier_nth */,
319 ( "#\n" ) /* cpair_comment_single */,
321 FALSE /* case_sensitive */,
323 TRUE /* skip_comment_multi */,
324 TRUE /* skip_comment_single */,
325 TRUE /* scan_comment_multi */,
326 TRUE /* scan_identifier */,
327 FALSE /* scan_identifier_1char */,
328 FALSE /* scan_identifier_NULL */,
329 TRUE /* scan_symbols */,
330 FALSE /* scan_binary */,
331 TRUE /* scan_octal */,
332 TRUE /* scan_float */,
333 TRUE /* scan_hex */,
334 FALSE /* scan_hex_dollar */,
335 TRUE /* scan_string_sq */,
336 TRUE /* scan_string_dq */,
337 TRUE /* numbers_2_int */,
338 FALSE /* int_2_float */,
339 FALSE /* identifier_2_string */,
340 TRUE /* char_2_token */,
341 FALSE /* symbol_2_token */,
342 FALSE /* scope_0_fallback */,
343 FALSE /* store_int64 */,
347 /* --- prototypes --- */
348 static inline
349 GScannerKey* g_scanner_lookup_internal (GScanner *scanner,
350 guint scope_id,
351 const gchar *symbol);
352 static gboolean g_scanner_key_equal (gconstpointer v1,
353 gconstpointer v2);
354 static guint g_scanner_key_hash (gconstpointer v);
355 static void g_scanner_get_token_ll (GScanner *scanner,
356 GTokenType *token_p,
357 GTokenValue *value_p,
358 guint *line_p,
359 guint *position_p);
360 static void g_scanner_get_token_i (GScanner *scanner,
361 GTokenType *token_p,
362 GTokenValue *value_p,
363 guint *line_p,
364 guint *position_p);
366 static guchar g_scanner_peek_next_char (GScanner *scanner);
367 static guchar g_scanner_get_char (GScanner *scanner,
368 guint *line_p,
369 guint *position_p);
370 static void g_scanner_msg_handler (GScanner *scanner,
371 gchar *message,
372 gboolean is_error);
375 /* --- functions --- */
376 static inline gint
377 g_scanner_char_2_num (guchar c,
378 guchar base)
380 if (c >= '0' && c <= '9')
381 c -= '0';
382 else if (c >= 'A' && c <= 'Z')
383 c -= 'A' - 10;
384 else if (c >= 'a' && c <= 'z')
385 c -= 'a' - 10;
386 else
387 return -1;
389 if (c < base)
390 return c;
392 return -1;
396 * g_scanner_new:
397 * @config_templ: the initial scanner settings
399 * Creates a new #GScanner.
401 * The @config_templ structure specifies the initial settings
402 * of the scanner, which are copied into the #GScanner
403 * @config field. If you pass %NULL then the default settings
404 * are used.
406 * Returns: the new #GScanner
408 GScanner *
409 g_scanner_new (const GScannerConfig *config_templ)
411 GScanner *scanner;
413 if (!config_templ)
414 config_templ = &g_scanner_config_template;
416 scanner = g_new0 (GScanner, 1);
418 scanner->user_data = NULL;
419 scanner->max_parse_errors = 1;
420 scanner->parse_errors = 0;
421 scanner->input_name = NULL;
422 g_datalist_init (&scanner->qdata);
424 scanner->config = g_new0 (GScannerConfig, 1);
426 scanner->config->case_sensitive = config_templ->case_sensitive;
427 scanner->config->cset_skip_characters = config_templ->cset_skip_characters;
428 if (!scanner->config->cset_skip_characters)
429 scanner->config->cset_skip_characters = "";
430 scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
431 scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth;
432 scanner->config->cpair_comment_single = config_templ->cpair_comment_single;
433 scanner->config->skip_comment_multi = config_templ->skip_comment_multi;
434 scanner->config->skip_comment_single = config_templ->skip_comment_single;
435 scanner->config->scan_comment_multi = config_templ->scan_comment_multi;
436 scanner->config->scan_identifier = config_templ->scan_identifier;
437 scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
438 scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL;
439 scanner->config->scan_symbols = config_templ->scan_symbols;
440 scanner->config->scan_binary = config_templ->scan_binary;
441 scanner->config->scan_octal = config_templ->scan_octal;
442 scanner->config->scan_float = config_templ->scan_float;
443 scanner->config->scan_hex = config_templ->scan_hex;
444 scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar;
445 scanner->config->scan_string_sq = config_templ->scan_string_sq;
446 scanner->config->scan_string_dq = config_templ->scan_string_dq;
447 scanner->config->numbers_2_int = config_templ->numbers_2_int;
448 scanner->config->int_2_float = config_templ->int_2_float;
449 scanner->config->identifier_2_string = config_templ->identifier_2_string;
450 scanner->config->char_2_token = config_templ->char_2_token;
451 scanner->config->symbol_2_token = config_templ->symbol_2_token;
452 scanner->config->scope_0_fallback = config_templ->scope_0_fallback;
453 scanner->config->store_int64 = config_templ->store_int64;
455 scanner->token = G_TOKEN_NONE;
456 scanner->value.v_int64 = 0;
457 scanner->line = 1;
458 scanner->position = 0;
460 scanner->next_token = G_TOKEN_NONE;
461 scanner->next_value.v_int64 = 0;
462 scanner->next_line = 1;
463 scanner->next_position = 0;
465 scanner->symbol_table = g_hash_table_new (g_scanner_key_hash, g_scanner_key_equal);
466 scanner->input_fd = -1;
467 scanner->text = NULL;
468 scanner->text_end = NULL;
469 scanner->buffer = NULL;
470 scanner->scope_id = 0;
472 scanner->msg_handler = g_scanner_msg_handler;
474 return scanner;
477 static inline void
478 g_scanner_free_value (GTokenType *token_p,
479 GTokenValue *value_p)
481 switch (*token_p)
483 case G_TOKEN_STRING:
484 case G_TOKEN_IDENTIFIER:
485 case G_TOKEN_IDENTIFIER_NULL:
486 case G_TOKEN_COMMENT_SINGLE:
487 case G_TOKEN_COMMENT_MULTI:
488 g_free (value_p->v_string);
489 break;
491 default:
492 break;
495 *token_p = G_TOKEN_NONE;
498 static void
499 g_scanner_destroy_symbol_table_entry (gpointer _key,
500 gpointer _value,
501 gpointer _data)
503 GScannerKey *key = _key;
505 g_free (key->symbol);
506 g_free (key);
510 * g_scanner_destroy:
511 * @scanner: a #GScanner
513 * Frees all memory used by the #GScanner.
515 void
516 g_scanner_destroy (GScanner *scanner)
518 g_return_if_fail (scanner != NULL);
520 g_datalist_clear (&scanner->qdata);
521 g_hash_table_foreach (scanner->symbol_table,
522 g_scanner_destroy_symbol_table_entry, NULL);
523 g_hash_table_destroy (scanner->symbol_table);
524 g_scanner_free_value (&scanner->token, &scanner->value);
525 g_scanner_free_value (&scanner->next_token, &scanner->next_value);
526 g_free (scanner->config);
527 g_free (scanner->buffer);
528 g_free (scanner);
531 static void
532 g_scanner_msg_handler (GScanner *scanner,
533 gchar *message,
534 gboolean is_error)
536 g_return_if_fail (scanner != NULL);
538 _g_fprintf (stderr, "%s:%d: ",
539 scanner->input_name ? scanner->input_name : "<memory>",
540 scanner->line);
541 if (is_error)
542 _g_fprintf (stderr, "error: ");
543 _g_fprintf (stderr, "%s\n", message);
547 * g_scanner_error:
548 * @scanner: a #GScanner
549 * @format: the message format. See the printf() documentation
550 * @...: the parameters to insert into the format string
552 * Outputs an error message, via the #GScanner message handler.
554 void
555 g_scanner_error (GScanner *scanner,
556 const gchar *format,
557 ...)
559 g_return_if_fail (scanner != NULL);
560 g_return_if_fail (format != NULL);
562 scanner->parse_errors++;
564 if (scanner->msg_handler)
566 va_list args;
567 gchar *string;
569 va_start (args, format);
570 string = g_strdup_vprintf (format, args);
571 va_end (args);
573 scanner->msg_handler (scanner, string, TRUE);
575 g_free (string);
580 * g_scanner_warn:
581 * @scanner: a #GScanner
582 * @format: the message format. See the printf() documentation
583 * @...: the parameters to insert into the format string
585 * Outputs a warning message, via the #GScanner message handler.
587 void
588 g_scanner_warn (GScanner *scanner,
589 const gchar *format,
590 ...)
592 g_return_if_fail (scanner != NULL);
593 g_return_if_fail (format != NULL);
595 if (scanner->msg_handler)
597 va_list args;
598 gchar *string;
600 va_start (args, format);
601 string = g_strdup_vprintf (format, args);
602 va_end (args);
604 scanner->msg_handler (scanner, string, FALSE);
606 g_free (string);
610 static gboolean
611 g_scanner_key_equal (gconstpointer v1,
612 gconstpointer v2)
614 const GScannerKey *key1 = v1;
615 const GScannerKey *key2 = v2;
617 return (key1->scope_id == key2->scope_id) && (strcmp (key1->symbol, key2->symbol) == 0);
620 static guint
621 g_scanner_key_hash (gconstpointer v)
623 const GScannerKey *key = v;
624 gchar *c;
625 guint h;
627 h = key->scope_id;
628 for (c = key->symbol; *c; c++)
629 h = (h << 5) - h + *c;
631 return h;
634 static inline GScannerKey*
635 g_scanner_lookup_internal (GScanner *scanner,
636 guint scope_id,
637 const gchar *symbol)
639 GScannerKey *key_p;
640 GScannerKey key;
642 key.scope_id = scope_id;
644 if (!scanner->config->case_sensitive)
646 gchar *d;
647 const gchar *c;
649 key.symbol = g_new (gchar, strlen (symbol) + 1);
650 for (d = key.symbol, c = symbol; *c; c++, d++)
651 *d = to_lower (*c);
652 *d = 0;
653 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
654 g_free (key.symbol);
656 else
658 key.symbol = (gchar*) symbol;
659 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
662 return key_p;
666 * g_scanner_add_symbol:
667 * @scanner: a #GScanner
668 * @symbol: the symbol to add
669 * @value: the value of the symbol
671 * Adds a symbol to the default scope.
673 * Deprecated: 2.2: Use g_scanner_scope_add_symbol() instead.
677 * g_scanner_scope_add_symbol:
678 * @scanner: a #GScanner
679 * @scope_id: the scope id
680 * @symbol: the symbol to add
681 * @value: the value of the symbol
683 * Adds a symbol to the given scope.
685 void
686 g_scanner_scope_add_symbol (GScanner *scanner,
687 guint scope_id,
688 const gchar *symbol,
689 gpointer value)
691 GScannerKey *key;
693 g_return_if_fail (scanner != NULL);
694 g_return_if_fail (symbol != NULL);
696 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
698 if (!key)
700 key = g_new (GScannerKey, 1);
701 key->scope_id = scope_id;
702 key->symbol = g_strdup (symbol);
703 key->value = value;
704 if (!scanner->config->case_sensitive)
706 gchar *c;
708 c = key->symbol;
709 while (*c != 0)
711 *c = to_lower (*c);
712 c++;
715 g_hash_table_insert (scanner->symbol_table, key, key);
717 else
718 key->value = value;
722 * g_scanner_remove_symbol:
723 * @scanner: a #GScanner
724 * @symbol: the symbol to remove
726 * Removes a symbol from the default scope.
728 * Deprecated: 2.2: Use g_scanner_scope_remove_symbol() instead.
732 * g_scanner_scope_remove_symbol:
733 * @scanner: a #GScanner
734 * @scope_id: the scope id
735 * @symbol: the symbol to remove
737 * Removes a symbol from a scope.
739 void
740 g_scanner_scope_remove_symbol (GScanner *scanner,
741 guint scope_id,
742 const gchar *symbol)
744 GScannerKey *key;
746 g_return_if_fail (scanner != NULL);
747 g_return_if_fail (symbol != NULL);
749 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
751 if (key)
753 g_hash_table_remove (scanner->symbol_table, key);
754 g_free (key->symbol);
755 g_free (key);
760 * g_scanner_freeze_symbol_table:
761 * @scanner: a #GScanner
763 * There is no reason to use this macro, since it does nothing.
765 * Deprecated: 2.2: This macro does nothing.
769 * g_scanner_thaw_symbol_table:
770 * @scanner: a #GScanner
772 * There is no reason to use this macro, since it does nothing.
774 * Deprecated: 2.2: This macro does nothing.
778 * g_scanner_lookup_symbol:
779 * @scanner: a #GScanner
780 * @symbol: the symbol to look up
782 * Looks up a symbol in the current scope and return its value.
783 * If the symbol is not bound in the current scope, %NULL is
784 * returned.
786 * Returns: the value of @symbol in the current scope, or %NULL
787 * if @symbol is not bound in the current scope
789 gpointer
790 g_scanner_lookup_symbol (GScanner *scanner,
791 const gchar *symbol)
793 GScannerKey *key;
794 guint scope_id;
796 g_return_val_if_fail (scanner != NULL, NULL);
798 if (!symbol)
799 return NULL;
801 scope_id = scanner->scope_id;
802 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
803 if (!key && scope_id && scanner->config->scope_0_fallback)
804 key = g_scanner_lookup_internal (scanner, 0, symbol);
806 if (key)
807 return key->value;
808 else
809 return NULL;
813 * g_scanner_scope_lookup_symbol:
814 * @scanner: a #GScanner
815 * @scope_id: the scope id
816 * @symbol: the symbol to look up
818 * Looks up a symbol in a scope and return its value. If the
819 * symbol is not bound in the scope, %NULL is returned.
821 * Returns: the value of @symbol in the given scope, or %NULL
822 * if @symbol is not bound in the given scope.
825 gpointer
826 g_scanner_scope_lookup_symbol (GScanner *scanner,
827 guint scope_id,
828 const gchar *symbol)
830 GScannerKey *key;
832 g_return_val_if_fail (scanner != NULL, NULL);
834 if (!symbol)
835 return NULL;
837 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
839 if (key)
840 return key->value;
841 else
842 return NULL;
846 * g_scanner_set_scope:
847 * @scanner: a #GScanner
848 * @scope_id: the new scope id
850 * Sets the current scope.
852 * Returns: the old scope id
854 guint
855 g_scanner_set_scope (GScanner *scanner,
856 guint scope_id)
858 guint old_scope_id;
860 g_return_val_if_fail (scanner != NULL, 0);
862 old_scope_id = scanner->scope_id;
863 scanner->scope_id = scope_id;
865 return old_scope_id;
868 static void
869 g_scanner_foreach_internal (gpointer _key,
870 gpointer _value,
871 gpointer _user_data)
873 GScannerKey *key;
874 gpointer *d;
875 GHFunc func;
876 gpointer user_data;
877 guint *scope_id;
879 d = _user_data;
880 func = (GHFunc) d[0];
881 user_data = d[1];
882 scope_id = d[2];
883 key = _value;
885 if (key->scope_id == *scope_id)
886 func (key->symbol, key->value, user_data);
890 * g_scanner_foreach_symbol:
891 * @scanner: a #GScanner
892 * @func: the function to call with each symbol
893 * @data: data to pass to the function
895 * Calls a function for each symbol in the default scope.
897 * Deprecated: 2.2: Use g_scanner_scope_foreach_symbol() instead.
901 * g_scanner_scope_foreach_symbol:
902 * @scanner: a #GScanner
903 * @scope_id: the scope id
904 * @func: the function to call for each symbol/value pair
905 * @user_data: user data to pass to the function
907 * Calls the given function for each of the symbol/value pairs
908 * in the given scope of the #GScanner. The function is passed
909 * the symbol and value of each pair, and the given @user_data
910 * parameter.
912 void
913 g_scanner_scope_foreach_symbol (GScanner *scanner,
914 guint scope_id,
915 GHFunc func,
916 gpointer user_data)
918 gpointer d[3];
920 g_return_if_fail (scanner != NULL);
922 d[0] = (gpointer) func;
923 d[1] = user_data;
924 d[2] = &scope_id;
926 g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
930 * g_scanner_peek_next_token:
931 * @scanner: a #GScanner
933 * Parses the next token, without removing it from the input stream.
934 * The token data is placed in the @next_token, @next_value, @next_line,
935 * and @next_position fields of the #GScanner structure.
937 * Note that, while the token is not removed from the input stream
938 * (i.e. the next call to g_scanner_get_next_token() will return the
939 * same token), it will not be reevaluated. This can lead to surprising
940 * results when changing scope or the scanner configuration after peeking
941 * the next token. Getting the next token after switching the scope or
942 * configuration will return whatever was peeked before, regardless of
943 * any symbols that may have been added or removed in the new scope.
945 * Returns: the type of the token
947 GTokenType
948 g_scanner_peek_next_token (GScanner *scanner)
950 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
952 if (scanner->next_token == G_TOKEN_NONE)
954 scanner->next_line = scanner->line;
955 scanner->next_position = scanner->position;
956 g_scanner_get_token_i (scanner,
957 &scanner->next_token,
958 &scanner->next_value,
959 &scanner->next_line,
960 &scanner->next_position);
963 return scanner->next_token;
967 * g_scanner_get_next_token:
968 * @scanner: a #GScanner
970 * Parses the next token just like g_scanner_peek_next_token()
971 * and also removes it from the input stream. The token data is
972 * placed in the @token, @value, @line, and @position fields of
973 * the #GScanner structure.
975 * Returns: the type of the token
977 GTokenType
978 g_scanner_get_next_token (GScanner *scanner)
980 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
982 if (scanner->next_token != G_TOKEN_NONE)
984 g_scanner_free_value (&scanner->token, &scanner->value);
986 scanner->token = scanner->next_token;
987 scanner->value = scanner->next_value;
988 scanner->line = scanner->next_line;
989 scanner->position = scanner->next_position;
990 scanner->next_token = G_TOKEN_NONE;
992 else
993 g_scanner_get_token_i (scanner,
994 &scanner->token,
995 &scanner->value,
996 &scanner->line,
997 &scanner->position);
999 return scanner->token;
1003 * g_scanner_cur_token:
1004 * @scanner: a #GScanner
1006 * Gets the current token type. This is simply the @token
1007 * field in the #GScanner structure.
1009 * Returns: the current token type
1011 GTokenType
1012 g_scanner_cur_token (GScanner *scanner)
1014 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
1016 return scanner->token;
1020 * g_scanner_cur_value:
1021 * @scanner: a #GScanner
1023 * Gets the current token value. This is simply the @value
1024 * field in the #GScanner structure.
1026 * Returns: the current token value
1028 GTokenValue
1029 g_scanner_cur_value (GScanner *scanner)
1031 GTokenValue v;
1033 v.v_int64 = 0;
1035 g_return_val_if_fail (scanner != NULL, v);
1037 /* MSC isn't capable of handling return scanner->value; ? */
1039 v = scanner->value;
1041 return v;
1045 * g_scanner_cur_line:
1046 * @scanner: a #GScanner
1048 * Returns the current line in the input stream (counting
1049 * from 1). This is the line of the last token parsed via
1050 * g_scanner_get_next_token().
1052 * Returns: the current line
1054 guint
1055 g_scanner_cur_line (GScanner *scanner)
1057 g_return_val_if_fail (scanner != NULL, 0);
1059 return scanner->line;
1063 * g_scanner_cur_position:
1064 * @scanner: a #GScanner
1066 * Returns the current position in the current line (counting
1067 * from 0). This is the position of the last token parsed via
1068 * g_scanner_get_next_token().
1070 * Returns: the current position on the line
1072 guint
1073 g_scanner_cur_position (GScanner *scanner)
1075 g_return_val_if_fail (scanner != NULL, 0);
1077 return scanner->position;
1081 * g_scanner_eof:
1082 * @scanner: a #GScanner
1084 * Returns %TRUE if the scanner has reached the end of
1085 * the file or text buffer.
1087 * Returns: %TRUE if the scanner has reached the end of
1088 * the file or text buffer
1090 gboolean
1091 g_scanner_eof (GScanner *scanner)
1093 g_return_val_if_fail (scanner != NULL, TRUE);
1095 return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
1099 * g_scanner_input_file:
1100 * @scanner: a #GScanner
1101 * @input_fd: a file descriptor
1103 * Prepares to scan a file.
1105 void
1106 g_scanner_input_file (GScanner *scanner,
1107 gint input_fd)
1109 g_return_if_fail (scanner != NULL);
1110 g_return_if_fail (input_fd >= 0);
1112 if (scanner->input_fd >= 0)
1113 g_scanner_sync_file_offset (scanner);
1115 scanner->token = G_TOKEN_NONE;
1116 scanner->value.v_int64 = 0;
1117 scanner->line = 1;
1118 scanner->position = 0;
1119 scanner->next_token = G_TOKEN_NONE;
1121 scanner->input_fd = input_fd;
1122 scanner->text = NULL;
1123 scanner->text_end = NULL;
1125 if (!scanner->buffer)
1126 scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
1130 * g_scanner_input_text:
1131 * @scanner: a #GScanner
1132 * @text: the text buffer to scan
1133 * @text_len: the length of the text buffer
1135 * Prepares to scan a text buffer.
1137 void
1138 g_scanner_input_text (GScanner *scanner,
1139 const gchar *text,
1140 guint text_len)
1142 g_return_if_fail (scanner != NULL);
1143 if (text_len)
1144 g_return_if_fail (text != NULL);
1145 else
1146 text = NULL;
1148 if (scanner->input_fd >= 0)
1149 g_scanner_sync_file_offset (scanner);
1151 scanner->token = G_TOKEN_NONE;
1152 scanner->value.v_int64 = 0;
1153 scanner->line = 1;
1154 scanner->position = 0;
1155 scanner->next_token = G_TOKEN_NONE;
1157 scanner->input_fd = -1;
1158 scanner->text = text;
1159 scanner->text_end = text + text_len;
1161 if (scanner->buffer)
1163 g_free (scanner->buffer);
1164 scanner->buffer = NULL;
1168 static guchar
1169 g_scanner_peek_next_char (GScanner *scanner)
1171 if (scanner->text < scanner->text_end)
1173 return *scanner->text;
1175 else if (scanner->input_fd >= 0)
1177 gint count;
1178 gchar *buffer;
1180 buffer = scanner->buffer;
1183 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1185 while (count == -1 && (errno == EINTR || errno == EAGAIN));
1187 if (count < 1)
1189 scanner->input_fd = -1;
1191 return 0;
1193 else
1195 scanner->text = buffer;
1196 scanner->text_end = buffer + count;
1198 return *buffer;
1201 else
1202 return 0;
1206 * g_scanner_sync_file_offset:
1207 * @scanner: a #GScanner
1209 * Rewinds the filedescriptor to the current buffer position
1210 * and blows the file read ahead buffer. This is useful for
1211 * third party uses of the scanners filedescriptor, which hooks
1212 * onto the current scanning position.
1214 void
1215 g_scanner_sync_file_offset (GScanner *scanner)
1217 g_return_if_fail (scanner != NULL);
1219 /* for file input, rewind the filedescriptor to the current
1220 * buffer position and blow the file read ahead buffer. useful
1221 * for third party uses of our file descriptor, which hooks
1222 * onto the current scanning position.
1225 if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
1227 gint buffered;
1229 buffered = scanner->text_end - scanner->text;
1230 if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
1232 /* we succeeded, blow our buffer's contents now */
1233 scanner->text = NULL;
1234 scanner->text_end = NULL;
1236 else
1237 errno = 0;
1241 static guchar
1242 g_scanner_get_char (GScanner *scanner,
1243 guint *line_p,
1244 guint *position_p)
1246 guchar fchar;
1248 if (scanner->text < scanner->text_end)
1249 fchar = *(scanner->text++);
1250 else if (scanner->input_fd >= 0)
1252 gint count;
1253 gchar *buffer;
1255 buffer = scanner->buffer;
1258 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1260 while (count == -1 && (errno == EINTR || errno == EAGAIN));
1262 if (count < 1)
1264 scanner->input_fd = -1;
1265 fchar = 0;
1267 else
1269 scanner->text = buffer + 1;
1270 scanner->text_end = buffer + count;
1271 fchar = *buffer;
1272 if (!fchar)
1274 g_scanner_sync_file_offset (scanner);
1275 scanner->text_end = scanner->text;
1276 scanner->input_fd = -1;
1280 else
1281 fchar = 0;
1283 if (fchar == '\n')
1285 (*position_p) = 0;
1286 (*line_p)++;
1288 else if (fchar)
1290 (*position_p)++;
1293 return fchar;
1297 * g_scanner_unexp_token:
1298 * @scanner: a #GScanner
1299 * @expected_token: the expected token
1300 * @identifier_spec: a string describing how the scanner's user
1301 * refers to identifiers (%NULL defaults to "identifier").
1302 * This is used if @expected_token is %G_TOKEN_IDENTIFIER or
1303 * %G_TOKEN_IDENTIFIER_NULL.
1304 * @symbol_spec: a string describing how the scanner's user refers
1305 * to symbols (%NULL defaults to "symbol"). This is used if
1306 * @expected_token is %G_TOKEN_SYMBOL or any token value greater
1307 * than %G_TOKEN_LAST.
1308 * @symbol_name: the name of the symbol, if the scanner's current
1309 * token is a symbol.
1310 * @message: a message string to output at the end of the
1311 * warning/error, or %NULL.
1312 * @is_error: if %TRUE it is output as an error. If %FALSE it is
1313 * output as a warning.
1315 * Outputs a message through the scanner's msg_handler,
1316 * resulting from an unexpected token in the input stream.
1317 * Note that you should not call g_scanner_peek_next_token()
1318 * followed by g_scanner_unexp_token() without an intermediate
1319 * call to g_scanner_get_next_token(), as g_scanner_unexp_token()
1320 * evaluates the scanner's current token (not the peeked token)
1321 * to construct part of the message.
1323 void
1324 g_scanner_unexp_token (GScanner *scanner,
1325 GTokenType expected_token,
1326 const gchar *identifier_spec,
1327 const gchar *symbol_spec,
1328 const gchar *symbol_name,
1329 const gchar *message,
1330 gint is_error)
1332 gchar *token_string;
1333 guint token_string_len;
1334 gchar *expected_string;
1335 guint expected_string_len;
1336 gchar *message_prefix;
1337 gboolean print_unexp;
1338 void (*msg_handler) (GScanner*, const gchar*, ...);
1340 g_return_if_fail (scanner != NULL);
1342 if (is_error)
1343 msg_handler = g_scanner_error;
1344 else
1345 msg_handler = g_scanner_warn;
1347 if (!identifier_spec)
1348 identifier_spec = "identifier";
1349 if (!symbol_spec)
1350 symbol_spec = "symbol";
1352 token_string_len = 56;
1353 token_string = g_new (gchar, token_string_len + 1);
1354 expected_string_len = 64;
1355 expected_string = g_new (gchar, expected_string_len + 1);
1356 print_unexp = TRUE;
1358 switch (scanner->token)
1360 case G_TOKEN_EOF:
1361 _g_snprintf (token_string, token_string_len, "end of file");
1362 break;
1364 default:
1365 if (scanner->token >= 1 && scanner->token <= 255)
1367 if ((scanner->token >= ' ' && scanner->token <= '~') ||
1368 strchr (scanner->config->cset_identifier_first, scanner->token) ||
1369 strchr (scanner->config->cset_identifier_nth, scanner->token))
1370 _g_snprintf (token_string, token_string_len, "character '%c'", scanner->token);
1371 else
1372 _g_snprintf (token_string, token_string_len, "character '\\%o'", scanner->token);
1373 break;
1375 else if (!scanner->config->symbol_2_token)
1377 _g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
1378 break;
1380 /* fall through */
1381 case G_TOKEN_SYMBOL:
1382 if (expected_token == G_TOKEN_SYMBOL ||
1383 (scanner->config->symbol_2_token &&
1384 expected_token > G_TOKEN_LAST))
1385 print_unexp = FALSE;
1386 if (symbol_name)
1387 _g_snprintf (token_string,
1388 token_string_len,
1389 "%s%s '%s'",
1390 print_unexp ? "" : "invalid ",
1391 symbol_spec,
1392 symbol_name);
1393 else
1394 _g_snprintf (token_string,
1395 token_string_len,
1396 "%s%s",
1397 print_unexp ? "" : "invalid ",
1398 symbol_spec);
1399 break;
1401 case G_TOKEN_ERROR:
1402 print_unexp = FALSE;
1403 expected_token = G_TOKEN_NONE;
1404 switch (scanner->value.v_error)
1406 case G_ERR_UNEXP_EOF:
1407 _g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
1408 break;
1410 case G_ERR_UNEXP_EOF_IN_STRING:
1411 _g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
1412 break;
1414 case G_ERR_UNEXP_EOF_IN_COMMENT:
1415 _g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
1416 break;
1418 case G_ERR_NON_DIGIT_IN_CONST:
1419 _g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
1420 break;
1422 case G_ERR_FLOAT_RADIX:
1423 _g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
1424 break;
1426 case G_ERR_FLOAT_MALFORMED:
1427 _g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
1428 break;
1430 case G_ERR_DIGIT_RADIX:
1431 _g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
1432 break;
1434 case G_ERR_UNKNOWN:
1435 default:
1436 _g_snprintf (token_string, token_string_len, "scanner: unknown error");
1437 break;
1439 break;
1441 case G_TOKEN_CHAR:
1442 _g_snprintf (token_string, token_string_len, "character '%c'", scanner->value.v_char);
1443 break;
1445 case G_TOKEN_IDENTIFIER:
1446 case G_TOKEN_IDENTIFIER_NULL:
1447 if (expected_token == G_TOKEN_IDENTIFIER ||
1448 expected_token == G_TOKEN_IDENTIFIER_NULL)
1449 print_unexp = FALSE;
1450 _g_snprintf (token_string,
1451 token_string_len,
1452 "%s%s '%s'",
1453 print_unexp ? "" : "invalid ",
1454 identifier_spec,
1455 scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
1456 break;
1458 case G_TOKEN_BINARY:
1459 case G_TOKEN_OCTAL:
1460 case G_TOKEN_INT:
1461 case G_TOKEN_HEX:
1462 if (scanner->config->store_int64)
1463 _g_snprintf (token_string, token_string_len, "number '%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
1464 else
1465 _g_snprintf (token_string, token_string_len, "number '%lu'", scanner->value.v_int);
1466 break;
1468 case G_TOKEN_FLOAT:
1469 _g_snprintf (token_string, token_string_len, "number '%.3f'", scanner->value.v_float);
1470 break;
1472 case G_TOKEN_STRING:
1473 if (expected_token == G_TOKEN_STRING)
1474 print_unexp = FALSE;
1475 _g_snprintf (token_string,
1476 token_string_len,
1477 "%s%sstring constant \"%s\"",
1478 print_unexp ? "" : "invalid ",
1479 scanner->value.v_string[0] == 0 ? "empty " : "",
1480 scanner->value.v_string);
1481 token_string[token_string_len - 2] = '"';
1482 token_string[token_string_len - 1] = 0;
1483 break;
1485 case G_TOKEN_COMMENT_SINGLE:
1486 case G_TOKEN_COMMENT_MULTI:
1487 _g_snprintf (token_string, token_string_len, "comment");
1488 break;
1490 case G_TOKEN_NONE:
1491 /* somehow the user's parsing code is screwed, there isn't much
1492 * we can do about it.
1493 * Note, a common case to trigger this is
1494 * g_scanner_peek_next_token(); g_scanner_unexp_token();
1495 * without an intermediate g_scanner_get_next_token().
1497 g_assert_not_reached ();
1498 break;
1502 switch (expected_token)
1504 gboolean need_valid;
1505 gchar *tstring;
1506 case G_TOKEN_EOF:
1507 _g_snprintf (expected_string, expected_string_len, "end of file");
1508 break;
1509 default:
1510 if (expected_token >= 1 && expected_token <= 255)
1512 if ((expected_token >= ' ' && expected_token <= '~') ||
1513 strchr (scanner->config->cset_identifier_first, expected_token) ||
1514 strchr (scanner->config->cset_identifier_nth, expected_token))
1515 _g_snprintf (expected_string, expected_string_len, "character '%c'", expected_token);
1516 else
1517 _g_snprintf (expected_string, expected_string_len, "character '\\%o'", expected_token);
1518 break;
1520 else if (!scanner->config->symbol_2_token)
1522 _g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1523 break;
1525 /* fall through */
1526 case G_TOKEN_SYMBOL:
1527 need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1528 (scanner->config->symbol_2_token &&
1529 scanner->token > G_TOKEN_LAST));
1530 _g_snprintf (expected_string,
1531 expected_string_len,
1532 "%s%s",
1533 need_valid ? "valid " : "",
1534 symbol_spec);
1535 /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
1536 break;
1537 case G_TOKEN_CHAR:
1538 _g_snprintf (expected_string, expected_string_len, "%scharacter",
1539 scanner->token == G_TOKEN_CHAR ? "valid " : "");
1540 break;
1541 case G_TOKEN_BINARY:
1542 tstring = "binary";
1543 _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1544 scanner->token == expected_token ? "valid " : "", tstring);
1545 break;
1546 case G_TOKEN_OCTAL:
1547 tstring = "octal";
1548 _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1549 scanner->token == expected_token ? "valid " : "", tstring);
1550 break;
1551 case G_TOKEN_INT:
1552 tstring = "integer";
1553 _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1554 scanner->token == expected_token ? "valid " : "", tstring);
1555 break;
1556 case G_TOKEN_HEX:
1557 tstring = "hexadecimal";
1558 _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1559 scanner->token == expected_token ? "valid " : "", tstring);
1560 break;
1561 case G_TOKEN_FLOAT:
1562 tstring = "float";
1563 _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1564 scanner->token == expected_token ? "valid " : "", tstring);
1565 break;
1566 case G_TOKEN_STRING:
1567 _g_snprintf (expected_string,
1568 expected_string_len,
1569 "%sstring constant",
1570 scanner->token == G_TOKEN_STRING ? "valid " : "");
1571 break;
1572 case G_TOKEN_IDENTIFIER:
1573 case G_TOKEN_IDENTIFIER_NULL:
1574 need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1575 scanner->token == G_TOKEN_IDENTIFIER);
1576 _g_snprintf (expected_string,
1577 expected_string_len,
1578 "%s%s",
1579 need_valid ? "valid " : "",
1580 identifier_spec);
1581 break;
1582 case G_TOKEN_COMMENT_SINGLE:
1583 tstring = "single-line";
1584 _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1585 scanner->token == expected_token ? "valid " : "", tstring);
1586 break;
1587 case G_TOKEN_COMMENT_MULTI:
1588 tstring = "multi-line";
1589 _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1590 scanner->token == expected_token ? "valid " : "", tstring);
1591 break;
1592 case G_TOKEN_NONE:
1593 case G_TOKEN_ERROR:
1594 /* this is handled upon printout */
1595 break;
1598 if (message && message[0] != 0)
1599 message_prefix = " - ";
1600 else
1602 message_prefix = "";
1603 message = "";
1605 if (expected_token == G_TOKEN_ERROR)
1607 msg_handler (scanner,
1608 "failure around %s%s%s",
1609 token_string,
1610 message_prefix,
1611 message);
1613 else if (expected_token == G_TOKEN_NONE)
1615 if (print_unexp)
1616 msg_handler (scanner,
1617 "unexpected %s%s%s",
1618 token_string,
1619 message_prefix,
1620 message);
1621 else
1622 msg_handler (scanner,
1623 "%s%s%s",
1624 token_string,
1625 message_prefix,
1626 message);
1628 else
1630 if (print_unexp)
1631 msg_handler (scanner,
1632 "unexpected %s, expected %s%s%s",
1633 token_string,
1634 expected_string,
1635 message_prefix,
1636 message);
1637 else
1638 msg_handler (scanner,
1639 "%s, expected %s%s%s",
1640 token_string,
1641 expected_string,
1642 message_prefix,
1643 message);
1646 g_free (token_string);
1647 g_free (expected_string);
1650 static void
1651 g_scanner_get_token_i (GScanner *scanner,
1652 GTokenType *token_p,
1653 GTokenValue *value_p,
1654 guint *line_p,
1655 guint *position_p)
1659 g_scanner_free_value (token_p, value_p);
1660 g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1662 while (((*token_p > 0 && *token_p < 256) &&
1663 strchr (scanner->config->cset_skip_characters, *token_p)) ||
1664 (*token_p == G_TOKEN_CHAR &&
1665 strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1666 (*token_p == G_TOKEN_COMMENT_MULTI &&
1667 scanner->config->skip_comment_multi) ||
1668 (*token_p == G_TOKEN_COMMENT_SINGLE &&
1669 scanner->config->skip_comment_single));
1671 switch (*token_p)
1673 case G_TOKEN_IDENTIFIER:
1674 if (scanner->config->identifier_2_string)
1675 *token_p = G_TOKEN_STRING;
1676 break;
1678 case G_TOKEN_SYMBOL:
1679 if (scanner->config->symbol_2_token)
1680 *token_p = (GTokenType) value_p->v_symbol;
1681 break;
1683 case G_TOKEN_BINARY:
1684 case G_TOKEN_OCTAL:
1685 case G_TOKEN_HEX:
1686 if (scanner->config->numbers_2_int)
1687 *token_p = G_TOKEN_INT;
1688 break;
1690 default:
1691 break;
1694 if (*token_p == G_TOKEN_INT &&
1695 scanner->config->int_2_float)
1697 *token_p = G_TOKEN_FLOAT;
1698 if (scanner->config->store_int64)
1700 #ifdef _MSC_VER
1701 /* work around error C2520, see gvaluetransform.c */
1702 value_p->v_float = (__int64)value_p->v_int64;
1703 #else
1704 value_p->v_float = value_p->v_int64;
1705 #endif
1707 else
1708 value_p->v_float = value_p->v_int;
1711 errno = 0;
1714 static void
1715 g_scanner_get_token_ll (GScanner *scanner,
1716 GTokenType *token_p,
1717 GTokenValue *value_p,
1718 guint *line_p,
1719 guint *position_p)
1721 GScannerConfig *config;
1722 GTokenType token;
1723 gboolean in_comment_multi;
1724 gboolean in_comment_single;
1725 gboolean in_string_sq;
1726 gboolean in_string_dq;
1727 GString *gstring;
1728 GTokenValue value;
1729 guchar ch;
1731 config = scanner->config;
1732 (*value_p).v_int64 = 0;
1734 if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1735 scanner->token == G_TOKEN_EOF)
1737 *token_p = G_TOKEN_EOF;
1738 return;
1741 in_comment_multi = FALSE;
1742 in_comment_single = FALSE;
1743 in_string_sq = FALSE;
1744 in_string_dq = FALSE;
1745 gstring = NULL;
1747 do /* while (ch != 0) */
1749 gboolean dotted_float = FALSE;
1751 ch = g_scanner_get_char (scanner, line_p, position_p);
1753 value.v_int64 = 0;
1754 token = G_TOKEN_NONE;
1756 /* this is *evil*, but needed ;(
1757 * we first check for identifier first character, because it
1758 * might interfere with other key chars like slashes or numbers
1760 if (config->scan_identifier &&
1761 ch && strchr (config->cset_identifier_first, ch))
1762 goto identifier_precedence;
1764 switch (ch)
1766 case 0:
1767 token = G_TOKEN_EOF;
1768 (*position_p)++;
1769 /* ch = 0; */
1770 break;
1772 case '/':
1773 if (!config->scan_comment_multi ||
1774 g_scanner_peek_next_char (scanner) != '*')
1775 goto default_case;
1776 g_scanner_get_char (scanner, line_p, position_p);
1777 token = G_TOKEN_COMMENT_MULTI;
1778 in_comment_multi = TRUE;
1779 gstring = g_string_new (NULL);
1780 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1782 if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
1784 g_scanner_get_char (scanner, line_p, position_p);
1785 in_comment_multi = FALSE;
1786 break;
1788 else
1789 gstring = g_string_append_c (gstring, ch);
1791 ch = 0;
1792 break;
1794 case '\'':
1795 if (!config->scan_string_sq)
1796 goto default_case;
1797 token = G_TOKEN_STRING;
1798 in_string_sq = TRUE;
1799 gstring = g_string_new (NULL);
1800 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1802 if (ch == '\'')
1804 in_string_sq = FALSE;
1805 break;
1807 else
1808 gstring = g_string_append_c (gstring, ch);
1810 ch = 0;
1811 break;
1813 case '"':
1814 if (!config->scan_string_dq)
1815 goto default_case;
1816 token = G_TOKEN_STRING;
1817 in_string_dq = TRUE;
1818 gstring = g_string_new (NULL);
1819 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1821 if (ch == '"')
1823 in_string_dq = FALSE;
1824 break;
1826 else
1828 if (ch == '\\')
1830 ch = g_scanner_get_char (scanner, line_p, position_p);
1831 switch (ch)
1833 guint i;
1834 guint fchar;
1836 case 0:
1837 break;
1839 case '\\':
1840 gstring = g_string_append_c (gstring, '\\');
1841 break;
1843 case 'n':
1844 gstring = g_string_append_c (gstring, '\n');
1845 break;
1847 case 't':
1848 gstring = g_string_append_c (gstring, '\t');
1849 break;
1851 case 'r':
1852 gstring = g_string_append_c (gstring, '\r');
1853 break;
1855 case 'b':
1856 gstring = g_string_append_c (gstring, '\b');
1857 break;
1859 case 'f':
1860 gstring = g_string_append_c (gstring, '\f');
1861 break;
1863 case '0':
1864 case '1':
1865 case '2':
1866 case '3':
1867 case '4':
1868 case '5':
1869 case '6':
1870 case '7':
1871 i = ch - '0';
1872 fchar = g_scanner_peek_next_char (scanner);
1873 if (fchar >= '0' && fchar <= '7')
1875 ch = g_scanner_get_char (scanner, line_p, position_p);
1876 i = i * 8 + ch - '0';
1877 fchar = g_scanner_peek_next_char (scanner);
1878 if (fchar >= '0' && fchar <= '7')
1880 ch = g_scanner_get_char (scanner, line_p, position_p);
1881 i = i * 8 + ch - '0';
1884 gstring = g_string_append_c (gstring, i);
1885 break;
1887 default:
1888 gstring = g_string_append_c (gstring, ch);
1889 break;
1892 else
1893 gstring = g_string_append_c (gstring, ch);
1896 ch = 0;
1897 break;
1899 case '.':
1900 if (!config->scan_float)
1901 goto default_case;
1902 token = G_TOKEN_FLOAT;
1903 dotted_float = TRUE;
1904 ch = g_scanner_get_char (scanner, line_p, position_p);
1905 goto number_parsing;
1907 case '$':
1908 if (!config->scan_hex_dollar)
1909 goto default_case;
1910 token = G_TOKEN_HEX;
1911 ch = g_scanner_get_char (scanner, line_p, position_p);
1912 goto number_parsing;
1914 case '0':
1915 if (config->scan_octal)
1916 token = G_TOKEN_OCTAL;
1917 else
1918 token = G_TOKEN_INT;
1919 ch = g_scanner_peek_next_char (scanner);
1920 if (config->scan_hex && (ch == 'x' || ch == 'X'))
1922 token = G_TOKEN_HEX;
1923 g_scanner_get_char (scanner, line_p, position_p);
1924 ch = g_scanner_get_char (scanner, line_p, position_p);
1925 if (ch == 0)
1927 token = G_TOKEN_ERROR;
1928 value.v_error = G_ERR_UNEXP_EOF;
1929 (*position_p)++;
1930 break;
1932 if (g_scanner_char_2_num (ch, 16) < 0)
1934 token = G_TOKEN_ERROR;
1935 value.v_error = G_ERR_DIGIT_RADIX;
1936 ch = 0;
1937 break;
1940 else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1942 token = G_TOKEN_BINARY;
1943 g_scanner_get_char (scanner, line_p, position_p);
1944 ch = g_scanner_get_char (scanner, line_p, position_p);
1945 if (ch == 0)
1947 token = G_TOKEN_ERROR;
1948 value.v_error = G_ERR_UNEXP_EOF;
1949 (*position_p)++;
1950 break;
1952 if (g_scanner_char_2_num (ch, 10) < 0)
1954 token = G_TOKEN_ERROR;
1955 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1956 ch = 0;
1957 break;
1960 else
1961 ch = '0';
1962 /* fall through */
1963 case '1':
1964 case '2':
1965 case '3':
1966 case '4':
1967 case '5':
1968 case '6':
1969 case '7':
1970 case '8':
1971 case '9':
1972 number_parsing:
1974 gboolean in_number = TRUE;
1975 gchar *endptr;
1977 if (token == G_TOKEN_NONE)
1978 token = G_TOKEN_INT;
1980 gstring = g_string_new (dotted_float ? "0." : "");
1981 gstring = g_string_append_c (gstring, ch);
1983 do /* while (in_number) */
1985 gboolean is_E;
1987 is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1989 ch = g_scanner_peek_next_char (scanner);
1991 if (g_scanner_char_2_num (ch, 36) >= 0 ||
1992 (config->scan_float && ch == '.') ||
1993 (is_E && (ch == '+' || ch == '-')))
1995 ch = g_scanner_get_char (scanner, line_p, position_p);
1997 switch (ch)
1999 case '.':
2000 if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
2002 value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
2003 token = G_TOKEN_ERROR;
2004 in_number = FALSE;
2006 else
2008 token = G_TOKEN_FLOAT;
2009 gstring = g_string_append_c (gstring, ch);
2011 break;
2013 case '0':
2014 case '1':
2015 case '2':
2016 case '3':
2017 case '4':
2018 case '5':
2019 case '6':
2020 case '7':
2021 case '8':
2022 case '9':
2023 gstring = g_string_append_c (gstring, ch);
2024 break;
2026 case '-':
2027 case '+':
2028 if (token != G_TOKEN_FLOAT)
2030 token = G_TOKEN_ERROR;
2031 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2032 in_number = FALSE;
2034 else
2035 gstring = g_string_append_c (gstring, ch);
2036 break;
2038 case 'e':
2039 case 'E':
2040 if ((token != G_TOKEN_HEX && !config->scan_float) ||
2041 (token != G_TOKEN_HEX &&
2042 token != G_TOKEN_OCTAL &&
2043 token != G_TOKEN_FLOAT &&
2044 token != G_TOKEN_INT))
2046 token = G_TOKEN_ERROR;
2047 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2048 in_number = FALSE;
2050 else
2052 if (token != G_TOKEN_HEX)
2053 token = G_TOKEN_FLOAT;
2054 gstring = g_string_append_c (gstring, ch);
2056 break;
2058 default:
2059 if (token != G_TOKEN_HEX)
2061 token = G_TOKEN_ERROR;
2062 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2063 in_number = FALSE;
2065 else
2066 gstring = g_string_append_c (gstring, ch);
2067 break;
2070 else
2071 in_number = FALSE;
2073 while (in_number);
2075 endptr = NULL;
2076 if (token == G_TOKEN_FLOAT)
2077 value.v_float = g_strtod (gstring->str, &endptr);
2078 else
2080 guint64 ui64 = 0;
2081 switch (token)
2083 case G_TOKEN_BINARY:
2084 ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
2085 break;
2086 case G_TOKEN_OCTAL:
2087 ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
2088 break;
2089 case G_TOKEN_INT:
2090 ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
2091 break;
2092 case G_TOKEN_HEX:
2093 ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
2094 break;
2095 default: ;
2097 if (scanner->config->store_int64)
2098 value.v_int64 = ui64;
2099 else
2100 value.v_int = ui64;
2102 if (endptr && *endptr)
2104 token = G_TOKEN_ERROR;
2105 if (*endptr == 'e' || *endptr == 'E')
2106 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2107 else
2108 value.v_error = G_ERR_DIGIT_RADIX;
2110 g_string_free (gstring, TRUE);
2111 gstring = NULL;
2112 ch = 0;
2113 } /* number_parsing:... */
2114 break;
2116 default:
2117 default_case:
2119 if (config->cpair_comment_single &&
2120 ch == config->cpair_comment_single[0])
2122 token = G_TOKEN_COMMENT_SINGLE;
2123 in_comment_single = TRUE;
2124 gstring = g_string_new (NULL);
2125 ch = g_scanner_get_char (scanner, line_p, position_p);
2126 while (ch != 0)
2128 if (ch == config->cpair_comment_single[1])
2130 in_comment_single = FALSE;
2131 ch = 0;
2132 break;
2135 gstring = g_string_append_c (gstring, ch);
2136 ch = g_scanner_get_char (scanner, line_p, position_p);
2138 /* ignore a missing newline at EOF for single line comments */
2139 if (in_comment_single &&
2140 config->cpair_comment_single[1] == '\n')
2141 in_comment_single = FALSE;
2143 else if (config->scan_identifier && ch &&
2144 strchr (config->cset_identifier_first, ch))
2146 identifier_precedence:
2148 if (config->cset_identifier_nth && ch &&
2149 strchr (config->cset_identifier_nth,
2150 g_scanner_peek_next_char (scanner)))
2152 token = G_TOKEN_IDENTIFIER;
2153 gstring = g_string_new (NULL);
2154 gstring = g_string_append_c (gstring, ch);
2157 ch = g_scanner_get_char (scanner, line_p, position_p);
2158 gstring = g_string_append_c (gstring, ch);
2159 ch = g_scanner_peek_next_char (scanner);
2161 while (ch && strchr (config->cset_identifier_nth, ch));
2162 ch = 0;
2164 else if (config->scan_identifier_1char)
2166 token = G_TOKEN_IDENTIFIER;
2167 value.v_identifier = g_new0 (gchar, 2);
2168 value.v_identifier[0] = ch;
2169 ch = 0;
2172 if (ch)
2174 if (config->char_2_token)
2175 token = ch;
2176 else
2178 token = G_TOKEN_CHAR;
2179 value.v_char = ch;
2181 ch = 0;
2183 } /* default_case:... */
2184 break;
2186 g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
2188 while (ch != 0);
2190 if (in_comment_multi || in_comment_single ||
2191 in_string_sq || in_string_dq)
2193 token = G_TOKEN_ERROR;
2194 if (gstring)
2196 g_string_free (gstring, TRUE);
2197 gstring = NULL;
2199 (*position_p)++;
2200 if (in_comment_multi || in_comment_single)
2201 value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
2202 else /* (in_string_sq || in_string_dq) */
2203 value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
2206 if (gstring)
2208 value.v_string = g_string_free (gstring, FALSE);
2209 gstring = NULL;
2212 if (token == G_TOKEN_IDENTIFIER)
2214 if (config->scan_symbols)
2216 GScannerKey *key;
2217 guint scope_id;
2219 scope_id = scanner->scope_id;
2220 key = g_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
2221 if (!key && scope_id && scanner->config->scope_0_fallback)
2222 key = g_scanner_lookup_internal (scanner, 0, value.v_identifier);
2224 if (key)
2226 g_free (value.v_identifier);
2227 token = G_TOKEN_SYMBOL;
2228 value.v_symbol = key->value;
2232 if (token == G_TOKEN_IDENTIFIER &&
2233 config->scan_identifier_NULL &&
2234 strlen (value.v_identifier) == 4)
2236 gchar *null_upper = "NULL";
2237 gchar *null_lower = "null";
2239 if (scanner->config->case_sensitive)
2241 if (value.v_identifier[0] == null_upper[0] &&
2242 value.v_identifier[1] == null_upper[1] &&
2243 value.v_identifier[2] == null_upper[2] &&
2244 value.v_identifier[3] == null_upper[3])
2245 token = G_TOKEN_IDENTIFIER_NULL;
2247 else
2249 if ((value.v_identifier[0] == null_upper[0] ||
2250 value.v_identifier[0] == null_lower[0]) &&
2251 (value.v_identifier[1] == null_upper[1] ||
2252 value.v_identifier[1] == null_lower[1]) &&
2253 (value.v_identifier[2] == null_upper[2] ||
2254 value.v_identifier[2] == null_lower[2]) &&
2255 (value.v_identifier[3] == null_upper[3] ||
2256 value.v_identifier[3] == null_lower[3]))
2257 token = G_TOKEN_IDENTIFIER_NULL;
2262 *token_p = token;
2263 *value_p = value;