*** empty log message ***
[glib.git] / gscanner.c
blob832ca31fb1e0b06a33f0061d14b721710153d551
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * GScanner: Flexible lexical scanner for general purpose.
5 * Copyright (C) 1997, 1998 Tim Janik
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
24 * Modified by the GLib Team and others 1997-1999. See the AUTHORS
25 * file for a list of people on the GLib Team. See the ChangeLog
26 * files for a list of changes. These files are distributed with
27 * GLib at ftp://ftp.gtk.org/pub/gtk/.
30 /*
31 * MT safe
34 #define __gscanner_c__
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <stdlib.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <stdio.h>
44 #include "glib.h"
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>
47 #endif
48 #include <errno.h>
49 #include <sys/types.h> /* needed for sys/stat.h */
50 #include <sys/stat.h>
51 #ifdef NATIVE_WIN32
52 #include <io.h> /* For _read() */
53 #endif
55 /* --- defines --- */
/* Lower-case one character: 'A'..'Z' plus the byte ranges 192..214 and
 * 216..222 are mapped onto their lower-case counterparts, every other
 * value is returned unchanged.  All three ranges differ from their
 * lower-case form by the same offset ('a' - 'A' == 32) and have that bit
 * cleared, so a single conditional OR is sufficient.
 * Note: the argument is evaluated several times, so it must not have
 * side effects.
 */
#define to_lower(c) \
  ((guchar) (((guchar) (c)) | \
             ((((((guchar) (c)) >= 'A') && (((guchar) (c)) <= 'Z')) || \
               ((((guchar) (c)) >= 192) && (((guchar) (c)) <= 214)) || \
               ((((guchar) (c)) >= 216) && (((guchar) (c)) <= 222))) \
              ? ('a' - 'A') : 0)))

/* Number of bytes requested per read() when scanning from a file descriptor. */
#define READ_BUFFER_SIZE (4000)
67 /* --- typedefs --- */
68 typedef struct _GScannerKey GScannerKey;
70 struct _GScannerKey
72 guint scope_id;
73 gchar *symbol;
74 gpointer value;
79 /* --- variables --- */
80 static GScannerConfig g_scanner_config_template =
83 " \t\r\n"
84 ) /* cset_skip_characters */,
86 G_CSET_a_2_z
87 "_"
88 G_CSET_A_2_Z
89 ) /* cset_identifier_first */,
91 G_CSET_a_2_z
92 "_0123456789"
93 G_CSET_A_2_Z
94 G_CSET_LATINS
95 G_CSET_LATINC
96 ) /* cset_identifier_nth */,
97 ( "#\n" ) /* cpair_comment_single */,
99 FALSE /* case_sensitive */,
101 TRUE /* skip_comment_multi */,
102 TRUE /* skip_comment_single */,
103 TRUE /* scan_comment_multi */,
104 TRUE /* scan_identifier */,
105 FALSE /* scan_identifier_1char */,
106 FALSE /* scan_identifier_NULL */,
107 TRUE /* scan_symbols */,
108 FALSE /* scan_binary */,
109 TRUE /* scan_octal */,
110 TRUE /* scan_float */,
111 TRUE /* scan_hex */,
112 FALSE /* scan_hex_dollar */,
113 TRUE /* scan_string_sq */,
114 TRUE /* scan_string_dq */,
115 TRUE /* numbers_2_int */,
116 FALSE /* int_2_float */,
117 FALSE /* identifier_2_string */,
118 TRUE /* char_2_token */,
119 FALSE /* symbol_2_token */,
120 FALSE /* scope_0_fallback */,
124 /* --- prototypes --- */
125 static inline
126 GScannerKey* g_scanner_lookup_internal (GScanner *scanner,
127 guint scope_id,
128 const gchar *symbol);
129 static gint g_scanner_key_equal (gconstpointer v1,
130 gconstpointer v2);
131 static guint g_scanner_key_hash (gconstpointer v);
132 static void g_scanner_get_token_ll (GScanner *scanner,
133 GTokenType *token_p,
134 GTokenValue *value_p,
135 guint *line_p,
136 guint *position_p);
137 static void g_scanner_get_token_i (GScanner *scanner,
138 GTokenType *token_p,
139 GTokenValue *value_p,
140 guint *line_p,
141 guint *position_p);
143 static guchar g_scanner_peek_next_char (GScanner *scanner);
144 static guchar g_scanner_get_char (GScanner *scanner,
145 guint *line_p,
146 guint *position_p);
147 static void g_scanner_msg_handler (GScanner *scanner,
148 gchar *message,
149 gint is_error);
152 /* --- functions --- */
153 static inline gint
154 g_scanner_char_2_num (guchar c,
155 guchar base)
157 if (c >= '0' && c <= '9')
158 c -= '0';
159 else if (c >= 'A' && c <= 'Z')
160 c -= 'A' - 10;
161 else if (c >= 'a' && c <= 'z')
162 c -= 'a' - 10;
163 else
164 return -1;
166 if (c < base)
167 return c;
169 return -1;
172 GScanner*
173 g_scanner_new (GScannerConfig *config_templ)
175 GScanner *scanner;
177 if (!config_templ)
178 config_templ = &g_scanner_config_template;
180 scanner = g_new0 (GScanner, 1);
182 scanner->user_data = NULL;
183 scanner->max_parse_errors = 0;
184 scanner->parse_errors = 0;
185 scanner->input_name = NULL;
186 scanner->derived_data = NULL;
188 scanner->config = g_new0 (GScannerConfig, 1);
190 scanner->config->case_sensitive = config_templ->case_sensitive;
191 scanner->config->cset_skip_characters = config_templ->cset_skip_characters;
192 scanner->config->cset_identifier_first= config_templ->cset_identifier_first;
193 scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth;
194 scanner->config->cpair_comment_single = config_templ->cpair_comment_single;
195 scanner->config->skip_comment_multi = config_templ->skip_comment_multi;
196 scanner->config->skip_comment_single = config_templ->skip_comment_single;
197 scanner->config->scan_comment_multi = config_templ->scan_comment_multi;
198 scanner->config->scan_identifier = config_templ->scan_identifier;
199 scanner->config->scan_identifier_1char= config_templ->scan_identifier_1char;
200 scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL;
201 scanner->config->scan_symbols = config_templ->scan_symbols;
202 scanner->config->scan_binary = config_templ->scan_binary;
203 scanner->config->scan_octal = config_templ->scan_octal;
204 scanner->config->scan_float = config_templ->scan_float;
205 scanner->config->scan_hex = config_templ->scan_hex;
206 scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar;
207 scanner->config->scan_string_sq = config_templ->scan_string_sq;
208 scanner->config->scan_string_dq = config_templ->scan_string_dq;
209 scanner->config->numbers_2_int = config_templ->numbers_2_int;
210 scanner->config->int_2_float = config_templ->int_2_float;
211 scanner->config->identifier_2_string = config_templ->identifier_2_string;
212 scanner->config->char_2_token = config_templ->char_2_token;
213 scanner->config->symbol_2_token = config_templ->symbol_2_token;
214 scanner->config->scope_0_fallback = config_templ->scope_0_fallback;
216 scanner->token = G_TOKEN_NONE;
217 scanner->value.v_int = 0;
218 scanner->line = 1;
219 scanner->position = 0;
221 scanner->next_token = G_TOKEN_NONE;
222 scanner->next_value.v_int = 0;
223 scanner->next_line = 1;
224 scanner->next_position = 0;
226 scanner->symbol_table = g_hash_table_new (g_scanner_key_hash, g_scanner_key_equal);
227 scanner->input_fd = -1;
228 scanner->text = NULL;
229 scanner->text_end = NULL;
230 scanner->buffer = NULL;
231 scanner->scope_id = 0;
233 scanner->msg_handler = g_scanner_msg_handler;
235 return scanner;
238 static inline void
239 g_scanner_free_value (GTokenType *token_p,
240 GTokenValue *value_p)
242 switch (*token_p)
244 case G_TOKEN_STRING:
245 case G_TOKEN_IDENTIFIER:
246 case G_TOKEN_IDENTIFIER_NULL:
247 case G_TOKEN_COMMENT_SINGLE:
248 case G_TOKEN_COMMENT_MULTI:
249 g_free (value_p->v_string);
250 break;
252 default:
253 break;
256 *token_p = G_TOKEN_NONE;
259 static void
260 g_scanner_destroy_symbol_table_entry (gpointer _key,
261 gpointer _value,
262 gpointer _data)
264 GScannerKey *key = _key;
266 g_free (key->symbol);
267 g_free (key);
270 void
271 g_scanner_destroy (GScanner *scanner)
273 g_return_if_fail (scanner != NULL);
275 g_hash_table_foreach (scanner->symbol_table,
276 g_scanner_destroy_symbol_table_entry, NULL);
277 g_hash_table_destroy (scanner->symbol_table);
278 g_scanner_free_value (&scanner->token, &scanner->value);
279 g_scanner_free_value (&scanner->next_token, &scanner->next_value);
280 g_free (scanner->config);
281 g_free (scanner->buffer);
282 g_free (scanner);
285 static void
286 g_scanner_msg_handler (GScanner *scanner,
287 gchar *message,
288 gint is_error)
290 g_return_if_fail (scanner != NULL);
292 fprintf (stdout, "%s:%d: ", scanner->input_name, scanner->line);
293 if (is_error)
294 fprintf (stdout, "error: ");
295 fprintf (stdout, "%s\n", message);
298 void
299 g_scanner_error (GScanner *scanner,
300 const gchar *format,
301 ...)
303 g_return_if_fail (scanner != NULL);
304 g_return_if_fail (format != NULL);
306 scanner->parse_errors++;
308 if (scanner->msg_handler)
310 va_list args;
311 gchar *string;
313 va_start (args, format);
314 string = g_strdup_vprintf (format, args);
315 va_end (args);
317 scanner->msg_handler (scanner, string, TRUE);
319 g_free (string);
323 void
324 g_scanner_warn (GScanner *scanner,
325 const gchar *format,
326 ...)
328 g_return_if_fail (scanner != NULL);
329 g_return_if_fail (format != NULL);
331 if (scanner->msg_handler)
333 va_list args;
334 gchar *string;
336 va_start (args, format);
337 string = g_strdup_vprintf (format, args);
338 va_end (args);
340 scanner->msg_handler (scanner, string, FALSE);
342 g_free (string);
346 static gint
347 g_scanner_key_equal (gconstpointer v1,
348 gconstpointer v2)
350 const GScannerKey *key1 = v1;
351 const GScannerKey *key2 = v2;
353 return (key1->scope_id == key2->scope_id) && (strcmp (key1->symbol, key2->symbol) == 0);
356 static guint
357 g_scanner_key_hash (gconstpointer v)
359 const GScannerKey *key = v;
360 gchar *c;
361 guint h;
363 h = key->scope_id;
364 for (c = key->symbol; *c; c++)
366 guint g;
368 h = (h << 4) + *c;
369 g = h & 0xf0000000;
370 if (g)
372 h = h ^ (g >> 24);
373 h = h ^ g;
377 return h;
380 static inline GScannerKey*
381 g_scanner_lookup_internal (GScanner *scanner,
382 guint scope_id,
383 const gchar *symbol)
385 GScannerKey *key_p;
386 GScannerKey key;
388 key.scope_id = scope_id;
390 if (!scanner->config->case_sensitive)
392 gchar *d;
393 const gchar *c;
395 key.symbol = g_new (gchar, strlen (symbol) + 1);
396 for (d = key.symbol, c = symbol; *c; c++, d++)
397 *d = to_lower (*c);
398 *d = 0;
399 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
400 g_free (key.symbol);
402 else
404 key.symbol = (gchar*) symbol;
405 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
408 return key_p;
411 void
412 g_scanner_scope_add_symbol (GScanner *scanner,
413 guint scope_id,
414 const gchar *symbol,
415 gpointer value)
417 GScannerKey *key;
419 g_return_if_fail (scanner != NULL);
420 g_return_if_fail (symbol != NULL);
422 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
424 if (!key)
426 key = g_new (GScannerKey, 1);
427 key->scope_id = scope_id;
428 key->symbol = g_strdup (symbol);
429 key->value = value;
430 if (!scanner->config->case_sensitive)
432 gchar *c;
434 c = key->symbol;
435 while (*c != 0)
437 *c = to_lower (*c);
438 c++;
441 g_hash_table_insert (scanner->symbol_table, key, key);
443 else
444 key->value = value;
447 void
448 g_scanner_scope_remove_symbol (GScanner *scanner,
449 guint scope_id,
450 const gchar *symbol)
452 GScannerKey *key;
454 g_return_if_fail (scanner != NULL);
455 g_return_if_fail (symbol != NULL);
457 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
459 if (key)
461 g_hash_table_remove (scanner->symbol_table, key);
462 g_free (key->symbol);
463 g_free (key);
467 gpointer
468 g_scanner_lookup_symbol (GScanner *scanner,
469 const gchar *symbol)
471 GScannerKey *key;
472 guint scope_id;
474 g_return_val_if_fail (scanner != NULL, NULL);
476 if (!symbol)
477 return NULL;
479 scope_id = scanner->scope_id;
480 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
481 if (!key && scope_id && scanner->config->scope_0_fallback)
482 key = g_scanner_lookup_internal (scanner, 0, symbol);
484 if (key)
485 return key->value;
486 else
487 return NULL;
490 gpointer
491 g_scanner_scope_lookup_symbol (GScanner *scanner,
492 guint scope_id,
493 const gchar *symbol)
495 GScannerKey *key;
497 g_return_val_if_fail (scanner != NULL, NULL);
499 if (!symbol)
500 return NULL;
502 key = g_scanner_lookup_internal (scanner, scope_id, symbol);
504 if (key)
505 return key->value;
506 else
507 return NULL;
510 guint
511 g_scanner_set_scope (GScanner *scanner,
512 guint scope_id)
514 guint old_scope_id;
516 g_return_val_if_fail (scanner != NULL, 0);
518 old_scope_id = scanner->scope_id;
519 scanner->scope_id = scope_id;
521 return old_scope_id;
524 static void
525 g_scanner_foreach_internal (gpointer _key,
526 gpointer _value,
527 gpointer _user_data)
529 GScannerKey *key;
530 gpointer *d;
531 GHFunc func;
532 gpointer user_data;
533 guint *scope_id;
535 d = _user_data;
536 func = (GHFunc) d[0];
537 user_data = d[1];
538 scope_id = d[2];
539 key = _value;
541 if (key->scope_id == *scope_id)
542 func (key->symbol, key->value, user_data);
545 void
546 g_scanner_scope_foreach_symbol (GScanner *scanner,
547 guint scope_id,
548 GHFunc func,
549 gpointer user_data)
551 gpointer d[3];
553 g_return_if_fail (scanner != NULL);
555 d[0] = (gpointer) func;
556 d[1] = user_data;
557 d[2] = &scope_id;
559 g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
562 void
563 g_scanner_freeze_symbol_table (GScanner *scanner)
565 g_return_if_fail (scanner != NULL);
567 g_hash_table_freeze (scanner->symbol_table);
570 void
571 g_scanner_thaw_symbol_table (GScanner *scanner)
573 g_return_if_fail (scanner != NULL);
575 g_hash_table_thaw (scanner->symbol_table);
578 GTokenType
579 g_scanner_peek_next_token (GScanner *scanner)
581 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
583 if (scanner->next_token == G_TOKEN_NONE)
585 scanner->next_line = scanner->line;
586 scanner->next_position = scanner->position;
587 g_scanner_get_token_i (scanner,
588 &scanner->next_token,
589 &scanner->next_value,
590 &scanner->next_line,
591 &scanner->next_position);
594 return scanner->next_token;
597 GTokenType
598 g_scanner_get_next_token (GScanner *scanner)
600 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
602 if (scanner->next_token != G_TOKEN_NONE)
604 g_scanner_free_value (&scanner->token, &scanner->value);
606 scanner->token = scanner->next_token;
607 scanner->value = scanner->next_value;
608 scanner->line = scanner->next_line;
609 scanner->position = scanner->next_position;
610 scanner->next_token = G_TOKEN_NONE;
612 else
613 g_scanner_get_token_i (scanner,
614 &scanner->token,
615 &scanner->value,
616 &scanner->line,
617 &scanner->position);
619 return scanner->token;
622 GTokenType
623 g_scanner_cur_token (GScanner *scanner)
625 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
627 return scanner->token;
630 GTokenValue
631 g_scanner_cur_value (GScanner *scanner)
633 GTokenValue v;
635 v.v_int = 0;
637 g_return_val_if_fail (scanner != NULL, v);
639 /* MSC isn't capable of handling return scanner->value; ? */
641 v = scanner->value;
643 return v;
646 guint
647 g_scanner_cur_line (GScanner *scanner)
649 g_return_val_if_fail (scanner != NULL, 0);
651 return scanner->line;
654 guint
655 g_scanner_cur_position (GScanner *scanner)
657 g_return_val_if_fail (scanner != NULL, 0);
659 return scanner->position;
662 gboolean
663 g_scanner_eof (GScanner *scanner)
665 g_return_val_if_fail (scanner != NULL, TRUE);
667 return scanner->token == G_TOKEN_EOF;
670 void
671 g_scanner_input_file (GScanner *scanner,
672 gint input_fd)
674 g_return_if_fail (scanner != NULL);
675 g_return_if_fail (input_fd >= 0);
677 if (scanner->input_fd >= 0)
678 g_scanner_sync_file_offset (scanner);
680 scanner->token = G_TOKEN_NONE;
681 scanner->value.v_int = 0;
682 scanner->line = 1;
683 scanner->position = 0;
684 scanner->next_token = G_TOKEN_NONE;
686 scanner->input_fd = input_fd;
687 scanner->text = NULL;
688 scanner->text_end = NULL;
690 if (!scanner->buffer)
691 scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
694 void
695 g_scanner_input_text (GScanner *scanner,
696 const gchar *text,
697 guint text_len)
699 g_return_if_fail (scanner != NULL);
700 if (text_len)
701 g_return_if_fail (text != NULL);
702 else
703 text = NULL;
705 if (scanner->input_fd >= 0)
706 g_scanner_sync_file_offset (scanner);
708 scanner->token = G_TOKEN_NONE;
709 scanner->value.v_int = 0;
710 scanner->line = 1;
711 scanner->position = 0;
712 scanner->next_token = G_TOKEN_NONE;
714 scanner->input_fd = -1;
715 scanner->text = text;
716 scanner->text_end = text + text_len;
718 if (scanner->buffer)
720 g_free (scanner->buffer);
721 scanner->buffer = NULL;
725 static guchar
726 g_scanner_peek_next_char (GScanner *scanner)
728 if (scanner->text < scanner->text_end)
730 return *scanner->text;
732 else if (scanner->input_fd >= 0)
734 gint count;
735 gchar *buffer;
737 buffer = scanner->buffer;
740 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
742 while (count == -1 && (errno == EINTR || errno == EAGAIN));
744 if (count < 1)
746 scanner->input_fd = -1;
748 return 0;
750 else
752 scanner->text = buffer;
753 scanner->text_end = buffer + count;
755 return *buffer;
758 else
759 return 0;
762 void
763 g_scanner_sync_file_offset (GScanner *scanner)
765 g_return_if_fail (scanner != NULL);
767 /* for file input, rewind the filedescriptor to the current
768 * buffer position and blow the file read ahead buffer. usefull for
769 * third party uses of our filedescriptor, which hooks onto the current
770 * scanning position.
773 if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
775 gint buffered;
777 buffered = scanner->text_end - scanner->text;
778 if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
780 /* we succeeded, blow our buffer's contents now */
781 scanner->text = NULL;
782 scanner->text_end = NULL;
784 else
785 errno = 0;
789 static guchar
790 g_scanner_get_char (GScanner *scanner,
791 guint *line_p,
792 guint *position_p)
794 guchar fchar;
796 if (scanner->text < scanner->text_end)
797 fchar = *(scanner->text++);
798 else if (scanner->input_fd >= 0)
800 gint count;
801 gchar *buffer;
803 buffer = scanner->buffer;
806 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
808 while (count == -1 && (errno == EINTR || errno == EAGAIN));
810 if (count < 1)
812 scanner->input_fd = -1;
813 fchar = 0;
815 else
817 scanner->text = buffer + 1;
818 scanner->text_end = buffer + count;
819 fchar = *buffer;
820 if (!fchar)
822 g_scanner_sync_file_offset (scanner);
823 scanner->text_end = scanner->text;
824 scanner->input_fd = -1;
828 else
829 fchar = 0;
831 if (fchar == '\n')
833 (*position_p) = 0;
834 (*line_p)++;
836 else if (fchar)
838 (*position_p)++;
841 return fchar;
844 void
845 g_scanner_unexp_token (GScanner *scanner,
846 GTokenType expected_token,
847 const gchar *identifier_spec,
848 const gchar *symbol_spec,
849 const gchar *symbol_name,
850 const gchar *message,
851 gint is_error)
853 gchar *token_string;
854 guint token_string_len;
855 gchar *expected_string;
856 guint expected_string_len;
857 gchar *message_prefix;
858 gboolean print_unexp;
859 void (*msg_handler) (GScanner*, const gchar*, ...);
861 g_return_if_fail (scanner != NULL);
863 if (is_error)
864 msg_handler = g_scanner_error;
865 else
866 msg_handler = g_scanner_warn;
868 if (!identifier_spec)
869 identifier_spec = "identifier";
870 if (!symbol_spec)
871 symbol_spec = "symbol";
873 token_string_len = 56;
874 token_string = g_new (gchar, token_string_len + 1);
875 expected_string_len = 64;
876 expected_string = g_new (gchar, expected_string_len + 1);
877 print_unexp = TRUE;
879 switch (scanner->token)
881 case G_TOKEN_EOF:
882 g_snprintf (token_string, token_string_len, "end of file");
883 break;
885 default:
886 if (scanner->token >= 1 && scanner->token <= 255)
888 if ((scanner->token >= ' ' && scanner->token <= '~') ||
889 strchr (scanner->config->cset_identifier_first, scanner->token) ||
890 strchr (scanner->config->cset_identifier_nth, scanner->token))
891 g_snprintf (token_string, expected_string_len, "character `%c'", scanner->token);
892 else
893 g_snprintf (token_string, expected_string_len, "character `\\%o'", scanner->token);
894 break;
896 else if (!scanner->config->symbol_2_token)
898 g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
899 break;
901 /* fall through */
902 case G_TOKEN_SYMBOL:
903 if (expected_token == G_TOKEN_SYMBOL ||
904 (scanner->config->symbol_2_token &&
905 expected_token > G_TOKEN_LAST))
906 print_unexp = FALSE;
907 if (symbol_name)
908 g_snprintf (token_string,
909 token_string_len,
910 "%s%s `%s'",
911 print_unexp ? "" : "invalid ",
912 symbol_spec,
913 symbol_name);
914 else
915 g_snprintf (token_string,
916 token_string_len,
917 "%s%s",
918 print_unexp ? "" : "invalid ",
919 symbol_spec);
920 break;
922 case G_TOKEN_ERROR:
923 print_unexp = FALSE;
924 expected_token = G_TOKEN_NONE;
925 switch (scanner->value.v_error)
927 case G_ERR_UNEXP_EOF:
928 g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
929 break;
931 case G_ERR_UNEXP_EOF_IN_STRING:
932 g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
933 break;
935 case G_ERR_UNEXP_EOF_IN_COMMENT:
936 g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
937 break;
939 case G_ERR_NON_DIGIT_IN_CONST:
940 g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
941 break;
943 case G_ERR_FLOAT_RADIX:
944 g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
945 break;
947 case G_ERR_FLOAT_MALFORMED:
948 g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
949 break;
951 case G_ERR_DIGIT_RADIX:
952 g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
953 break;
955 case G_ERR_UNKNOWN:
956 default:
957 g_snprintf (token_string, token_string_len, "scanner: unknown error");
958 break;
960 break;
962 case G_TOKEN_CHAR:
963 g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
964 break;
966 case G_TOKEN_IDENTIFIER:
967 case G_TOKEN_IDENTIFIER_NULL:
968 if (expected_token == G_TOKEN_IDENTIFIER ||
969 expected_token == G_TOKEN_IDENTIFIER_NULL)
970 print_unexp = FALSE;
971 g_snprintf (token_string,
972 token_string_len,
973 "%s%s `%s'",
974 print_unexp ? "" : "invalid ",
975 identifier_spec,
976 scanner->value.v_string);
977 break;
979 case G_TOKEN_BINARY:
980 case G_TOKEN_OCTAL:
981 case G_TOKEN_INT:
982 case G_TOKEN_HEX:
983 g_snprintf (token_string, token_string_len, "number `%ld'", scanner->value.v_int);
984 break;
986 case G_TOKEN_FLOAT:
987 g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
988 break;
990 case G_TOKEN_STRING:
991 if (expected_token == G_TOKEN_STRING)
992 print_unexp = FALSE;
993 g_snprintf (token_string,
994 token_string_len,
995 "%s%sstring constant \"%s\"",
996 print_unexp ? "" : "invalid ",
997 scanner->value.v_string[0] == 0 ? "empty " : "",
998 scanner->value.v_string);
999 token_string[token_string_len - 2] = '"';
1000 token_string[token_string_len - 1] = 0;
1001 break;
1003 case G_TOKEN_COMMENT_SINGLE:
1004 case G_TOKEN_COMMENT_MULTI:
1005 g_snprintf (token_string, token_string_len, "comment");
1006 break;
1008 case G_TOKEN_NONE:
1009 /* somehow the user's parsing code is screwed, there isn't much
1010 * we can do about it.
1011 * Note, a common case to trigger this is
1012 * g_scanner_peek_next_token(); g_scanner_unexp_token();
1013 * without an intermediate g_scanner_get_next_token().
1015 g_assert_not_reached ();
1016 break;
1020 switch (expected_token)
1022 gboolean need_valid;
1024 default:
1025 if (expected_token >= 1 && expected_token <= 255)
1027 if ((expected_token >= ' ' && expected_token <= '~') ||
1028 strchr (scanner->config->cset_identifier_first, expected_token) ||
1029 strchr (scanner->config->cset_identifier_nth, expected_token))
1030 g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
1031 else
1032 g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
1033 break;
1035 else if (!scanner->config->symbol_2_token)
1037 g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1038 break;
1040 /* fall through */
1041 case G_TOKEN_SYMBOL:
1042 need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1043 (scanner->config->symbol_2_token &&
1044 scanner->token > G_TOKEN_LAST));
1045 g_snprintf (expected_string,
1046 expected_string_len,
1047 "%s%s",
1048 need_valid ? "valid " : "",
1049 symbol_spec);
1050 /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
1051 break;
1053 case G_TOKEN_INT:
1054 g_snprintf (expected_string, expected_string_len, "number (integer)");
1055 break;
1057 case G_TOKEN_FLOAT:
1058 g_snprintf (expected_string, expected_string_len, "number (float)");
1059 break;
1061 case G_TOKEN_STRING:
1062 g_snprintf (expected_string,
1063 expected_string_len,
1064 "%sstring constant",
1065 scanner->token == G_TOKEN_STRING ? "valid " : "");
1066 break;
1068 case G_TOKEN_IDENTIFIER:
1069 case G_TOKEN_IDENTIFIER_NULL:
1070 g_snprintf (expected_string,
1071 expected_string_len,
1072 "%s%s",
1073 (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1074 scanner->token == G_TOKEN_IDENTIFIER ? "valid " : ""),
1075 identifier_spec);
1076 break;
1078 case G_TOKEN_NONE:
1079 break;
1082 if (message && message[0] != 0)
1083 message_prefix = " - ";
1084 else
1086 message_prefix = "";
1087 message = "";
1090 if (expected_token != G_TOKEN_NONE)
1092 if (print_unexp)
1093 msg_handler (scanner,
1094 "unexpected %s, expected %s%s%s",
1095 token_string,
1096 expected_string,
1097 message_prefix,
1098 message);
1099 else
1100 msg_handler (scanner,
1101 "%s, expected %s%s%s",
1102 token_string,
1103 expected_string,
1104 message_prefix,
1105 message);
1107 else
1109 if (print_unexp)
1110 msg_handler (scanner,
1111 "unexpected %s%s%s",
1112 token_string,
1113 message_prefix,
1114 message);
1115 else
1116 msg_handler (scanner,
1117 "%s%s%s",
1118 token_string,
1119 message_prefix,
1120 message);
1123 g_free (token_string);
1124 g_free (expected_string);
1127 gint
1128 g_scanner_stat_mode (const gchar *filename)
1130 struct stat *stat_buf;
1131 gint st_mode;
1133 stat_buf = g_new0 (struct stat, 1);
1134 #ifdef HAVE_LSTAT
1135 lstat (filename, stat_buf);
1136 #else
1137 stat (filename, stat_buf);
1138 #endif
1139 st_mode = stat_buf->st_mode;
1141 g_free (stat_buf);
1143 return st_mode;
1146 static void
1147 g_scanner_get_token_i (GScanner *scanner,
1148 GTokenType *token_p,
1149 GTokenValue *value_p,
1150 guint *line_p,
1151 guint *position_p)
1155 g_scanner_free_value (token_p, value_p);
1156 g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1158 while (((*token_p > 0 && *token_p < 256) &&
1159 strchr (scanner->config->cset_skip_characters, *token_p)) ||
1160 (*token_p == G_TOKEN_CHAR &&
1161 strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1162 (*token_p == G_TOKEN_COMMENT_MULTI &&
1163 scanner->config->skip_comment_multi) ||
1164 (*token_p == G_TOKEN_COMMENT_SINGLE &&
1165 scanner->config->skip_comment_single));
1167 switch (*token_p)
1169 case G_TOKEN_IDENTIFIER:
1170 if (scanner->config->identifier_2_string)
1171 *token_p = G_TOKEN_STRING;
1172 break;
1174 case G_TOKEN_SYMBOL:
1175 if (scanner->config->symbol_2_token)
1176 *token_p = (GTokenType) value_p->v_symbol;
1177 break;
1179 case G_TOKEN_BINARY:
1180 case G_TOKEN_OCTAL:
1181 case G_TOKEN_HEX:
1182 if (scanner->config->numbers_2_int)
1183 *token_p = G_TOKEN_INT;
1184 break;
1186 default:
1187 break;
1190 if (*token_p == G_TOKEN_INT &&
1191 scanner->config->int_2_float)
1193 *token_p = G_TOKEN_FLOAT;
1194 value_p->v_float = value_p->v_int;
1197 errno = 0;
1200 static void
1201 g_scanner_get_token_ll (GScanner *scanner,
1202 GTokenType *token_p,
1203 GTokenValue *value_p,
1204 guint *line_p,
1205 guint *position_p)
1207 GScannerConfig *config;
1208 GTokenType token;
1209 gboolean in_comment_multi;
1210 gboolean in_comment_single;
1211 gboolean in_string_sq;
1212 gboolean in_string_dq;
1213 GString *gstring;
1214 GTokenValue value;
1215 guchar ch;
1217 config = scanner->config;
1218 (*value_p).v_int = 0;
1220 if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1221 scanner->token == G_TOKEN_EOF)
1223 *token_p = G_TOKEN_EOF;
1224 return;
1227 in_comment_multi = FALSE;
1228 in_comment_single = FALSE;
1229 in_string_sq = FALSE;
1230 in_string_dq = FALSE;
1231 gstring = NULL;
1233 do /* while (ch != 0) */
1235 gboolean dotted_float = FALSE;
1237 ch = g_scanner_get_char (scanner, line_p, position_p);
1239 value.v_int = 0;
1240 token = G_TOKEN_NONE;
1242 /* this is *evil*, but needed ;(
1243 * we first check for identifier first character, because it
1244 * might interfere with other key chars like slashes or numbers
1246 if (config->scan_identifier &&
1247 ch && strchr (config->cset_identifier_first, ch))
1248 goto identifier_precedence;
1250 switch (ch)
1252 case 0:
1253 token = G_TOKEN_EOF;
1254 (*position_p)++;
1255 /* ch = 0; */
1256 break;
1258 case '/':
1259 if (!config->scan_comment_multi ||
1260 g_scanner_peek_next_char (scanner) != '*')
1261 goto default_case;
1262 g_scanner_get_char (scanner, line_p, position_p);
1263 token = G_TOKEN_COMMENT_MULTI;
1264 in_comment_multi = TRUE;
1265 gstring = g_string_new ("");
1266 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1268 if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
1270 g_scanner_get_char (scanner, line_p, position_p);
1271 in_comment_multi = FALSE;
1272 break;
1274 else
1275 gstring = g_string_append_c (gstring, ch);
1277 ch = 0;
1278 break;
1280 case '\'':
1281 if (!config->scan_string_sq)
1282 goto default_case;
1283 token = G_TOKEN_STRING;
1284 in_string_sq = TRUE;
1285 gstring = g_string_new ("");
1286 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1288 if (ch == '\'')
1290 in_string_sq = FALSE;
1291 break;
1293 else
1294 gstring = g_string_append_c (gstring, ch);
1296 ch = 0;
1297 break;
1299 case '"':
1300 if (!config->scan_string_dq)
1301 goto default_case;
1302 token = G_TOKEN_STRING;
1303 in_string_dq = TRUE;
1304 gstring = g_string_new ("");
1305 while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1307 if (ch == '"')
1309 in_string_dq = FALSE;
1310 break;
1312 else
1314 if (ch == '\\')
1316 ch = g_scanner_get_char (scanner, line_p, position_p);
1317 switch (ch)
1319 guint i;
1320 guint fchar;
1322 case 0:
1323 break;
1325 case '\\':
1326 gstring = g_string_append_c (gstring, '\\');
1327 break;
1329 case 'n':
1330 gstring = g_string_append_c (gstring, '\n');
1331 break;
1333 case 't':
1334 gstring = g_string_append_c (gstring, '\t');
1335 break;
1337 case 'r':
1338 gstring = g_string_append_c (gstring, '\r');
1339 break;
1341 case 'b':
1342 gstring = g_string_append_c (gstring, '\b');
1343 break;
1345 case 'f':
1346 gstring = g_string_append_c (gstring, '\f');
1347 break;
1349 case '0':
1350 case '1':
1351 case '2':
1352 case '3':
1353 case '4':
1354 case '5':
1355 case '6':
1356 case '7':
1357 i = ch - '0';
1358 fchar = g_scanner_peek_next_char (scanner);
1359 if (fchar >= '0' && fchar <= '7')
1361 ch = g_scanner_get_char (scanner, line_p, position_p);
1362 i = i * 8 + ch - '0';
1363 fchar = g_scanner_peek_next_char (scanner);
1364 if (fchar >= '0' && fchar <= '7')
1366 ch = g_scanner_get_char (scanner, line_p, position_p);
1367 i = i * 8 + ch - '0';
1370 gstring = g_string_append_c (gstring, i);
1371 break;
1373 default:
1374 gstring = g_string_append_c (gstring, ch);
1375 break;
1378 else
1379 gstring = g_string_append_c (gstring, ch);
1382 ch = 0;
1383 break;
1385 case '.':
1386 if (!config->scan_float)
1387 goto default_case;
1388 token = G_TOKEN_FLOAT;
1389 dotted_float = TRUE;
1390 ch = g_scanner_get_char (scanner, line_p, position_p);
1391 goto number_parsing;
1393 case '$':
1394 if (!config->scan_hex_dollar)
1395 goto default_case;
1396 token = G_TOKEN_HEX;
1397 ch = g_scanner_get_char (scanner, line_p, position_p);
1398 goto number_parsing;
1400 case '0':
1401 if (config->scan_octal)
1402 token = G_TOKEN_OCTAL;
1403 else
1404 token = G_TOKEN_INT;
1405 ch = g_scanner_peek_next_char (scanner);
1406 if (config->scan_hex && (ch == 'x' || ch == 'X'))
1408 token = G_TOKEN_HEX;
1409 g_scanner_get_char (scanner, line_p, position_p);
1410 ch = g_scanner_get_char (scanner, line_p, position_p);
1411 if (ch == 0)
1413 token = G_TOKEN_ERROR;
1414 value.v_error = G_ERR_UNEXP_EOF;
1415 (*position_p)++;
1416 break;
1418 if (g_scanner_char_2_num (ch, 16) < 0)
1420 token = G_TOKEN_ERROR;
1421 value.v_error = G_ERR_DIGIT_RADIX;
1422 ch = 0;
1423 break;
1426 else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1428 token = G_TOKEN_BINARY;
1429 g_scanner_get_char (scanner, line_p, position_p);
1430 ch = g_scanner_get_char (scanner, line_p, position_p);
1431 if (ch == 0)
1433 token = G_TOKEN_ERROR;
1434 value.v_error = G_ERR_UNEXP_EOF;
1435 (*position_p)++;
1436 break;
1438 if (g_scanner_char_2_num (ch, 10) < 0)
1440 token = G_TOKEN_ERROR;
1441 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1442 ch = 0;
1443 break;
1446 else
1447 ch = '0';
1448 /* fall through */
1449 case '1':
1450 case '2':
1451 case '3':
1452 case '4':
1453 case '5':
1454 case '6':
1455 case '7':
1456 case '8':
1457 case '9':
1458 number_parsing:
1460 gboolean in_number = TRUE;
1461 gchar *endptr;
1463 if (token == G_TOKEN_NONE)
1464 token = G_TOKEN_INT;
1466 gstring = g_string_new (dotted_float ? "0." : "");
1467 gstring = g_string_append_c (gstring, ch);
1469 do /* while (in_number) */
1471 gboolean is_E;
1473 is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1475 ch = g_scanner_peek_next_char (scanner);
1477 if (g_scanner_char_2_num (ch, 36) >= 0 ||
1478 (config->scan_float && ch == '.') ||
1479 (is_E && (ch == '+' || ch == '-')))
1481 ch = g_scanner_get_char (scanner, line_p, position_p);
1483 switch (ch)
1485 case '.':
1486 if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1488 value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1489 token = G_TOKEN_ERROR;
1490 in_number = FALSE;
1492 else
1494 token = G_TOKEN_FLOAT;
1495 gstring = g_string_append_c (gstring, ch);
1497 break;
1499 case '0':
1500 case '1':
1501 case '2':
1502 case '3':
1503 case '4':
1504 case '5':
1505 case '6':
1506 case '7':
1507 case '8':
1508 case '9':
1509 gstring = g_string_append_c (gstring, ch);
1510 break;
1512 case '-':
1513 case '+':
1514 if (token != G_TOKEN_FLOAT)
1516 token = G_TOKEN_ERROR;
1517 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1518 in_number = FALSE;
1520 else
1521 gstring = g_string_append_c (gstring, ch);
1522 break;
1524 case 'e':
1525 case 'E':
1526 if ((token != G_TOKEN_HEX && !config->scan_float) ||
1527 (token != G_TOKEN_HEX &&
1528 token != G_TOKEN_OCTAL &&
1529 token != G_TOKEN_FLOAT &&
1530 token != G_TOKEN_INT))
1532 token = G_TOKEN_ERROR;
1533 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1534 in_number = FALSE;
1536 else
1538 if (token != G_TOKEN_HEX)
1539 token = G_TOKEN_FLOAT;
1540 gstring = g_string_append_c (gstring, ch);
1542 break;
1544 default:
1545 if (token != G_TOKEN_HEX)
1547 token = G_TOKEN_ERROR;
1548 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1549 in_number = FALSE;
1551 else
1552 gstring = g_string_append_c (gstring, ch);
1553 break;
1556 else
1557 in_number = FALSE;
1559 while (in_number);
1561 endptr = NULL;
1562 switch (token)
1564 case G_TOKEN_BINARY:
1565 value.v_binary = strtol (gstring->str, &endptr, 2);
1566 break;
1568 case G_TOKEN_OCTAL:
1569 value.v_octal = strtol (gstring->str, &endptr, 8);
1570 break;
1572 case G_TOKEN_INT:
1573 value.v_int = strtol (gstring->str, &endptr, 10);
1574 break;
1576 case G_TOKEN_FLOAT:
1577 value.v_float = g_strtod (gstring->str, &endptr);
1578 break;
1580 case G_TOKEN_HEX:
1581 value.v_hex = strtol (gstring->str, &endptr, 16);
1582 break;
1584 default:
1585 break;
1587 if (endptr && *endptr)
1589 token = G_TOKEN_ERROR;
1590 if (*endptr == 'e' || *endptr == 'E')
1591 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1592 else
1593 value.v_error = G_ERR_DIGIT_RADIX;
1595 g_string_free (gstring, TRUE);
1596 gstring = NULL;
1597 ch = 0;
1598 } /* number_parsing:... */
1599 break;
1601 default:
1602 default_case:
1604 if (config->cpair_comment_single &&
1605 ch == config->cpair_comment_single[0])
1607 token = G_TOKEN_COMMENT_SINGLE;
1608 in_comment_single = TRUE;
1609 gstring = g_string_new ("");
1610 while ((ch = g_scanner_get_char (scanner,
1611 line_p,
1612 position_p)) != 0)
1614 if (ch == config->cpair_comment_single[1])
1616 in_comment_single = FALSE;
1617 ch = 0;
1618 break;
1621 gstring = g_string_append_c (gstring, ch);
1622 ch = 0;
1625 else if (config->scan_identifier && ch &&
1626 strchr (config->cset_identifier_first, ch))
1628 identifier_precedence:
1630 if (config->cset_identifier_nth && ch &&
1631 strchr (config->cset_identifier_nth,
1632 g_scanner_peek_next_char (scanner)))
1634 token = G_TOKEN_IDENTIFIER;
1635 gstring = g_string_new ("");
1636 gstring = g_string_append_c (gstring, ch);
1639 ch = g_scanner_get_char (scanner, line_p, position_p);
1640 gstring = g_string_append_c (gstring, ch);
1641 ch = g_scanner_peek_next_char (scanner);
1643 while (ch && strchr (config->cset_identifier_nth, ch));
1644 ch = 0;
1646 else if (config->scan_identifier_1char)
1648 token = G_TOKEN_IDENTIFIER;
1649 value.v_identifier = g_new0 (gchar, 2);
1650 value.v_identifier[0] = ch;
1651 ch = 0;
1654 if (ch)
1656 if (config->char_2_token)
1657 token = ch;
1658 else
1660 token = G_TOKEN_CHAR;
1661 value.v_char = ch;
1663 ch = 0;
1665 } /* default_case:... */
1666 break;
1668 g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1670 while (ch != 0);
1672 if (in_comment_multi || in_comment_single ||
1673 in_string_sq || in_string_dq)
1675 token = G_TOKEN_ERROR;
1676 if (gstring)
1678 g_string_free (gstring, TRUE);
1679 gstring = NULL;
1681 (*position_p)++;
1682 if (in_comment_multi || in_comment_single)
1683 value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1684 else /* (in_string_sq || in_string_dq) */
1685 value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1688 if (gstring)
1690 value.v_string = gstring->str;
1691 g_string_free (gstring, FALSE);
1692 gstring = NULL;
1695 if (token == G_TOKEN_IDENTIFIER)
1697 if (config->scan_symbols)
1699 GScannerKey *key;
1700 guint scope_id;
1702 scope_id = scanner->scope_id;
1703 key = g_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1704 if (!key && scope_id && scanner->config->scope_0_fallback)
1705 key = g_scanner_lookup_internal (scanner, 0, value.v_identifier);
1707 if (key)
1709 g_free (value.v_identifier);
1710 token = G_TOKEN_SYMBOL;
1711 value.v_symbol = key->value;
1715 if (token == G_TOKEN_IDENTIFIER &&
1716 config->scan_identifier_NULL &&
1717 strlen (value.v_identifier) == 4)
1719 gchar *null_upper = "NULL";
1720 gchar *null_lower = "null";
1722 if (scanner->config->case_sensitive)
1724 if (value.v_identifier[0] == null_upper[0] &&
1725 value.v_identifier[1] == null_upper[1] &&
1726 value.v_identifier[2] == null_upper[2] &&
1727 value.v_identifier[3] == null_upper[3])
1728 token = G_TOKEN_IDENTIFIER_NULL;
1730 else
1732 if ((value.v_identifier[0] == null_upper[0] ||
1733 value.v_identifier[0] == null_lower[0]) &&
1734 (value.v_identifier[1] == null_upper[1] ||
1735 value.v_identifier[1] == null_lower[1]) &&
1736 (value.v_identifier[2] == null_upper[2] ||
1737 value.v_identifier[2] == null_lower[2]) &&
1738 (value.v_identifier[3] == null_upper[3] ||
1739 value.v_identifier[3] == null_lower[3]))
1740 token = G_TOKEN_IDENTIFIER_NULL;
1745 *token_p = token;
1746 *value_p = value;