1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * GScanner: Flexible lexical scanner for general purpose.
5 * Copyright (C) 1997, 1998 Tim Janik
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
27 #define __gscanner_c__
42 #include <sys/types.h> /* needed for sys/stat.h */
45 #include <io.h> /* For _read() */
49 #define to_lower(c) ( \
51 ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) | \
52 ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) | \
53 ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) | \
57 #define READ_BUFFER_SIZE (4000)
60 /* --- typedefs --- */
61 typedef struct _GScannerKey GScannerKey
;
72 /* --- variables --- */
73 static GScannerConfig g_scanner_config_template
=
77 ) /* cset_skip_characters */,
82 ) /* cset_identifier_first */,
89 ) /* cset_identifier_nth */,
90 ( "#\n" ) /* cpair_comment_single */,
92 FALSE
/* case_sensitive */,
94 TRUE
/* skip_comment_multi */,
95 TRUE
/* skip_comment_single */,
96 TRUE
/* scan_comment_multi */,
97 TRUE
/* scan_identifier */,
98 FALSE
/* scan_identifier_1char */,
99 FALSE
/* scan_identifier_NULL */,
100 TRUE
/* scan_symbols */,
101 FALSE
/* scan_binary */,
102 TRUE
/* scan_octal */,
103 TRUE
/* scan_float */,
105 FALSE
/* scan_hex_dollar */,
106 TRUE
/* scan_string_sq */,
107 TRUE
/* scan_string_dq */,
108 TRUE
/* numbers_2_int */,
109 FALSE
/* int_2_float */,
110 FALSE
/* identifier_2_string */,
111 TRUE
/* char_2_token */,
112 FALSE
/* symbol_2_token */,
113 FALSE
/* scope_0_fallback */,
117 /* --- prototypes --- */
119 GScannerKey
* g_scanner_lookup_internal (GScanner
*scanner
,
121 const gchar
*symbol
);
122 static gint
g_scanner_key_equal (gconstpointer v1
,
124 static guint
g_scanner_key_hash (gconstpointer v
);
125 static void g_scanner_get_token_ll (GScanner
*scanner
,
127 GTokenValue
*value_p
,
130 static void g_scanner_get_token_i (GScanner
*scanner
,
132 GTokenValue
*value_p
,
136 static guchar
g_scanner_peek_next_char (GScanner
*scanner
);
137 static guchar
g_scanner_get_char (GScanner
*scanner
,
140 static void g_scanner_msg_handler (GScanner
*scanner
,
145 /* --- functions --- */
147 g_scanner_char_2_num (guchar c
,
150 if (c
>= '0' && c
<= '9')
152 else if (c
>= 'A' && c
<= 'Z')
154 else if (c
>= 'a' && c
<= 'z')
166 g_scanner_new (GScannerConfig
*config_templ
)
171 config_templ
= &g_scanner_config_template
;
173 scanner
= g_new0 (GScanner
, 1);
175 scanner
->user_data
= NULL
;
176 scanner
->max_parse_errors
= 0;
177 scanner
->parse_errors
= 0;
178 scanner
->input_name
= NULL
;
179 scanner
->derived_data
= NULL
;
181 scanner
->config
= g_new0 (GScannerConfig
, 1);
183 scanner
->config
->case_sensitive
= config_templ
->case_sensitive
;
184 scanner
->config
->cset_skip_characters
= config_templ
->cset_skip_characters
;
185 scanner
->config
->cset_identifier_first
= config_templ
->cset_identifier_first
;
186 scanner
->config
->cset_identifier_nth
= config_templ
->cset_identifier_nth
;
187 scanner
->config
->cpair_comment_single
= config_templ
->cpair_comment_single
;
188 scanner
->config
->skip_comment_multi
= config_templ
->skip_comment_multi
;
189 scanner
->config
->skip_comment_single
= config_templ
->skip_comment_single
;
190 scanner
->config
->scan_comment_multi
= config_templ
->scan_comment_multi
;
191 scanner
->config
->scan_identifier
= config_templ
->scan_identifier
;
192 scanner
->config
->scan_identifier_1char
= config_templ
->scan_identifier_1char
;
193 scanner
->config
->scan_identifier_NULL
= config_templ
->scan_identifier_NULL
;
194 scanner
->config
->scan_symbols
= config_templ
->scan_symbols
;
195 scanner
->config
->scan_binary
= config_templ
->scan_binary
;
196 scanner
->config
->scan_octal
= config_templ
->scan_octal
;
197 scanner
->config
->scan_float
= config_templ
->scan_float
;
198 scanner
->config
->scan_hex
= config_templ
->scan_hex
;
199 scanner
->config
->scan_hex_dollar
= config_templ
->scan_hex_dollar
;
200 scanner
->config
->scan_string_sq
= config_templ
->scan_string_sq
;
201 scanner
->config
->scan_string_dq
= config_templ
->scan_string_dq
;
202 scanner
->config
->numbers_2_int
= config_templ
->numbers_2_int
;
203 scanner
->config
->int_2_float
= config_templ
->int_2_float
;
204 scanner
->config
->identifier_2_string
= config_templ
->identifier_2_string
;
205 scanner
->config
->char_2_token
= config_templ
->char_2_token
;
206 scanner
->config
->symbol_2_token
= config_templ
->symbol_2_token
;
207 scanner
->config
->scope_0_fallback
= config_templ
->scope_0_fallback
;
209 scanner
->token
= G_TOKEN_NONE
;
210 scanner
->value
.v_int
= 0;
212 scanner
->position
= 0;
214 scanner
->next_token
= G_TOKEN_NONE
;
215 scanner
->next_value
.v_int
= 0;
216 scanner
->next_line
= 1;
217 scanner
->next_position
= 0;
219 scanner
->symbol_table
= g_hash_table_new (g_scanner_key_hash
, g_scanner_key_equal
);
220 scanner
->input_fd
= -1;
221 scanner
->text
= NULL
;
222 scanner
->text_end
= NULL
;
223 scanner
->buffer
= NULL
;
224 scanner
->scope_id
= 0;
226 scanner
->msg_handler
= g_scanner_msg_handler
;
232 g_scanner_free_value (GTokenType
*token_p
,
233 GTokenValue
*value_p
)
238 case G_TOKEN_IDENTIFIER
:
239 case G_TOKEN_IDENTIFIER_NULL
:
240 case G_TOKEN_COMMENT_SINGLE
:
241 case G_TOKEN_COMMENT_MULTI
:
242 g_free (value_p
->v_string
);
249 *token_p
= G_TOKEN_NONE
;
253 g_scanner_destroy_symbol_table_entry (gpointer _key
,
257 GScannerKey
*key
= _key
;
259 g_free (key
->symbol
);
264 g_scanner_destroy (GScanner
*scanner
)
266 g_return_if_fail (scanner
!= NULL
);
268 g_hash_table_foreach (scanner
->symbol_table
,
269 g_scanner_destroy_symbol_table_entry
, NULL
);
270 g_hash_table_destroy (scanner
->symbol_table
);
271 g_scanner_free_value (&scanner
->token
, &scanner
->value
);
272 g_scanner_free_value (&scanner
->next_token
, &scanner
->next_value
);
273 g_free (scanner
->config
);
274 g_free (scanner
->buffer
);
279 g_scanner_msg_handler (GScanner
*scanner
,
283 g_return_if_fail (scanner
!= NULL
);
285 fprintf (stdout
, "%s:%d: ", scanner
->input_name
, scanner
->line
);
287 fprintf (stdout
, "error: ");
288 fprintf (stdout
, "%s\n", message
);
292 g_scanner_error (GScanner
*scanner
,
296 g_return_if_fail (scanner
!= NULL
);
297 g_return_if_fail (format
!= NULL
);
299 scanner
->parse_errors
++;
301 if (scanner
->msg_handler
)
306 va_start (args
, format
);
307 string
= g_strdup_vprintf (format
, args
);
310 scanner
->msg_handler (scanner
, string
, TRUE
);
317 g_scanner_warn (GScanner
*scanner
,
321 g_return_if_fail (scanner
!= NULL
);
322 g_return_if_fail (format
!= NULL
);
324 if (scanner
->msg_handler
)
329 va_start (args
, format
);
330 string
= g_strdup_vprintf (format
, args
);
333 scanner
->msg_handler (scanner
, string
, FALSE
);
340 g_scanner_key_equal (gconstpointer v1
,
343 const GScannerKey
*key1
= v1
;
344 const GScannerKey
*key2
= v2
;
346 return (key1
->scope_id
== key2
->scope_id
) && (strcmp (key1
->symbol
, key2
->symbol
) == 0);
350 g_scanner_key_hash (gconstpointer v
)
352 const GScannerKey
*key
= v
;
357 for (c
= key
->symbol
; *c
; c
++)
373 static inline GScannerKey
*
374 g_scanner_lookup_internal (GScanner
*scanner
,
381 key
.scope_id
= scope_id
;
383 if (!scanner
->config
->case_sensitive
)
388 key
.symbol
= g_new (gchar
, strlen (symbol
) + 1);
389 for (d
= key
.symbol
, c
= symbol
; *c
; c
++, d
++)
392 key_p
= g_hash_table_lookup (scanner
->symbol_table
, &key
);
397 key
.symbol
= (gchar
*) symbol
;
398 key_p
= g_hash_table_lookup (scanner
->symbol_table
, &key
);
405 g_scanner_scope_add_symbol (GScanner
*scanner
,
412 g_return_if_fail (scanner
!= NULL
);
413 g_return_if_fail (symbol
!= NULL
);
415 key
= g_scanner_lookup_internal (scanner
, scope_id
, symbol
);
419 key
= g_new (GScannerKey
, 1);
420 key
->scope_id
= scope_id
;
421 key
->symbol
= g_strdup (symbol
);
423 if (!scanner
->config
->case_sensitive
)
434 g_hash_table_insert (scanner
->symbol_table
, key
, key
);
441 g_scanner_scope_remove_symbol (GScanner
*scanner
,
447 g_return_if_fail (scanner
!= NULL
);
448 g_return_if_fail (symbol
!= NULL
);
450 key
= g_scanner_lookup_internal (scanner
, scope_id
, symbol
);
454 g_hash_table_remove (scanner
->symbol_table
, key
);
455 g_free (key
->symbol
);
461 g_scanner_lookup_symbol (GScanner
*scanner
,
467 g_return_val_if_fail (scanner
!= NULL
, NULL
);
472 scope_id
= scanner
->scope_id
;
473 key
= g_scanner_lookup_internal (scanner
, scope_id
, symbol
);
474 if (!key
&& scope_id
&& scanner
->config
->scope_0_fallback
)
475 key
= g_scanner_lookup_internal (scanner
, 0, symbol
);
484 g_scanner_scope_lookup_symbol (GScanner
*scanner
,
490 g_return_val_if_fail (scanner
!= NULL
, NULL
);
495 key
= g_scanner_lookup_internal (scanner
, scope_id
, symbol
);
504 g_scanner_set_scope (GScanner
*scanner
,
509 g_return_val_if_fail (scanner
!= NULL
, 0);
511 old_scope_id
= scanner
->scope_id
;
512 scanner
->scope_id
= scope_id
;
518 g_scanner_foreach_internal (gpointer _key
,
529 func
= (GHFunc
) d
[0];
534 if (key
->scope_id
== *scope_id
)
535 func (key
->symbol
, key
->value
, user_data
);
539 g_scanner_scope_foreach_symbol (GScanner
*scanner
,
546 g_return_if_fail (scanner
!= NULL
);
548 d
[0] = (gpointer
) func
;
552 g_hash_table_foreach (scanner
->symbol_table
, g_scanner_foreach_internal
, d
);
556 g_scanner_freeze_symbol_table (GScanner
*scanner
)
558 g_return_if_fail (scanner
!= NULL
);
560 g_hash_table_freeze (scanner
->symbol_table
);
564 g_scanner_thaw_symbol_table (GScanner
*scanner
)
566 g_return_if_fail (scanner
!= NULL
);
568 g_hash_table_thaw (scanner
->symbol_table
);
572 g_scanner_peek_next_token (GScanner
*scanner
)
574 g_return_val_if_fail (scanner
!= NULL
, G_TOKEN_EOF
);
576 if (scanner
->next_token
== G_TOKEN_NONE
)
578 scanner
->next_line
= scanner
->line
;
579 scanner
->next_position
= scanner
->position
;
580 g_scanner_get_token_i (scanner
,
581 &scanner
->next_token
,
582 &scanner
->next_value
,
584 &scanner
->next_position
);
587 return scanner
->next_token
;
591 g_scanner_get_next_token (GScanner
*scanner
)
593 g_return_val_if_fail (scanner
!= NULL
, G_TOKEN_EOF
);
595 if (scanner
->next_token
!= G_TOKEN_NONE
)
597 g_scanner_free_value (&scanner
->token
, &scanner
->value
);
599 scanner
->token
= scanner
->next_token
;
600 scanner
->value
= scanner
->next_value
;
601 scanner
->line
= scanner
->next_line
;
602 scanner
->position
= scanner
->next_position
;
603 scanner
->next_token
= G_TOKEN_NONE
;
606 g_scanner_get_token_i (scanner
,
612 return scanner
->token
;
616 g_scanner_cur_token (GScanner
*scanner
)
618 g_return_val_if_fail (scanner
!= NULL
, G_TOKEN_EOF
);
620 return scanner
->token
;
624 g_scanner_cur_value (GScanner
*scanner
)
630 g_return_val_if_fail (scanner
!= NULL
, v
);
632 /* MSC isn't capable of handling return scanner->value; ? */
640 g_scanner_cur_line (GScanner
*scanner
)
642 g_return_val_if_fail (scanner
!= NULL
, 0);
644 return scanner
->line
;
648 g_scanner_cur_position (GScanner
*scanner
)
650 g_return_val_if_fail (scanner
!= NULL
, 0);
652 return scanner
->position
;
656 g_scanner_eof (GScanner
*scanner
)
658 g_return_val_if_fail (scanner
!= NULL
, TRUE
);
660 return scanner
->token
== G_TOKEN_EOF
;
664 g_scanner_input_file (GScanner
*scanner
,
667 g_return_if_fail (scanner
!= NULL
);
668 g_return_if_fail (input_fd
>= 0);
670 if (scanner
->input_fd
>= 0)
671 g_scanner_sync_file_offset (scanner
);
673 scanner
->token
= G_TOKEN_NONE
;
674 scanner
->value
.v_int
= 0;
676 scanner
->position
= 0;
677 scanner
->next_token
= G_TOKEN_NONE
;
679 scanner
->input_fd
= input_fd
;
680 scanner
->text
= NULL
;
681 scanner
->text_end
= NULL
;
683 if (!scanner
->buffer
)
684 scanner
->buffer
= g_new (gchar
, READ_BUFFER_SIZE
+ 1);
688 g_scanner_input_text (GScanner
*scanner
,
692 g_return_if_fail (scanner
!= NULL
);
694 g_return_if_fail (text
!= NULL
);
698 if (scanner
->input_fd
>= 0)
699 g_scanner_sync_file_offset (scanner
);
701 scanner
->token
= G_TOKEN_NONE
;
702 scanner
->value
.v_int
= 0;
704 scanner
->position
= 0;
705 scanner
->next_token
= G_TOKEN_NONE
;
707 scanner
->input_fd
= -1;
708 scanner
->text
= text
;
709 scanner
->text_end
= text
+ text_len
;
713 g_free (scanner
->buffer
);
714 scanner
->buffer
= NULL
;
719 g_scanner_peek_next_char (GScanner
*scanner
)
721 if (scanner
->text
< scanner
->text_end
)
723 return *scanner
->text
;
725 else if (scanner
->input_fd
>= 0)
730 buffer
= scanner
->buffer
;
733 count
= read (scanner
->input_fd
, buffer
, READ_BUFFER_SIZE
);
735 while (count
== -1 && (errno
== EINTR
|| errno
== EAGAIN
));
739 scanner
->input_fd
= -1;
745 scanner
->text
= buffer
;
746 scanner
->text_end
= buffer
+ count
;
756 g_scanner_sync_file_offset (GScanner
*scanner
)
758 g_return_if_fail (scanner
!= NULL
);
760 /* for file input, rewind the file descriptor to the current
761 * buffer position and discard the file read-ahead buffer. useful for
762 * third party uses of our file descriptor, which hook onto the current
766 if (scanner
->input_fd
>= 0 && scanner
->text_end
> scanner
->text
)
770 buffered
= scanner
->text_end
- scanner
->text
;
771 if (lseek (scanner
->input_fd
, - buffered
, SEEK_CUR
) >= 0)
773 /* we succeeded, blow our buffer's contents now */
774 scanner
->text
= NULL
;
775 scanner
->text_end
= NULL
;
783 g_scanner_get_char (GScanner
*scanner
,
789 if (scanner
->text
< scanner
->text_end
)
790 fchar
= *(scanner
->text
++);
791 else if (scanner
->input_fd
>= 0)
796 buffer
= scanner
->buffer
;
799 count
= read (scanner
->input_fd
, buffer
, READ_BUFFER_SIZE
);
801 while (count
== -1 && (errno
== EINTR
|| errno
== EAGAIN
));
805 scanner
->input_fd
= -1;
810 scanner
->text
= buffer
+ 1;
811 scanner
->text_end
= buffer
+ count
;
815 g_scanner_sync_file_offset (scanner
);
816 scanner
->text_end
= scanner
->text
;
817 scanner
->input_fd
= -1;
838 g_scanner_unexp_token (GScanner
*scanner
,
839 GTokenType expected_token
,
840 const gchar
*identifier_spec
,
841 const gchar
*symbol_spec
,
842 const gchar
*symbol_name
,
843 const gchar
*message
,
847 guint token_string_len
;
848 gchar
*expected_string
;
849 guint expected_string_len
;
850 gchar
*message_prefix
;
851 gboolean print_unexp
;
852 void (*msg_handler
) (GScanner
*, const gchar
*, ...);
854 g_return_if_fail (scanner
!= NULL
);
857 msg_handler
= g_scanner_error
;
859 msg_handler
= g_scanner_warn
;
861 if (!identifier_spec
)
862 identifier_spec
= "identifier";
864 symbol_spec
= "symbol";
866 token_string_len
= 56;
867 token_string
= g_new (gchar
, token_string_len
+ 1);
868 expected_string_len
= 64;
869 expected_string
= g_new (gchar
, expected_string_len
+ 1);
872 switch (scanner
->token
)
875 g_snprintf (token_string
, token_string_len
, "end of file");
879 if (scanner
->token
>= 1 && scanner
->token
<= 255)
881 if ((scanner
->token
>= ' ' && scanner
->token
<= '~') ||
882 strchr (scanner
->config
->cset_identifier_first
, scanner
->token
) ||
883 strchr (scanner
->config
->cset_identifier_nth
, scanner
->token
))
884 g_snprintf (token_string
, expected_string_len
, "character `%c'", scanner
->token
);
886 g_snprintf (token_string
, expected_string_len
, "character `\\%o'", scanner
->token
);
889 else if (!scanner
->config
->symbol_2_token
)
891 g_snprintf (token_string
, token_string_len
, "(unknown) token <%d>", scanner
->token
);
896 if (expected_token
== G_TOKEN_SYMBOL
||
897 (scanner
->config
->symbol_2_token
&&
898 expected_token
> G_TOKEN_LAST
))
901 g_snprintf (token_string
,
904 print_unexp
? "" : "invalid ",
908 g_snprintf (token_string
,
911 print_unexp
? "" : "invalid ",
917 expected_token
= G_TOKEN_NONE
;
918 switch (scanner
->value
.v_error
)
920 case G_ERR_UNEXP_EOF
:
921 g_snprintf (token_string
, token_string_len
, "scanner: unexpected end of file");
924 case G_ERR_UNEXP_EOF_IN_STRING
:
925 g_snprintf (token_string
, token_string_len
, "scanner: unterminated string constant");
928 case G_ERR_UNEXP_EOF_IN_COMMENT
:
929 g_snprintf (token_string
, token_string_len
, "scanner: unterminated comment");
932 case G_ERR_NON_DIGIT_IN_CONST
:
933 g_snprintf (token_string
, token_string_len
, "scanner: non digit in constant");
936 case G_ERR_FLOAT_RADIX
:
937 g_snprintf (token_string
, token_string_len
, "scanner: invalid radix for floating constant");
940 case G_ERR_FLOAT_MALFORMED
:
941 g_snprintf (token_string
, token_string_len
, "scanner: malformed floating constant");
944 case G_ERR_DIGIT_RADIX
:
945 g_snprintf (token_string
, token_string_len
, "scanner: digit is beyond radix");
950 g_snprintf (token_string
, token_string_len
, "scanner: unknown error");
956 g_snprintf (token_string
, token_string_len
, "character `%c'", scanner
->value
.v_char
);
959 case G_TOKEN_IDENTIFIER
:
960 case G_TOKEN_IDENTIFIER_NULL
:
961 if (expected_token
== G_TOKEN_IDENTIFIER
||
962 expected_token
== G_TOKEN_IDENTIFIER_NULL
)
964 g_snprintf (token_string
,
967 print_unexp
? "" : "invalid ",
969 scanner
->value
.v_string
);
976 g_snprintf (token_string
, token_string_len
, "number `%ld'", scanner
->value
.v_int
);
980 g_snprintf (token_string
, token_string_len
, "number `%.3f'", scanner
->value
.v_float
);
984 if (expected_token
== G_TOKEN_STRING
)
986 g_snprintf (token_string
,
988 "%s%sstring constant \"%s\"",
989 print_unexp
? "" : "invalid ",
990 scanner
->value
.v_string
[0] == 0 ? "empty " : "",
991 scanner
->value
.v_string
);
992 token_string
[token_string_len
- 2] = '"';
993 token_string
[token_string_len
- 1] = 0;
996 case G_TOKEN_COMMENT_SINGLE
:
997 case G_TOKEN_COMMENT_MULTI
:
998 g_snprintf (token_string
, token_string_len
, "comment");
1002 /* somehow the user's parsing code is screwed, there isn't much
1003 * we can do about it.
1004 * Note, a common case to trigger this is
1005 * g_scanner_peek_next_token(); g_scanner_unexp_token();
1006 * without an intermediate g_scanner_get_next_token().
1008 g_assert_not_reached ();
1013 switch (expected_token
)
1015 gboolean need_valid
;
1018 if (expected_token
>= 1 && expected_token
<= 255)
1020 if ((expected_token
>= ' ' && expected_token
<= '~') ||
1021 strchr (scanner
->config
->cset_identifier_first
, expected_token
) ||
1022 strchr (scanner
->config
->cset_identifier_nth
, expected_token
))
1023 g_snprintf (expected_string
, expected_string_len
, "character `%c'", expected_token
);
1025 g_snprintf (expected_string
, expected_string_len
, "character `\\%o'", expected_token
);
1028 else if (!scanner
->config
->symbol_2_token
)
1030 g_snprintf (expected_string
, expected_string_len
, "(unknown) token <%d>", expected_token
);
1034 case G_TOKEN_SYMBOL
:
1035 need_valid
= (scanner
->token
== G_TOKEN_SYMBOL
||
1036 (scanner
->config
->symbol_2_token
&&
1037 scanner
->token
> G_TOKEN_LAST
));
1038 g_snprintf (expected_string
,
1039 expected_string_len
,
1041 need_valid
? "valid " : "",
1043 /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
1047 g_snprintf (expected_string
, expected_string_len
, "number (integer)");
1051 g_snprintf (expected_string
, expected_string_len
, "number (float)");
1054 case G_TOKEN_STRING
:
1055 g_snprintf (expected_string
,
1056 expected_string_len
,
1057 "%sstring constant",
1058 scanner
->token
== G_TOKEN_STRING
? "valid " : "");
1061 case G_TOKEN_IDENTIFIER
:
1062 case G_TOKEN_IDENTIFIER_NULL
:
1063 g_snprintf (expected_string
,
1064 expected_string_len
,
1066 (scanner
->token
== G_TOKEN_IDENTIFIER_NULL
||
1067 scanner
->token
== G_TOKEN_IDENTIFIER
? "valid " : ""),
1075 if (message
&& message
[0] != 0)
1076 message_prefix
= " - ";
1079 message_prefix
= "";
1083 if (expected_token
!= G_TOKEN_NONE
)
1086 msg_handler (scanner
,
1087 "unexpected %s, expected %s%s%s",
1093 msg_handler (scanner
,
1094 "%s, expected %s%s%s",
1103 msg_handler (scanner
,
1104 "unexpected %s%s%s",
1109 msg_handler (scanner
,
1116 g_free (token_string
);
1117 g_free (expected_string
);
1121 g_scanner_stat_mode (const gchar
*filename
)
1123 struct stat
*stat_buf
;
1126 stat_buf
= g_new0 (struct stat
, 1);
1128 lstat (filename
, stat_buf
);
1130 stat (filename
, stat_buf
);
1132 st_mode
= stat_buf
->st_mode
;
1140 g_scanner_get_token_i (GScanner
*scanner
,
1141 GTokenType
*token_p
,
1142 GTokenValue
*value_p
,
1148 g_scanner_free_value (token_p
, value_p
);
1149 g_scanner_get_token_ll (scanner
, token_p
, value_p
, line_p
, position_p
);
1151 while (((*token_p
> 0 && *token_p
< 256) &&
1152 strchr (scanner
->config
->cset_skip_characters
, *token_p
)) ||
1153 (*token_p
== G_TOKEN_CHAR
&&
1154 strchr (scanner
->config
->cset_skip_characters
, value_p
->v_char
)) ||
1155 (*token_p
== G_TOKEN_COMMENT_MULTI
&&
1156 scanner
->config
->skip_comment_multi
) ||
1157 (*token_p
== G_TOKEN_COMMENT_SINGLE
&&
1158 scanner
->config
->skip_comment_single
));
1162 case G_TOKEN_IDENTIFIER
:
1163 if (scanner
->config
->identifier_2_string
)
1164 *token_p
= G_TOKEN_STRING
;
1167 case G_TOKEN_SYMBOL
:
1168 if (scanner
->config
->symbol_2_token
)
1169 *token_p
= (GTokenType
) value_p
->v_symbol
;
1172 case G_TOKEN_BINARY
:
1175 if (scanner
->config
->numbers_2_int
)
1176 *token_p
= G_TOKEN_INT
;
1183 if (*token_p
== G_TOKEN_INT
&&
1184 scanner
->config
->int_2_float
)
1186 *token_p
= G_TOKEN_FLOAT
;
1187 value_p
->v_float
= value_p
->v_int
;
1194 g_scanner_get_token_ll (GScanner
*scanner
,
1195 GTokenType
*token_p
,
1196 GTokenValue
*value_p
,
1200 GScannerConfig
*config
;
1202 gboolean in_comment_multi
;
1203 gboolean in_comment_single
;
1204 gboolean in_string_sq
;
1205 gboolean in_string_dq
;
1210 config
= scanner
->config
;
1211 (*value_p
).v_int
= 0;
1213 if ((scanner
->text
>= scanner
->text_end
&& scanner
->input_fd
< 0) ||
1214 scanner
->token
== G_TOKEN_EOF
)
1216 *token_p
= G_TOKEN_EOF
;
1220 in_comment_multi
= FALSE
;
1221 in_comment_single
= FALSE
;
1222 in_string_sq
= FALSE
;
1223 in_string_dq
= FALSE
;
1226 do /* while (ch != 0) */
1228 gboolean dotted_float
= FALSE
;
1230 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1233 token
= G_TOKEN_NONE
;
1235 /* this is *evil*, but needed ;(
1236 * we first check for identifier first character, because it
1237 * might interfere with other key chars like slashes or numbers
1239 if (config
->scan_identifier
&&
1240 ch
&& strchr (config
->cset_identifier_first
, ch
))
1241 goto identifier_precedence
;
1246 token
= G_TOKEN_EOF
;
1252 if (!config
->scan_comment_multi
||
1253 g_scanner_peek_next_char (scanner
) != '*')
1255 g_scanner_get_char (scanner
, line_p
, position_p
);
1256 token
= G_TOKEN_COMMENT_MULTI
;
1257 in_comment_multi
= TRUE
;
1258 gstring
= g_string_new ("");
1259 while ((ch
= g_scanner_get_char (scanner
, line_p
, position_p
)) != 0)
1261 if (ch
== '*' && g_scanner_peek_next_char (scanner
) == '/')
1263 g_scanner_get_char (scanner
, line_p
, position_p
);
1264 in_comment_multi
= FALSE
;
1268 gstring
= g_string_append_c (gstring
, ch
);
1274 if (!config
->scan_string_sq
)
1276 token
= G_TOKEN_STRING
;
1277 in_string_sq
= TRUE
;
1278 gstring
= g_string_new ("");
1279 while ((ch
= g_scanner_get_char (scanner
, line_p
, position_p
)) != 0)
1283 in_string_sq
= FALSE
;
1287 gstring
= g_string_append_c (gstring
, ch
);
1293 if (!config
->scan_string_dq
)
1295 token
= G_TOKEN_STRING
;
1296 in_string_dq
= TRUE
;
1297 gstring
= g_string_new ("");
1298 while ((ch
= g_scanner_get_char (scanner
, line_p
, position_p
)) != 0)
1302 in_string_dq
= FALSE
;
1309 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1319 gstring
= g_string_append_c (gstring
, '\\');
1323 gstring
= g_string_append_c (gstring
, '\n');
1327 gstring
= g_string_append_c (gstring
, '\t');
1331 gstring
= g_string_append_c (gstring
, '\r');
1335 gstring
= g_string_append_c (gstring
, '\b');
1339 gstring
= g_string_append_c (gstring
, '\f');
1351 fchar
= g_scanner_peek_next_char (scanner
);
1352 if (fchar
>= '0' && fchar
<= '7')
1354 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1355 i
= i
* 8 + ch
- '0';
1356 fchar
= g_scanner_peek_next_char (scanner
);
1357 if (fchar
>= '0' && fchar
<= '7')
1359 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1360 i
= i
* 8 + ch
- '0';
1363 gstring
= g_string_append_c (gstring
, i
);
1367 gstring
= g_string_append_c (gstring
, ch
);
1372 gstring
= g_string_append_c (gstring
, ch
);
1379 if (!config
->scan_float
)
1381 token
= G_TOKEN_FLOAT
;
1382 dotted_float
= TRUE
;
1383 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1384 goto number_parsing
;
1387 if (!config
->scan_hex_dollar
)
1389 token
= G_TOKEN_HEX
;
1390 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1391 goto number_parsing
;
1394 if (config
->scan_octal
)
1395 token
= G_TOKEN_OCTAL
;
1397 token
= G_TOKEN_INT
;
1398 ch
= g_scanner_peek_next_char (scanner
);
1399 if (config
->scan_hex
&& (ch
== 'x' || ch
== 'X'))
1401 token
= G_TOKEN_HEX
;
1402 g_scanner_get_char (scanner
, line_p
, position_p
);
1403 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1406 token
= G_TOKEN_ERROR
;
1407 value
.v_error
= G_ERR_UNEXP_EOF
;
1411 if (g_scanner_char_2_num (ch
, 16) < 0)
1413 token
= G_TOKEN_ERROR
;
1414 value
.v_error
= G_ERR_DIGIT_RADIX
;
1419 else if (config
->scan_binary
&& (ch
== 'b' || ch
== 'B'))
1421 token
= G_TOKEN_BINARY
;
1422 g_scanner_get_char (scanner
, line_p
, position_p
);
1423 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1426 token
= G_TOKEN_ERROR
;
1427 value
.v_error
= G_ERR_UNEXP_EOF
;
1431 if (g_scanner_char_2_num (ch
, 10) < 0)
1433 token
= G_TOKEN_ERROR
;
1434 value
.v_error
= G_ERR_NON_DIGIT_IN_CONST
;
1453 gboolean in_number
= TRUE
;
1456 if (token
== G_TOKEN_NONE
)
1457 token
= G_TOKEN_INT
;
1459 gstring
= g_string_new (dotted_float
? "0." : "");
1460 gstring
= g_string_append_c (gstring
, ch
);
1462 do /* while (in_number) */
1466 is_E
= token
== G_TOKEN_FLOAT
&& (ch
== 'e' || ch
== 'E');
1468 ch
= g_scanner_peek_next_char (scanner
);
1470 if (g_scanner_char_2_num (ch
, 36) >= 0 ||
1471 (config
->scan_float
&& ch
== '.') ||
1472 (is_E
&& (ch
== '+' || ch
== '-')))
1474 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1479 if (token
!= G_TOKEN_INT
&& token
!= G_TOKEN_OCTAL
)
1481 value
.v_error
= token
== G_TOKEN_FLOAT
? G_ERR_FLOAT_MALFORMED
: G_ERR_FLOAT_RADIX
;
1482 token
= G_TOKEN_ERROR
;
1487 token
= G_TOKEN_FLOAT
;
1488 gstring
= g_string_append_c (gstring
, ch
);
1502 gstring
= g_string_append_c (gstring
, ch
);
1507 if (token
!= G_TOKEN_FLOAT
)
1509 token
= G_TOKEN_ERROR
;
1510 value
.v_error
= G_ERR_NON_DIGIT_IN_CONST
;
1514 gstring
= g_string_append_c (gstring
, ch
);
1519 if ((token
!= G_TOKEN_HEX
&& !config
->scan_float
) ||
1520 (token
!= G_TOKEN_HEX
&&
1521 token
!= G_TOKEN_OCTAL
&&
1522 token
!= G_TOKEN_FLOAT
&&
1523 token
!= G_TOKEN_INT
))
1525 token
= G_TOKEN_ERROR
;
1526 value
.v_error
= G_ERR_NON_DIGIT_IN_CONST
;
1531 if (token
!= G_TOKEN_HEX
)
1532 token
= G_TOKEN_FLOAT
;
1533 gstring
= g_string_append_c (gstring
, ch
);
1538 if (token
!= G_TOKEN_HEX
)
1540 token
= G_TOKEN_ERROR
;
1541 value
.v_error
= G_ERR_NON_DIGIT_IN_CONST
;
1545 gstring
= g_string_append_c (gstring
, ch
);
1557 case G_TOKEN_BINARY
:
1558 value
.v_binary
= strtol (gstring
->str
, &endptr
, 2);
1562 value
.v_octal
= strtol (gstring
->str
, &endptr
, 8);
1566 value
.v_int
= strtol (gstring
->str
, &endptr
, 10);
1570 value
.v_float
= g_strtod (gstring
->str
, &endptr
);
1574 value
.v_hex
= strtol (gstring
->str
, &endptr
, 16);
1580 if (endptr
&& *endptr
)
1582 token
= G_TOKEN_ERROR
;
1583 if (*endptr
== 'e' || *endptr
== 'E')
1584 value
.v_error
= G_ERR_NON_DIGIT_IN_CONST
;
1586 value
.v_error
= G_ERR_DIGIT_RADIX
;
1588 g_string_free (gstring
, TRUE
);
1591 } /* number_parsing:... */
1597 if (config
->cpair_comment_single
&&
1598 ch
== config
->cpair_comment_single
[0])
1600 token
= G_TOKEN_COMMENT_SINGLE
;
1601 in_comment_single
= TRUE
;
1602 gstring
= g_string_new ("");
1603 while ((ch
= g_scanner_get_char (scanner
,
1607 if (ch
== config
->cpair_comment_single
[1])
1609 in_comment_single
= FALSE
;
1614 gstring
= g_string_append_c (gstring
, ch
);
1618 else if (config
->scan_identifier
&& ch
&&
1619 strchr (config
->cset_identifier_first
, ch
))
1621 identifier_precedence
:
1623 if (config
->cset_identifier_nth
&& ch
&&
1624 strchr (config
->cset_identifier_nth
,
1625 g_scanner_peek_next_char (scanner
)))
1627 token
= G_TOKEN_IDENTIFIER
;
1628 gstring
= g_string_new ("");
1629 gstring
= g_string_append_c (gstring
, ch
);
1632 ch
= g_scanner_get_char (scanner
, line_p
, position_p
);
1633 gstring
= g_string_append_c (gstring
, ch
);
1634 ch
= g_scanner_peek_next_char (scanner
);
1636 while (ch
&& strchr (config
->cset_identifier_nth
, ch
));
1639 else if (config
->scan_identifier_1char
)
1641 token
= G_TOKEN_IDENTIFIER
;
1642 value
.v_identifier
= g_new0 (gchar
, 2);
1643 value
.v_identifier
[0] = ch
;
1649 if (config
->char_2_token
)
1653 token
= G_TOKEN_CHAR
;
1658 } /* default_case:... */
1661 g_assert (ch
== 0 && token
!= G_TOKEN_NONE
); /* paranoid */
1665 if (in_comment_multi
|| in_comment_single
||
1666 in_string_sq
|| in_string_dq
)
1668 token
= G_TOKEN_ERROR
;
1671 g_string_free (gstring
, TRUE
);
1675 if (in_comment_multi
|| in_comment_single
)
1676 value
.v_error
= G_ERR_UNEXP_EOF_IN_COMMENT
;
1677 else /* (in_string_sq || in_string_dq) */
1678 value
.v_error
= G_ERR_UNEXP_EOF_IN_STRING
;
1683 value
.v_string
= gstring
->str
;
1684 g_string_free (gstring
, FALSE
);
1688 if (token
== G_TOKEN_IDENTIFIER
)
1690 if (config
->scan_symbols
)
1695 scope_id
= scanner
->scope_id
;
1696 key
= g_scanner_lookup_internal (scanner
, scope_id
, value
.v_identifier
);
1697 if (!key
&& scope_id
&& scanner
->config
->scope_0_fallback
)
1698 key
= g_scanner_lookup_internal (scanner
, 0, value
.v_identifier
);
1702 g_free (value
.v_identifier
);
1703 token
= G_TOKEN_SYMBOL
;
1704 value
.v_symbol
= key
->value
;
1708 if (token
== G_TOKEN_IDENTIFIER
&&
1709 config
->scan_identifier_NULL
&&
1710 strlen (value
.v_identifier
) == 4)
1712 gchar
*null_upper
= "NULL";
1713 gchar
*null_lower
= "null";
1715 if (scanner
->config
->case_sensitive
)
1717 if (value
.v_identifier
[0] == null_upper
[0] &&
1718 value
.v_identifier
[1] == null_upper
[1] &&
1719 value
.v_identifier
[2] == null_upper
[2] &&
1720 value
.v_identifier
[3] == null_upper
[3])
1721 token
= G_TOKEN_IDENTIFIER_NULL
;
1725 if ((value
.v_identifier
[0] == null_upper
[0] ||
1726 value
.v_identifier
[0] == null_lower
[0]) &&
1727 (value
.v_identifier
[1] == null_upper
[1] ||
1728 value
.v_identifier
[1] == null_lower
[1]) &&
1729 (value
.v_identifier
[2] == null_upper
[2] ||
1730 value
.v_identifier
[2] == null_lower
[2]) &&
1731 (value
.v_identifier
[3] == null_upper
[3] ||
1732 value
.v_identifier
[3] == null_lower
[3]))
1733 token
= G_TOKEN_IDENTIFIER_NULL
;