2 /* Include this before everything else, for various large-file definitions */
4 #define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
11 #include <wsutil/str_util.h>
13 #include "dfilter-int.h"
14 #include "syntax-tree.h"
16 #include "dfunctions.h"
17 #include "sttype-number.h"
21 * Always generate warnings.
26 * We want a reentrant scanner.
31 * We don't use input, so don't generate code for it.
36 * We don't use unput, so don't generate code for it.
41 * We don't read interactively from the terminal.
43 %option never-interactive
46 * Prefix scanner routines with "df_yy" rather than "yy", so this scanner
47 * can coexist with other scanners.
49 %option prefix="df_yy"
52 * We're reading from a string, so we don't need yywrap.
57 * The type for the dfs we keep for a scanner.
59 %option extra-type="dfsyntax_t *"
63 * Wireshark - Network traffic analyzer
64 * By Gerald Combs <gerald@wireshark.org>
65 * Copyright 2001 Gerald Combs
67 * SPDX-License-Identifier: GPL-2.0-or-later
71 * Disable diagnostics in the code generated by Flex.
/*
 * Build the node for a token that carries no value of its own. The definition
 * later in this file stores a new node of type type_id, together with a copy
 * of token_value, in dfs->lval.
 */
75 WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id);
/*
 * Shorthands for rule actions. Each one first advances the current token
 * location over yytext and then, through the comma operator, evaluates to the
 * result of set_lval_simple() for the given token. They differ only in the
 * node type passed along: STTYPE_UNINITIALIZED, STTYPE_TEST or
 * STTYPE_ARITHMETIC.
 */
76 #define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
77 #define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
78 #define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
/*
 * Token constructors. Each definition later in this file creates the syntax
 * node for one kind of lexeme and stores it in dfs->lval.
 */
80 WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value);
81 WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value);
82 WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value);
84 WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value);
85 WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string);
86 WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string);
87 WS_WARN_UNUSED static int set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value);
88 WS_WARN_UNUSED static int set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value);
/* Helpers that decode escape sequences, character constants and numbers. */
90 static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c);
91 static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn);
92 static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep);
93 static bool parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error);
94 static bool parse_double(dfsyntax_t *dfs, const char *s, double *valuep);
/* Column bookkeeping for the current token and for the enclosing quoted string. */
96 static void update_location(dfsyntax_t *dfs, const char *text);
97 static void update_string_loc(dfsyntax_t *dfs, const char *text);
101 ws_noisy("Scanning failed here."); \
102 dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
107 FunctionIdentifier [[:alpha:]_][[:alnum:]_]*
110 * Cannot start with '-'. Some protocol names can contain '-', for example "mac-lte".
111 * Fields that contain '-' anywhere cannot start with a decimal digit.
112 * Note that some protocol names start with a number, for example "9p". This is
113 * handled as a special case for numeric patterns.
114 * Some protocol names contain dots, e.g: _ws.expert
115 * Protocol or protocol field cannot contain DOTDOT anywhere.
117 VarIdentifier [[:alnum:]_][[:alnum:]_-]*
118 ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})*
121 ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
122 HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
123 DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
126 DotQuadMacAddress {hex4}\.{hex4}\.{hex4}
128 ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
129 HyphenBytes {hex2}(-{hex2})+
130 DotBytes {hex2}(\.{hex2})+
/* One decimal octet in the range 0-255, written without leading zeros. */
132 DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
/* Four decimal octets separated by dots. */
133 IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
/* Tail of an IPv6 address: two h16 groups or an embedded IPv4 address (h16 is defined elsewhere in the file). */
136 ls32 {h16}:{h16}|{IPv4Address}
/* One alternative per allowed position of the "::" abbreviation, from no abbreviation at all down to a bare trailing "::". */
137 IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
/* CIDR suffixes: a slash followed by 1-2 (IPv4) or 1-3 (IPv6) digits; the numeric range is not checked by these patterns. */
139 V4CidrPrefix \/[[:digit:]]{1,2}
140 V6CidrPrefix \/[[:digit:]]{1,3}
142 /* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
143 StartAlphabet [[:alnum:]_:]
144 Alphabet [[:alnum:]_:/-]
145 LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
147 Exponent ([eE][+-]?[[:digit:]]+)
148 HexExponent ([pP][+-]?[[:digit:]]+)
158 update_location(yyextra, yytext);
161 "(" return simple(TOKEN_LPAREN);
162 ")" return simple(TOKEN_RPAREN);
163 "," return simple(TOKEN_COMMA);
164 "{" return simple(TOKEN_LBRACE);
165 ".." return simple(TOKEN_DOTDOT);
166 "}" return simple(TOKEN_RBRACE);
167 "$" return simple(TOKEN_DOLLAR);
168 "@" return simple(TOKEN_ATSIGN);
169 "any" return simple(TOKEN_ANY);
170 "all" return simple(TOKEN_ALL);
172 "==" return test(TOKEN_TEST_ANY_EQ); /* "==", "eq" and "any_eq" are synonyms */
173 "eq" return test(TOKEN_TEST_ANY_EQ);
174 "any_eq" return test(TOKEN_TEST_ANY_EQ);
175 "!=" return test(TOKEN_TEST_ALL_NE); /* "!=", "ne" and "all_ne" are synonyms */
176 "ne" return test(TOKEN_TEST_ALL_NE);
177 "all_ne" return test(TOKEN_TEST_ALL_NE);
178 "===" return test(TOKEN_TEST_ALL_EQ); /* "===" and "all_eq" are synonyms */
179 "all_eq" return test(TOKEN_TEST_ALL_EQ);
180 "!==" return test(TOKEN_TEST_ANY_NE); /* "!==" and "any_ne" are synonyms */
181 "any_ne" return test(TOKEN_TEST_ANY_NE);
182 ">" return test(TOKEN_TEST_GT);
183 "gt" return test(TOKEN_TEST_GT);
184 ">=" return test(TOKEN_TEST_GE);
185 "ge" return test(TOKEN_TEST_GE);
186 "<" return test(TOKEN_TEST_LT);
187 "lt" return test(TOKEN_TEST_LT);
188 "<=" return test(TOKEN_TEST_LE);
189 "le" return test(TOKEN_TEST_LE);
190 "contains" return test(TOKEN_TEST_CONTAINS);
191 "~" return test(TOKEN_TEST_MATCHES); /* "~" and "matches" are synonyms */
192 "matches" return test(TOKEN_TEST_MATCHES);
193 "!" return test(TOKEN_TEST_NOT);
194 "not" return test(TOKEN_TEST_NOT);
195 "&&" return test(TOKEN_TEST_AND);
196 "and" return test(TOKEN_TEST_AND);
197 "||" return test(TOKEN_TEST_OR);
198 "or" return test(TOKEN_TEST_OR);
199 "^^" return test(TOKEN_TEST_XOR);
200 "xor" return test(TOKEN_TEST_XOR);
201 "in" return test(TOKEN_TEST_IN);
203 "+" return math(TOKEN_PLUS);
204 "-" return math(TOKEN_MINUS);
205 "*" return math(TOKEN_STAR);
206 "/" return math(TOKEN_RSLASH);
207 "%" return math(TOKEN_PERCENT);
208 "&" return math(TOKEN_BITWISE_AND); /* "&", "bitand" and "bitwise_and" are synonyms */
209 "bitand" return math(TOKEN_BITWISE_AND);
210 "bitwise_and" return math(TOKEN_BITWISE_AND);
214 return simple(TOKEN_HASH);
217 <LAYER>[[:digit:]]+ {
219 update_location(yyextra, yytext);
/* A run of decimal digits in the LAYER state is returned as a TOKEN_INDEX node. */
220 return set_lval_simple(yyextra, TOKEN_INDEX, yytext, STTYPE_UNINITIALIZED);
223 <LAYER>[^[:digit:][] {
224 update_location(yyextra, yytext);
/* Any single character other than a digit or '[' is rejected in the LAYER state. */
225 FAIL("Expected digit or \"[\", not \"%s\"", yytext);
231 return simple(TOKEN_LBRACKET);
235 update_location(yyextra, yytext);
236 return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
240 return simple(TOKEN_COMMA);
245 return simple(TOKEN_RBRACKET);
249 update_location(yyextra, yytext);
250 FAIL("The right bracket was missing from a slice.");
255 /* start quote of a quoted string */
257 * The example of how to scan for strings was taken from
258 * the flex manual, from the section "Start Conditions".
259 * See: https://westes.github.io/flex/manual/Start-Conditions.html
262 update_location(yyextra, yytext);
263 yyextra->string_loc = yyextra->location;
265 yyextra->quoted_string = g_string_new(NULL);
267 if (yytext[0] == 'r' || yytext[0] == 'R') {
269 * This is a raw string (like in Python). Rules: 1) The two
270 * escape sequences are \\ and \". 2) Backslashes are
271 * preserved. 3) Double quotes in the string must be escaped.
272 * Corollary: Strings cannot end with an odd number of
274 * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
275 * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
277 yyextra->raw_string = true;
280 yyextra->raw_string = false;
285 /* unterminated string */
286 update_string_loc(yyextra, yytext);
287 g_string_free(yyextra->quoted_string, TRUE);
288 yyextra->quoted_string = NULL;
289 FAIL("The final quote was missing from a quoted string.");
296 update_string_loc(yyextra, yytext);
297 int token = set_lval_quoted_string(yyextra, yyextra->quoted_string);
298 yyextra->quoted_string = NULL;
299 yyextra->string_loc.col_start = -1;
303 <DQUOTE>\\[0-7]{1,3} {
305 update_string_loc(yyextra, yytext);
/* Raw strings keep the escape sequence text exactly as written. */
306 if (yyextra->raw_string) {
307 g_string_append(yyextra->quoted_string, yytext);
310 unsigned long result;
/* Skip the backslash and convert the one to three digits as base 8. */
311 result = strtoul(yytext + 1, NULL, 8);
313 g_string_free(yyextra->quoted_string, TRUE);
314 yyextra->quoted_string = NULL;
315 FAIL("%s is larger than 255.", yytext);
318 g_string_append_c(yyextra->quoted_string, (char) result);
322 <DQUOTE>\\x[[:xdigit:]]{1,2} {
325 * C standard does not place a limit on the number of hex
326 * digits after \x... but we do. \xNN can have 1 or two Ns, not more.
328 update_string_loc(yyextra, yytext);
/* Raw strings keep the escape sequence text exactly as written. */
329 if (yyextra->raw_string) {
330 g_string_append(yyextra->quoted_string, yytext);
333 unsigned long result;
/* Skip the backslash and the 'x'; at most two hex digits follow, so the value fits in one byte. */
334 result = strtoul(yytext + 2, NULL, 16);
335 g_string_append_c(yyextra->quoted_string, (char) result);
339 <DQUOTE>\\u[[:xdigit:]]{0,4} {
340 /* universal character name */
341 update_string_loc(yyextra, yytext);
/*
 * The pattern also accepts fewer than four hex digits; outside raw strings
 * the text is handed to append_universal_character_name(), which validates
 * it and reports the error itself.
 */
342 if (yyextra->raw_string) {
343 g_string_append(yyextra->quoted_string, yytext);
345 else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
346 g_string_free(yyextra->quoted_string, TRUE);
347 yyextra->quoted_string = NULL;
352 <DQUOTE>\\U[[:xdigit:]]{0,8} {
353 /* universal character name */
354 update_string_loc(yyextra, yytext);
/*
 * The pattern also accepts fewer than eight hex digits; outside raw strings
 * the text is handed to append_universal_character_name(), which validates
 * it and reports the error itself.
 */
355 if (yyextra->raw_string) {
356 g_string_append(yyextra->quoted_string, yytext);
358 else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
359 g_string_free(yyextra->quoted_string, TRUE);
360 yyextra->quoted_string = NULL;
367 /* escaped character */
368 update_string_loc(yyextra, yytext);
369 if (yyextra->raw_string) {
370 g_string_append(yyextra->quoted_string, yytext);
372 else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) {
373 g_string_free(yyextra->quoted_string, TRUE);
374 yyextra->quoted_string = NULL;
380 /* non-escaped string */
381 update_string_loc(yyextra, yytext);
382 g_string_append(yyextra->quoted_string, yytext);
387 /* start quote of a quoted character value */
389 update_location(yyextra, yytext);
390 yyextra->string_loc = yyextra->location;
392 yyextra->quoted_string = g_string_new("'");
396 /* unterminated character value */
397 update_string_loc(yyextra, yytext);
398 g_string_free(yyextra->quoted_string, TRUE);
399 yyextra->quoted_string = NULL;
400 FAIL("The final quote was missing from a character constant.");
407 update_string_loc(yyextra, yytext);
408 g_string_append_c(yyextra->quoted_string, '\'');
409 int token = set_lval_charconst(yyextra, yyextra->quoted_string);
410 yyextra->quoted_string = NULL;
411 yyextra->string_loc.col_start = -1;
416 /* escaped character */
417 update_string_loc(yyextra, yytext);
418 g_string_append(yyextra->quoted_string, yytext);
422 /* non-escaped string */
423 update_string_loc(yyextra, yytext);
424 g_string_append(yyextra->quoted_string, yytext);
427 /* NOTE: None of the patterns below can match ".." anywhere in the token string. */
431 {ColonMacAddress}|{HyphenMacAddress} {
433 update_location(yyextra, yytext);
434 return set_lval_literal(yyextra, yytext, yytext);
439 {IPv4Address}{V4CidrPrefix}? {
440 /* IPv4 with or without prefix. */
441 update_location(yyextra, yytext);
/* The optional CIDR suffix stays part of the literal text. */
442 return set_lval_literal(yyextra, yytext, yytext);
445 {IPv6Address}{V6CidrPrefix}? {
446 /* IPv6 with or without prefix. */
447 update_location(yyextra, yytext);
/* The optional CIDR suffix stays part of the literal text. */
448 return set_lval_literal(yyextra, yytext, yytext);
453 [[:digit:]][[:digit:]]* {
454 /* Numeric or field. */
455 update_location(yyextra, yytext);
456 /* Check if we have a protocol or protocol field, otherwise assume a literal. */
457 /* It is only reasonable to assume a literal here, instead of a
458 * (possibly non-existent) protocol field, because protocol field filter names
459 * should not start with a digit (the lexical syntax for numbers). */
460 header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
461 if (hfinfo != NULL) {
/* A name that resolves to a registered field wins over the numeric reading. */
462 return set_lval_field(yyextra, hfinfo, yytext);
464 return set_lval_integer(yyextra, yytext, yytext);
467 0[bBxX]?[[:xdigit:]]+ {
468 /* Binary or octal or hexadecimal. */
469 update_location(yyextra, yytext);
/* Text that does not convert to a number is turned into a plain literal by set_lval_integer(). */
470 return set_lval_integer(yyextra, yytext, yytext);
473 /* Floating point. */
475 [[:digit:]]+\.[[:digit:]]+ {
477 update_location(yyextra, yytext);
/*
 * NOTE(review): text of the form N.N is also matched by the second
 * alternative of the following rule. flex resolves equal-length matches in
 * favour of the rule listed first, so such text is handled here and never
 * reaches that rule's field lookup -- confirm this is intended.
 */
478 return set_lval_float(yyextra, yytext, yytext);
481 [[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}? {
482 /* Decimal float with optional exponent. */
483 /* Significand cannot have any side omitted. */
484 update_location(yyextra, yytext);
485 /* Check if we have a protocol or protocol field, otherwise assume a literal. */
486 /* It is only reasonable to assume a literal here, instead of a
487 * (possibly non-existent) protocol field, because protocol field filter names
488 * should not start with a digit (the lexical syntax for numbers). */
489 header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
490 if (hfinfo != NULL) {
/* A name that resolves to a registered field wins over the numeric reading. */
491 return set_lval_field(yyextra, hfinfo, yytext);
493 return set_lval_float(yyextra, yytext, yytext);
496 0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? {
497 /* Hexadecimal float with optional exponent. Can't be a field because
498 * field cannot begin with 0x. */
499 /* Significand cannot have any side omitted. */
500 update_location(yyextra, yytext);
501 return set_lval_float(yyextra, yytext, yytext);
504 (?i:inf)(?i:inity)? {
506 update_location(yyextra, yytext);
/* Case-insensitive "inf" or "infinity"; the conversion itself is left to set_lval_float(). */
507 return set_lval_float(yyextra, yytext, yytext);
510 (?i:nan)(\([[:alnum:]_]*\))? {
511 /* NaNs (including quiet NaNs). */
512 update_location(yyextra, yytext);
/* The optional parenthesised suffix is passed on as part of the text. */
513 return set_lval_float(yyextra, yytext, yytext);
517 /* Numeric prefixed with ':'. */
518 update_location(yyextra, yytext);
519 return set_lval_literal(yyextra, yytext + 1, yytext);
526 update_location(yyextra, yytext);
527 if (yytext[0] == ':')
528 return set_lval_literal(yyextra, yytext + 1, yytext);
529 return set_lval_literal(yyextra, yytext, yytext);
534 update_location(yyextra, yytext);
535 return set_lval_literal(yyextra, yytext + 1, yytext);
539 /* DotBytes, can be a field without ':' prefix. */
540 update_location(yyextra, yytext);
541 return set_lval_literal(yyextra, yytext + 1, yytext);
544 /* Identifier (protocol/field/function name). */
546 /* This must come before FieldIdentifier to match function names. */
547 {FunctionIdentifier} {
548 /* Identifier (field or function) or literal (bytes without separator). */
549 update_location(yyextra, yytext);
/* The name is not resolved here; it is returned as an unparsed TOKEN_IDENTIFIER node. */
550 return set_lval_identifier(yyextra, yytext, yytext);
553 \.{ProtoFieldIdentifier} {
554 /* Identifier, prefixed with a '.', must be a field, no ifs or buts. */
555 update_location(yyextra, yytext);
/* Look the name up without its leading '.'; the node still records the full token text. */
556 const char *name = yytext + 1;
557 header_field_info *hfinfo = dfilter_resolve_unparsed(name, yyextra->deprecated);
558 if (hfinfo == NULL) {
559 FAIL("\"%s\" is not a valid protocol or protocol field.", name);
562 return set_lval_field(yyextra, hfinfo, yytext);
565 {ProtoFieldIdentifier} {
566 /* Catch-all for protocol values. Can also be a literal. */
567 update_location(yyextra, yytext);
/* No lookup is done here; the text is returned as a TOKEN_UNPARSED node. */
568 return set_lval_unparsed(yyextra, yytext, yytext);
572 /* Catch-all for semantic values. */
573 update_location(yyextra, yytext);
574 /* We use literal here because identifiers (using unparsed) should have
575 * matched one of the previous rules. */
576 return set_lval_literal(yyextra, yytext, yytext);
581 update_location(yyextra, yytext);
582 if (isprint_string(yytext))
583 FAIL("\"%s\" was unexpected in this context.", yytext);
585 FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
592 * Turn diagnostics back on, so we check the code that we've written.
/*
 * Move the current token location forward: the new start column is the old
 * start plus the old length, and the new length becomes len.
 */
597 _update_location(dfsyntax_t *dfs, size_t len)
599 dfs->location.col_start += (long)dfs->location.col_len;
600 dfs->location.col_len = len;
/* Advance the current token location over text, using strlen(text) as the new token length. */
604 update_location(dfsyntax_t *dfs, const char *text)
606 _update_location(dfs, strlen(text));
/*
 * Account for one more piece of a quoted string or character constant: grow
 * the length of dfs->string_loc by strlen(text) and advance the current token
 * location by the same amount.
 */
610 update_string_loc(dfsyntax_t *dfs, const char *text)
612 size_t len = strlen(text);
613 dfs->string_loc.col_len += len;
614 _update_location(dfs, len);
/*
 * Store in dfs->lval a new node of type type_id that has no data, a copy of
 * token_value as its token text, and the current token location.
 * The return statement is outside this excerpt; presumably it returns token.
 */
618 set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id)
620 dfs->lval = stnode_new(type_id, NULL, g_strdup(token_value), dfs->location);
/*
 * Store a STTYPE_LITERAL node in dfs->lval, holding copies of value (the
 * node data) and token_value (the token text), and return TOKEN_LITERAL.
 */
625 set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value)
627 dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
628 return TOKEN_LITERAL;
/*
 * Store a STTYPE_UNPARSED node flagged STFLAG_UNPARSED in dfs->lval, holding
 * copies of value and token_value, and return TOKEN_IDENTIFIER.
 */
632 set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value)
634 dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
635 stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
636 return TOKEN_IDENTIFIER;
/*
 * Same node construction as set_lval_identifier(), but the token returned to
 * the parser is TOKEN_UNPARSED.
 */
640 set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value)
642 dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
643 stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
644 return TOKEN_UNPARSED;
/*
 * Store a STTYPE_FIELD node in dfs->lval. The node data is the hfinfo pointer
 * itself (const cast away, not copied); the token text is a copy of
 * token_value. The return statement is outside this excerpt.
 */
648 set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value)
650 dfs->lval = stnode_new(STTYPE_FIELD, (void *)hfinfo, g_strdup(token_value), dfs->location);
/*
 * Store a STTYPE_STRING node in dfs->lval. The GString is passed to the node
 * as-is (no copy is made here); the token text is an escaped rendering of its
 * contents produced by ws_escape_string_len(). The node is located at
 * dfs->string_loc, i.e. the span accumulated for the whole quoted string.
 */
655 set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string)
659 token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true);
660 dfs->lval = stnode_new(STTYPE_STRING, quoted_string, token_value, dfs->string_loc);
/*
 * Turn the accumulated character-constant text into a STTYPE_CHARCONST node.
 * The GString wrapper is released while its character data is kept and
 * becomes the token text; parse_charconst() computes the numeric value, a
 * copy of which becomes the node data. Handling of a failed parse is outside
 * this excerpt.
 */
665 set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string)
667 unsigned long number;
670 char *token_value = g_string_free(quoted_string, FALSE);
671 ok = parse_charconst(dfs, token_value, &number);
676 dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc);
677 return TOKEN_CHARCONST;
/*
 * Try to read value as an unsigned integer. On success a STTYPE_NUMBER node
 * carrying the number is stored in dfs->lval; otherwise the text is kept as a
 * STTYPE_LITERAL node and TOKEN_LITERAL is returned. The parser is called
 * with set_error == false.
 */
681 set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value)
683 unsigned long long number;
686 ok = parse_unsigned_long_long(dfs, value, &number, false);
688 /* Instead of failing assume this is a literal such as
689 "10f3deccc00d5c8f629fba7a0fff34aa" that can be interpreted
690 as a valid bytes literal. */
691 dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
692 return TOKEN_LITERAL;
694 dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
695 sttype_number_set_unsigned(dfs->lval, number);
/*
 * Convert value with parse_double() and store a STTYPE_NUMBER node carrying
 * the resulting double in dfs->lval. Handling of a failed conversion and the
 * return statement are outside this excerpt.
 */
700 set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value)
705 ok = parse_double(dfs, value, &number);
709 dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
710 sttype_number_set_float(dfs->lval, number);
/*
 * Append to str the character denoted by the escape sequence backslash-c.
 * An unrecognised escape is reported at the current token location. The
 * table of accepted escapes is outside this excerpt.
 */
715 append_escaped_char(dfsyntax_t *dfs, GString *str, char c)
744 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location,
745 "\\%c is not a valid character escape sequence", c);
749 g_string_append_c(str, c);
/*
 * Parse a universal character name (backslash, 'u' or 'U', hex digits)
 * starting at str. The first ndigits characters after the two-character
 * prefix must all be hex digits; the value is then converted as base 16 and
 * checked against the ranges described in the comment further down. On
 * success the code point is stored in *valuep and the end of the parsed
 * digits in *ret_endptr.
 * NOTE(review): the conversion starts at str + 2, so the "endptr == str"
 * test can never be true; the digit loop above it already rejects input
 * without digits -- confirm whether "str + 2" was meant.
 */
754 parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep)
765 else if (str[1] == 'U')
770 for (int i = 2; i < ndigits + 2; i++) {
771 if (!g_ascii_isxdigit(str[i])) {
777 val = g_ascii_strtoull(str + 2, &endptr, 16); /* skip leading 'u' or 'U' */
779 if (errno != 0 || endptr == str || val > UINT32_MAX) {
784 * Ref: https://en.cppreference.com/w/c/language/escape
785 * Range of universal character names
787 * If a universal character name corresponds to a code point that is
788 * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a
789 * surrogate code point (the range 0xD800-0xDFFF, inclusive), or
790 * greater than 0x10FFFF, i.e. not a Unicode code point (since C23),
791 * the program is ill-formed. In other words, members of basic source
792 * character set and control characters (in ranges 0x0-0x1F and
793 * 0x7F-0x9F) cannot be expressed in universal character names.
795 if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60)
797 else if (val >= 0xD800 && val <= 0xDFFF)
799 else if (val > 0x10FFFF)
802 *valuep = (gunichar)val;
804 *ret_endptr = endptr;
/*
 * Parse the universal character name ucn and append the resulting code point
 * to str (g_string_append_unichar). An invalid name is reported at the
 * current token location.
 */
809 append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn)
813 if (!parse_universal_character_name(dfs, ucn, NULL, &val)) {
814 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn);
818 g_string_append_unichar(str, val);
/*
 * Compute the numeric value of the character constant s, which includes its
 * surrounding single quotes. The visible branches handle hex digits after an
 * escape, universal character names and up to three octal digits, and reject
 * empty, non-printable, too large or too long constants. All errors are
 * reported at dfs->string_loc. Much of the control flow is outside this
 * excerpt.
 */
823 parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep)
830 cp = s + 1; /* skip the leading ' */
832 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant.");
839 * An escape sequence is an octal number \NNN,
840 * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
841 * that stands for the byte value of the equivalent
842 * C-escape in ASCII encoding.
848 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
902 if (*cp >= '0' && *cp <= '9')
904 else if (*cp >= 'A' && *cp <= 'F')
905 value = 10 + (*cp - 'A');
906 else if (*cp >= 'a' && *cp <= 'f')
907 value = 10 + (*cp - 'a');
909 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
915 if (*cp >= '0' && *cp <= '9')
917 else if (*cp >= 'A' && *cp <= 'F')
918 value |= 10 + (*cp - 'A');
919 else if (*cp >= 'a' && *cp <= 'f')
920 value |= 10 + (*cp - 'a');
922 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
931 if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) {
932 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s);
935 value = (unsigned long)unival;
941 if (*cp >= '0' && *cp <= '7')
944 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
947 if (*(cp + 1) != '\'') {
950 if (*cp >= '0' && *cp <= '7')
953 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
956 if (*(cp + 1) != '\'') {
959 if (*cp >= '0' && *cp <= '7')
962 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
968 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s);
975 if (!g_ascii_isprint(value)) {
976 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value);
981 if ((*cp != '\'') || (*(cp + 1) != '\0')){
982 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s);
/*
 * Convert the text s to an unsigned long long stored in *valuep.
 * A leading "0b" or "0B" selects base 2, with the digits starting after the
 * prefix; any other text is converted with base 0, which leaves prefix
 * detection to g_ascii_strtoull(). Text that is not consumed entirely is
 * rejected, and a range error is reported separately.
 * The error messages are presumably only emitted when set_error is true; the
 * guarding conditions are outside this excerpt.
 * NOTE(review): errors are reported at dfs->string_loc, yet the numeric rules
 * that lead here only update dfs->location -- confirm which location is
 * intended.
 */
991 parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error)
996 if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
997 *valuep = g_ascii_strtoull(s + 2, &endptr, 2);
1000 *valuep = g_ascii_strtoull(s, &endptr, 0);
1003 if (errno == EINVAL || endptr == s || *endptr != '\0') {
1004 /* This isn't a valid number. */
1006 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number.", s);
1009 if (errno == ERANGE) {
1011 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is too large to be represented as a 64-bit number.", s);
1015 // Should not happen
1017 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number (%s).", s, g_strerror(errno));
/*
 * Convert the text s to a double stored in *valuep using g_ascii_strtod().
 * Text that is not consumed entirely is rejected. On ERANGE a result equal
 * to HUGE_VAL is reported as overflow and anything else as underflow.
 * NOTE(review): errors are reported at dfs->string_loc, yet the numeric rules
 * that lead here only update dfs->location -- confirm which location is
 * intended.
 */
1025 parse_double(dfsyntax_t *dfs, const char *s, double *valuep)
1027 char *endptr = NULL;
1030 *valuep = g_ascii_strtod(s, &endptr);
1032 if (endptr == s || *endptr != '\0') {
1033 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid floating-point number.", s);
1036 if (errno == ERANGE) {
1037 if (*valuep == HUGE_VAL) {
1038 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point overflow.", s);
1041 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point underflow.", s);
1046 // Should not happen
1047 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc,
1048 "\"%s\" is not a valid floating-point number (%s).",
1049 s, g_strerror(errno));