epan/dissectors/pidl/ C99 drsuapi
[wireshark-sm.git] / epan / dfilter / scanner.l
blobf8b6c09507b753bf2f589170ac1502e950773f3e
1 %top {
2 /* Include this before everything else, for various large-file definitions */
3 #include "config.h"
4 #define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
5 #include <wireshark.h>
7 #include <stdlib.h>
8 #include <errno.h>
9 #include <math.h>
11 #include <wsutil/str_util.h>
13 #include "dfilter-int.h"
14 #include "syntax-tree.h"
15 #include "grammar.h"
16 #include "dfunctions.h"
17 #include "sttype-number.h"
21  * Always generate warnings.
22  */
23 %option warn
26  * We want a reentrant scanner.
27  */
28 %option reentrant
31  * We don't use input, so don't generate code for it.
32  */
33 %option noinput
36  * We don't use unput, so don't generate code for it.
37  */
38 %option nounput
41  * We don't read interactively from the terminal.
42  */
43 %option never-interactive
46  * Prefix scanner routines with "df_yy" rather than "yy", so this scanner
47  * can coexist with other scanners.
48  */
49 %option prefix="df_yy"
52  * We're reading from a string, so we don't need yywrap.
53  */
54 %option noyywrap
57  * The type for the dfs we keep for a scanner.
58  */
/* The rule actions and helper macros below reach this state through yyextra. */
59 %option extra-type="dfsyntax_t *"
63  * Wireshark - Network traffic analyzer
64  * By Gerald Combs <gerald@wireshark.org>
65  * Copyright 2001 Gerald Combs
66  *
67  * SPDX-License-Identifier: GPL-2.0-or-later
68  */
71  * Disable diagnostics in the code generated by Flex.
72  */
73 DIAG_OFF_FLEX()
75 WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id);
/*
 * Helper macros for fixed tokens. Each one is a comma expression: it first
 * advances the scanner location by the matched text and then stores a node
 * of the given syntax-tree type; the value of the expression is the token
 * code returned by set_lval_simple().
 */
76 #define simple(token)   (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
77 #define test(token)     (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
78 #define math(token)     (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
/* Setters that build the semantic value (dfs->lval) for a token and return its token code. */
80 WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs,  const char *value, const char *token_value);
81 WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs,  const char *value, const char *token_value);
82 WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value);
84 WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value);
85 WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string);
86 WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string);
87 WS_WARN_UNUSED static int set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value);
88 WS_WARN_UNUSED static int set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value);
/* Parsing helpers; each returns false on failure. */
90 static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c);
91 static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn);
92 static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep);
93 static bool parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error);
94 static bool parse_double(dfsyntax_t *dfs, const char *s, double *valuep);
/* Location bookkeeping for the current token and for the quoted string being accumulated. */
96 static void update_location(dfsyntax_t *dfs, const char *text);
97 static void update_string_loc(dfsyntax_t *dfs, const char *text);
/*
 * Log at "noisy" level and record a generic filter error at the current
 * location. The macro does not return: the rule action using it still has
 * to return SCAN_FAILED itself.
 */
99 #define FAIL(...) \
100         do { \
101                 ws_noisy("Scanning failed here."); \
102                 dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
103         } while (0)
107 FunctionIdentifier      [[:alpha:]_][[:alnum:]_]*
110  * Cannot start with '-'. Some protocol names can contain '-', for example "mac-lte".
111  * Fields that contain '-' anywhere cannot start with a decimal digit.
112  * Note that some protocol names start with a number, for example "9p". This is
113  * handled as a special case for numeric patterns.
114  * Some protocol names contain dots, e.g: _ws.expert
115  * Protocol or protocol field cannot contain DOTDOT anywhere.
116  */
117 VarIdentifier           [[:alnum:]_][[:alnum:]_-]*
118 ProtoFieldIdentifier    {VarIdentifier}(\.{VarIdentifier})*
/* hex2 is exactly two hex digits; the MAC address forms are six such groups joined by ':', '-' or '.'. */
120 hex2                    [[:xdigit:]]{2}
121 ColonMacAddress         {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
122 HyphenMacAddress        {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
123 DotMacAddress           {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
/* Three dot-separated groups of four hex digits. */
125 hex4                    [[:xdigit:]]{4}
126 DotQuadMacAddress       {hex4}\.{hex4}\.{hex4}
/* Byte sequences. ColonBytes also accepts a single byte followed by ':'; the hyphen and dot forms need at least two bytes. */
128 ColonBytes              ({hex2}:)|({hex2}(:{hex2})+)
129 HyphenBytes             {hex2}(-{hex2})+
130 DotBytes                {hex2}(\.{hex2})+
/* DecOctet is a decimal number 0-255 without leading zeros. */
132 DecOctet                [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
133 IPv4Address             {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
/* IPv6 text forms: up to eight h16 groups, optional "::" compression, and an optional dotted IPv4 tail (ls32). */
135 h16                     [0-9A-Fa-f]{1,4}
136 ls32                    {h16}:{h16}|{IPv4Address}
137 IPv6Address             ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
/* '/' followed by 1-2 (IPv4) or 1-3 (IPv6) decimal digits; the numeric range is not checked by the pattern. */
139 V4CidrPrefix            \/[[:digit:]]{1,2}
140 V6CidrPrefix            \/[[:digit:]]{1,3}
142 /* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
143 StartAlphabet           [[:alnum:]_:]
144 Alphabet                [[:alnum:]_:/-]
145 LiteralValue            {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
/* Exponent parts with an optional sign: e/E is used by the decimal float rules, p/P by the hexadecimal float rule. */
147 Exponent                ([eE][+-]?[[:digit:]]+)
148 HexExponent             ([pP][+-]?[[:digit:]]+)
/* Exclusive start conditions used by the slice ("["), layer ("#") and quoted string / character rules. */
150 %x RANGE
151 %x LAYER
152 %x DQUOTE
153 %x SQUOTE
157 [[:blank:]\n\r]+        {
        /* Whitespace yields no token; only the location is advanced. */
158         update_location(yyextra, yytext);
        /* Punctuation and quantifier keywords. */
161 "("             return simple(TOKEN_LPAREN);
162 ")"             return simple(TOKEN_RPAREN);
163 ","             return simple(TOKEN_COMMA);
164 "{"             return simple(TOKEN_LBRACE);
165 ".."            return simple(TOKEN_DOTDOT);
166 "}"             return simple(TOKEN_RBRACE);
167 "$"             return simple(TOKEN_DOLLAR);
168 "@"             return simple(TOKEN_ATSIGN);
169 "any"           return simple(TOKEN_ANY);
170 "all"           return simple(TOKEN_ALL);
        /*
         * Comparison and logical operators. Symbolic and word spellings that
         * map to the same token are listed together. Note that "==" and "!="
         * map to ANY_EQ and ALL_NE, while "===" and "!==" map to ALL_EQ and
         * ANY_NE.
         */
172 "=="            return test(TOKEN_TEST_ANY_EQ);
173 "eq"            return test(TOKEN_TEST_ANY_EQ);
174 "any_eq"        return test(TOKEN_TEST_ANY_EQ);
175 "!="            return test(TOKEN_TEST_ALL_NE);
176 "ne"            return test(TOKEN_TEST_ALL_NE);
177 "all_ne"        return test(TOKEN_TEST_ALL_NE);
178 "==="           return test(TOKEN_TEST_ALL_EQ);
179 "all_eq"        return test(TOKEN_TEST_ALL_EQ);
180 "!=="           return test(TOKEN_TEST_ANY_NE);
181 "any_ne"        return test(TOKEN_TEST_ANY_NE);
182 ">"             return test(TOKEN_TEST_GT);
183 "gt"            return test(TOKEN_TEST_GT);
184 ">="            return test(TOKEN_TEST_GE);
185 "ge"            return test(TOKEN_TEST_GE);
186 "<"             return test(TOKEN_TEST_LT);
187 "lt"            return test(TOKEN_TEST_LT);
188 "<="            return test(TOKEN_TEST_LE);
189 "le"            return test(TOKEN_TEST_LE);
190 "contains"      return test(TOKEN_TEST_CONTAINS);
191 "~"             return test(TOKEN_TEST_MATCHES);
192 "matches"       return test(TOKEN_TEST_MATCHES);
193 "!"             return test(TOKEN_TEST_NOT);
194 "not"           return test(TOKEN_TEST_NOT);
195 "&&"            return test(TOKEN_TEST_AND);
196 "and"           return test(TOKEN_TEST_AND);
197 "||"            return test(TOKEN_TEST_OR);
198 "or"            return test(TOKEN_TEST_OR);
199 "^^"            return test(TOKEN_TEST_XOR);
200 "xor"           return test(TOKEN_TEST_XOR);
201 "in"            return test(TOKEN_TEST_IN);
        /* Arithmetic and bitwise operators. */
203 "+"             return math(TOKEN_PLUS);
204 "-"             return math(TOKEN_MINUS);
205 "*"             return math(TOKEN_STAR);
206 "/"             return math(TOKEN_RSLASH);
207 "%"             return math(TOKEN_PERCENT);
208 "&"             return math(TOKEN_BITWISE_AND);
209 "bitand"        return math(TOKEN_BITWISE_AND);
210 "bitwise_and"   return math(TOKEN_BITWISE_AND);
212 "#"                             {
        /* '#' switches to the LAYER state, where a decimal index or '[' is expected. */
213         BEGIN(LAYER);
214         return simple(TOKEN_HASH);
217 <LAYER>[[:digit:]]+             {
        /* Layer index: return to INITIAL and hand the digits over as TOKEN_INDEX. */
218         BEGIN(INITIAL);
219         update_location(yyextra, yytext);
220         return set_lval_simple(yyextra, TOKEN_INDEX, yytext, STTYPE_UNINITIALIZED);
223 <LAYER>[^[:digit:][]                    {
        /* Any single character other than a digit or '[' is an error after '#'. */
224         update_location(yyextra, yytext);
225         FAIL("Expected digit or \"[\", not \"%s\"", yytext);
226         return SCAN_FAILED;
229 <INITIAL,LAYER>"["              {
        /* Start of a slice; its content is scanned in the RANGE state. */
230         BEGIN(RANGE);
231         return simple(TOKEN_LBRACKET);
234 <RANGE>[^],]+                   {
        /* Everything up to the next ',' or ']' is passed on as one TOKEN_RANGE_NODE. */
235         update_location(yyextra, yytext);
236         return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
239 <RANGE>","                      {
240         return simple(TOKEN_COMMA);
243 <RANGE>"]"                              {
        /* End of the slice: back to INITIAL. */
244         BEGIN(INITIAL);
245         return simple(TOKEN_RBRACKET);
248 <RANGE><<EOF>>                          {
249         update_location(yyextra, yytext);
250         FAIL("The right bracket was missing from a slice.");
251         return SCAN_FAILED;
254 [rR]{0,1}\042                   {
255         /* start quote of a quoted string */
256         /*
257          * The example of how to scan for strings was taken from
258          * the flex manual, from the section "Start Conditions".
259          * See: https://westes.github.io/flex/manual/Start-Conditions.html
260          */
261         BEGIN(DQUOTE);
262         update_location(yyextra, yytext);
        /* The string token's location starts at the opening quote (or the r/R prefix). */
263         yyextra->string_loc = yyextra->location;
        /* Buffer that the DQUOTE rules below append the string content to. */
265         yyextra->quoted_string = g_string_new(NULL);
267         if (yytext[0] == 'r' || yytext[0] == 'R') {
268                 /*
269                  * This is a raw string (like in Python). Rules: 1) The two
270                  * escape sequences are \\ and \". 2) Backslashes are
271                  * preserved. 3) Double quotes in the string must be escaped.
272                  * Corollary: Strings cannot end with an odd number of
273                  * backslashes.
274                  * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
275                  * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
276                  */
277                 yyextra->raw_string = true;
278         }
279         else {
280                 yyextra->raw_string = false;
281         }
284 <DQUOTE><<EOF>>                         {
285         /* unterminated string */
286         update_string_loc(yyextra, yytext);
        /* Discard the partially built string before reporting the error. */
287         g_string_free(yyextra->quoted_string, TRUE);
288         yyextra->quoted_string = NULL;
289         FAIL("The final quote was missing from a quoted string.");
290         return SCAN_FAILED;
293 <DQUOTE>\042                    {
294         /* end quote */
295         BEGIN(INITIAL);
296         update_string_loc(yyextra, yytext);
        /* The GString is handed over to the new node, so only the scanner's pointer is cleared. */
297         int token = set_lval_quoted_string(yyextra, yyextra->quoted_string);
298         yyextra->quoted_string = NULL;
299         yyextra->string_loc.col_start = -1;
300         return token;
303 <DQUOTE>\\[0-7]{1,3} {
304         /* octal sequence */
305         update_string_loc(yyextra, yytext);
306         if (yyextra->raw_string) {
                /* Raw strings keep the escape sequence verbatim. */
307                 g_string_append(yyextra->quoted_string, yytext);
308         }
309         else {
310                 unsigned long result;
                /* Skip the backslash; three octal digits can exceed one byte, hence the range check. */
311                 result = strtoul(yytext + 1, NULL, 8);
312                 if (result > 0xff) {
313                         g_string_free(yyextra->quoted_string, TRUE);
314                         yyextra->quoted_string = NULL;
315                         FAIL("%s is larger than 255.", yytext);
316                         return SCAN_FAILED;
317                 }
318                 g_string_append_c(yyextra->quoted_string, (char) result);
319         }
322 <DQUOTE>\\x[[:xdigit:]]{1,2} {
323         /* hex sequence */
324         /*
325          * C standard does not place a limit on the number of hex
326          * digits after \x... but we do. \xNN can have 1 or two Ns, not more.
327          */
328         update_string_loc(yyextra, yytext);
329         if (yyextra->raw_string) {
330                 g_string_append(yyextra->quoted_string, yytext);
331         }
332         else {
333                 unsigned long result;
                /* Skip "\x"; at most two hex digits were matched, so the value fits in one byte. */
334                 result = strtoul(yytext + 2, NULL, 16);
335                 g_string_append_c(yyextra->quoted_string, (char) result);
336         }
339 <DQUOTE>\\u[[:xdigit:]]{0,4} {
340         /* universal character name */
        /* The pattern also accepts fewer than four digits; append_universal_character_name() rejects those. */
341         update_string_loc(yyextra, yytext);
342         if (yyextra->raw_string) {
343                 g_string_append(yyextra->quoted_string, yytext);
344         }
345         else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
346                 g_string_free(yyextra->quoted_string, TRUE);
347                 yyextra->quoted_string = NULL;
348                 return SCAN_FAILED;
349         }
352 <DQUOTE>\\U[[:xdigit:]]{0,8} {
353         /* universal character name */
        /* Same as above for the eight-digit \U form. */
354         update_string_loc(yyextra, yytext);
355         if (yyextra->raw_string) {
356                 g_string_append(yyextra->quoted_string, yytext);
357         }
358         else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
359                 g_string_free(yyextra->quoted_string, TRUE);
360                 yyextra->quoted_string = NULL;
361                 return SCAN_FAILED;
362         }
366 <DQUOTE>\\.                             {
367         /* escaped character */
        /* Any other backslash pair; in non-raw strings append_escaped_char() rejects unknown escapes. */
368         update_string_loc(yyextra, yytext);
369         if (yyextra->raw_string) {
370                 g_string_append(yyextra->quoted_string, yytext);
371         }
372         else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) {
373                 g_string_free(yyextra->quoted_string, TRUE);
374                 yyextra->quoted_string = NULL;
375                 return SCAN_FAILED;
376         }
379 <DQUOTE>[^\\\042]+                      {
380         /* non-escaped string */
        /* A run of characters containing neither a backslash nor a double quote is copied as is. */
381         update_string_loc(yyextra, yytext);
382         g_string_append(yyextra->quoted_string, yytext);
386 \047                            {
387         /* start quote of a quoted character value */
388         BEGIN(SQUOTE);
389         update_location(yyextra, yytext);
390         yyextra->string_loc = yyextra->location;
        /* Unlike quoted strings, the buffer keeps the surrounding quotes; it starts with the opening one. */
392         yyextra->quoted_string = g_string_new("'");
395 <SQUOTE><<EOF>>                         {
396         /* unterminated character value */
397         update_string_loc(yyextra, yytext);
398         g_string_free(yyextra->quoted_string, TRUE);
399         yyextra->quoted_string = NULL;
400         FAIL("The final quote was missing from a character constant.");
401         return SCAN_FAILED;
404 <SQUOTE>\047                    {
405         /* end quote */
406         BEGIN(INITIAL);
407         update_string_loc(yyextra, yytext);
        /* Close the quoted text; set_lval_charconst() takes over the buffer and parses the value. */
408         g_string_append_c(yyextra->quoted_string, '\'');
409         int token = set_lval_charconst(yyextra, yyextra->quoted_string);
410         yyextra->quoted_string = NULL;
411         yyextra->string_loc.col_start = -1;
412         return token;
415 <SQUOTE>\\.                             {
416         /* escaped character */
        /* Copied verbatim, backslash included; decoding is left to set_lval_charconst(). */
417         update_string_loc(yyextra, yytext);
418         g_string_append(yyextra->quoted_string, yytext);
421 <SQUOTE>[^\\\047]+                      {
422         /* non-escaped string */
423         update_string_loc(yyextra, yytext);
424         g_string_append(yyextra->quoted_string, yytext);
427         /* NOTE: None of the patterns below can match ".." anywhere in the token string. */
429         /* MAC address. */
431 {ColonMacAddress}|{HyphenMacAddress}    {
432         /* MAC Address. */
        /* Only the colon and hyphen forms are matched here; the text is passed on unchanged as a literal. */
433         update_location(yyextra, yytext);
434         return set_lval_literal(yyextra, yytext, yytext);
437         /* IP address. */
439 {IPv4Address}{V4CidrPrefix}?            {
440         /* IPv4 with or without prefix. */
441         update_location(yyextra, yytext);
442         return set_lval_literal(yyextra, yytext, yytext);
445 {IPv6Address}{V6CidrPrefix}?            {
446         /* IPv6 with or without prefix. */
447         update_location(yyextra, yytext);
448         return set_lval_literal(yyextra, yytext, yytext);
451         /* Integer */
453 [[:digit:]][[:digit:]]* {
454         /* Numeric or field. */
455         update_location(yyextra, yytext);
456         /* Check if we have a protocol or protocol field, otherwise assume a literal. */
457         /* It is only reasonable to assume a literal here, instead of a
458          * (possibly non-existent) protocol field, because protocol field filter names
459          * should not start with a digit (the lexical syntax for numbers). */
460         header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
461         if (hfinfo != NULL) {
462                 return set_lval_field(yyextra, hfinfo, yytext);
463         }
464         return set_lval_integer(yyextra, yytext, yytext);
467 0[bBxX]?[[:xdigit:]]+   {
468         /* Binary or octal or hexadecimal. */
        /* No field lookup here; set_lval_integer() falls back to a literal when the text does not parse as a number. */
469         update_location(yyextra, yytext);
470         return set_lval_integer(yyextra, yytext, yytext);
473         /* Floating point. */
475 [[:digit:]]+\.[[:digit:]]+      {
476         /* Decimal float. */
477         update_location(yyextra, yytext);
478         return set_lval_float(yyextra, yytext, yytext);
481 [[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}?    {
482         /* Decimal float with optional exponent. */
483         /* Significand cannot have any side omitted. */
        /* NOTE(review): a plain "digits.digits" match ties in length with the rule
         * above, which is listed first, so this rule presumably only fires when an
         * exponent is present - confirm against flex's rule-priority behaviour. */
484         update_location(yyextra, yytext);
485         /* Check if we have a protocol or protocol field, otherwise assume a literal. */
486         /* It is only reasonable to assume a literal here, instead of a
487          * (possibly non-existent) protocol field, because protocol field filter names
488          * should not start with a digit (the lexical syntax for numbers). */
489         header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
490         if (hfinfo != NULL) {
491                 return set_lval_field(yyextra, hfinfo, yytext);
492         }
493         return set_lval_float(yyextra, yytext, yytext);
496 0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? {
497         /* Hexadecimal float with optional exponent. Can't be a field because
498          * field cannot begin with 0x. */
499         /* Significand cannot have any side omitted. */
500         update_location(yyextra, yytext);
501         return set_lval_float(yyextra, yytext, yytext);
504 (?i:inf)(?i:inity)? {
505         /* Infinity. */
        /* Matches "inf" or "infinity" in any letter case. */
506         update_location(yyextra, yytext);
507         return set_lval_float(yyextra, yytext, yytext);
510 (?i:nan)(\([[:alnum:]_]*\))? {
511         /* NaNs (including quiet NaNs). */
512         update_location(yyextra, yytext);
513         return set_lval_float(yyextra, yytext, yytext);
516 :[[:xdigit:]]+  {
517         /* Numeric prefixed with ':'. */
        /* The ':' is dropped from the value but kept in the token text. */
518         update_location(yyextra, yytext);
519         return set_lval_literal(yyextra, yytext + 1, yytext);
522         /* Bytes. */
524 :?{ColonBytes}  {
525         /* Bytes. */
526         update_location(yyextra, yytext);
        /* A leading ':' is optional for colon-separated bytes; when present it is stripped from the value only. */
527         if (yytext[0] == ':')
528                 return set_lval_literal(yyextra, yytext + 1, yytext);
529         return set_lval_literal(yyextra, yytext, yytext);
532 :{HyphenBytes}  {
533         /* Bytes. */
        /* Hyphen-separated bytes are only matched by this rule with the ':' prefix. */
534         update_location(yyextra, yytext);
535         return set_lval_literal(yyextra, yytext + 1, yytext);
538 :{DotBytes}     {
539         /* DotBytes, can be a field without ':' prefix. */
540         update_location(yyextra, yytext);
541         return set_lval_literal(yyextra, yytext + 1, yytext);
544         /* Identifier (protocol/field/function name). */
546         /* This must come before FieldIdentifier to match function names. */
547 {FunctionIdentifier}    {
548         /* Identifier (field or function) or literal (bytes without separator). */
549         update_location(yyextra, yytext);
550         return set_lval_identifier(yyextra, yytext, yytext);
553 \.{ProtoFieldIdentifier}        {
554         /* Identifier, prefixed with a '.', must be a field, no ifs or buts. */
555         update_location(yyextra, yytext);
        /* Look the name up without the leading '.'; the token text keeps it. */
556         const char *name = yytext + 1;
557         header_field_info *hfinfo = dfilter_resolve_unparsed(name, yyextra->deprecated);
558         if (hfinfo == NULL) {
559                 FAIL("\"%s\" is not a valid protocol or protocol field.", name);
560                 return SCAN_FAILED;
561         }
562         return set_lval_field(yyextra, hfinfo, yytext);
565 {ProtoFieldIdentifier}  {
566         /* Catch-all for protocol values. Can also be a literal. */
        /* No lookup is done here; the text is passed on as an unparsed node. */
567         update_location(yyextra, yytext);
568         return set_lval_unparsed(yyextra, yytext, yytext);
571 {LiteralValue}  {
572         /* Catch-all for semantic values. */
573         update_location(yyextra, yytext);
574         /* We use literal here because identifiers (using unparsed) should have
575          * matched one of the previous rules. */
576         return set_lval_literal(yyextra, yytext, yytext);
579 . {
580         /* Default */
        /* Any single character no other rule accepted is a scan error. */
581         update_location(yyextra, yytext);
582         if (isprint_string(yytext))
583                 FAIL("\"%s\" was unexpected in this context.", yytext);
584         else
585                 FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
586         return SCAN_FAILED;
592  * Turn diagnostics back on, so we check the code that we've written.
593  */
594 DIAG_ON_FLEX()
596 static void
597 _update_location(dfsyntax_t *dfs, size_t len)
599         dfs->location.col_start += (long)dfs->location.col_len;
600         dfs->location.col_len = len;
603 static void
604 update_location(dfsyntax_t *dfs, const char *text)
606         _update_location(dfs, strlen(text));
609 static void
610 update_string_loc(dfsyntax_t *dfs, const char *text)
612         size_t len = strlen(text);
613         dfs->string_loc.col_len += len;
614         _update_location(dfs, len);
617 static int
618 set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id)
620         dfs->lval = stnode_new(type_id, NULL, g_strdup(token_value), dfs->location);
621         return token;
624 static int
625 set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value)
627         dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
628         return TOKEN_LITERAL;
631 static int
632 set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value)
634         dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
635         stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
636         return TOKEN_IDENTIFIER;
639 static int
640 set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value)
642         dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
643         stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
644         return TOKEN_UNPARSED;
647 static int
648 set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value)
650         dfs->lval = stnode_new(STTYPE_FIELD, (void *)hfinfo, g_strdup(token_value), dfs->location);
651         return TOKEN_FIELD;
654 static int
655 set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string)
657         char *token_value;
659         token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true);
660         dfs->lval = stnode_new(STTYPE_STRING, quoted_string, token_value, dfs->string_loc);
661         return TOKEN_STRING;
664 static int
665 set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string)
667         unsigned long number;
668         bool ok;
670         char *token_value = g_string_free(quoted_string, FALSE);
671         ok = parse_charconst(dfs, token_value, &number);
672         if (!ok) {
673                 g_free(token_value);
674                 return SCAN_FAILED;
675         }
676         dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc);
677         return TOKEN_CHARCONST;
680 static int
681 set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value)
683         unsigned long long number;
684         bool ok;
686         ok = parse_unsigned_long_long(dfs, value, &number, false);
687         if (!ok) {
688                 /* Instead of failing assume this is a literal such as
689                   "10f3deccc00d5c8f629fba7a0fff34aa" that can be interpreted
690                   as a literal bytes valid. */
691                 dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
692                 return TOKEN_LITERAL;
693         }
694         dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
695         sttype_number_set_unsigned(dfs->lval, number);
696         return TOKEN_NUMBER;
699 static int
700 set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value)
702         double number;
703         bool ok;
705         ok = parse_double(dfs, value, &number);
706         if (!ok) {
707                 return SCAN_FAILED;
708         }
709         dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
710         sttype_number_set_float(dfs->lval, number);
711         return TOKEN_NUMBER;
714 static bool
715 append_escaped_char(dfsyntax_t *dfs, GString *str, char c)
717         switch (c) {
718                 case 'a':
719                         c = '\a';
720                         break;
721                 case 'b':
722                         c = '\b';
723                         break;
724                 case 'f':
725                         c = '\f';
726                         break;
727                 case 'n':
728                         c = '\n';
729                         break;
730                 case 'r':
731                         c = '\r';
732                         break;
733                 case 't':
734                         c = '\t';
735                         break;
736                 case 'v':
737                         c = '\v';
738                         break;
739                 case '\\':
740                 case '\'':
741                 case '\"':
742                         break;
743                 default:
744                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location,
745                                         "\\%c is not a valid character escape sequence", c);
746                         return false;
747         }
749         g_string_append_c(str, c);
750         return true;
753 static bool
754 parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep)
756         uint64_t val;
757         char *endptr;
758         int ndigits;
760         if (str[0] != '\\')
761                 return false;
763         if (str[1] == 'u')
764                 ndigits = 4;
765         else if (str[1] == 'U')
766                 ndigits = 8;
767         else
768                 return false;
770         for (int i = 2; i < ndigits + 2; i++) {
771                 if (!g_ascii_isxdigit(str[i])) {
772                         return false;
773                 }
774         }
776         errno = 0;
777         val = g_ascii_strtoull(str + 2, &endptr, 16); /* skip leading 'u' or 'U' */
779         if (errno != 0 || endptr == str || val > UINT32_MAX) {
780                 return false;
781         }
783         /*
784          * Ref: https://en.cppreference.com/w/c/language/escape
785          * Range of universal character names
786          *
787          * If a universal character name corresponds to a code point that is
788          * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a
789          * surrogate code point (the range 0xD800-0xDFFF, inclusive), or
790          * greater than 0x10FFFF, i.e. not a Unicode code point (since C23),
791          * the program is ill-formed. In other words, members of basic source
792          * character set and control characters (in ranges 0x0-0x1F and
793          * 0x7F-0x9F) cannot be expressed in universal character names.
794          */
795         if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60)
796                 return false;
797         else if (val >= 0xD800 && val <= 0xDFFF)
798                 return false;
799         else if (val > 0x10FFFF)
800                 return false;
802         *valuep = (gunichar)val;
803         if (ret_endptr)
804                 *ret_endptr = endptr;
805         return true;
808 static bool
809 append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn)
811         gunichar val;
813         if (!parse_universal_character_name(dfs, ucn, NULL, &val)) {
814                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn);
815                 return false;
816         }
818         g_string_append_unichar(str, val);
819         return true;
822 static bool
823 parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep)
825         const char *cp;
826         unsigned long value;
827         gunichar unival;
828         char *endptr;
830         cp = s + 1;     /* skip the leading ' */
831         if (*cp == '\'') {
832                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant.");
833                 return false;
834         }
836         if (*cp == '\\') {
837                 /*
838                  * C escape sequence.
839                  * An escape sequence is an octal number \NNN,
840                  * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
841                  * that stands for the byte value of the equivalent
842                  * C-escape in ASCII encoding.
843                  */
844                 cp++;
845                 switch (*cp) {
847                 case '\0':
848                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
849                         return false;
851                 case 'a':
852                         value = '\a';
853                         cp++;
854                         break;
856                 case 'b':
857                         value = '\b';
858                         cp++;
859                         break;
861                 case 'f':
862                         value = '\f';
863                         cp++;
864                         break;
866                 case 'n':
867                         value = '\n';
868                         break;
870                 case 'r':
871                         value = '\r';
872                         cp++;
873                         break;
875                 case 't':
876                         value = '\t';
877                         cp++;
878                         break;
880                 case 'v':
881                         value = '\v';
882                         cp++;
883                         break;
885                 case '\'':
886                         value = '\'';
887                         cp++;
888                         break;
890                 case '\\':
891                         value = '\\';
892                         cp++;
893                         break;
895                 case '"':
896                         value = '"';
897                         cp++;
898                         break;
900                 case 'x':
901                         cp++;
902                         if (*cp >= '0' && *cp <= '9')
903                                 value = *cp - '0';
904                         else if (*cp >= 'A' && *cp <= 'F')
905                                 value = 10 + (*cp - 'A');
906                         else if (*cp >= 'a' && *cp <= 'f')
907                                 value = 10 + (*cp - 'a');
908                         else {
909                                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
910                                 return false;
911                         }
912                         cp++;
913                         if (*cp != '\'') {
914                                 value <<= 4;
915                                 if (*cp >= '0' && *cp <= '9')
916                                         value |= *cp - '0';
917                                 else if (*cp >= 'A' && *cp <= 'F')
918                                         value |= 10 + (*cp - 'A');
919                                 else if (*cp >= 'a' && *cp <= 'f')
920                                         value |= 10 + (*cp - 'a');
921                                 else {
922                                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
923                                         return false;
924                                 }
925                                 cp++;
926                         }
927                         break;
929                 case 'u':
930                 case 'U':
931                         if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) {
932                                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s);
933                                 return false;
934                         }
935                         value = (unsigned long)unival;
936                         cp = endptr;
937                         break;
939                 default:
940                         /* Octal */
941                         if (*cp >= '0' && *cp <= '7')
942                                 value = *cp - '0';
943                         else {
944                                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
945                                 return false;
946                         }
947                         if (*(cp + 1) != '\'') {
948                                 cp++;
949                                 value <<= 3;
950                                 if (*cp >= '0' && *cp <= '7')
951                                         value |= *cp - '0';
952                                 else {
953                                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
954                                         return false;
955                                 }
956                                 if (*(cp + 1) != '\'') {
957                                         cp++;
958                                         value <<= 3;
959                                         if (*cp >= '0' && *cp <= '7')
960                                                 value |= *cp - '0';
961                                         else {
962                                                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
963                                                 return false;
964                                         }
965                                 }
966                         }
967                         if (value > 0xFF) {
968                                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s);
969                                 return false;
970                         }
971                         cp++;
972                 }
973         } else {
974                 value = *cp++;
975                 if (!g_ascii_isprint(value)) {
976                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value);
977                         return false;
978                 }
979         }
981         if ((*cp != '\'') || (*(cp + 1) != '\0')){
982                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s);
983                 return false;
984         }
986         *valuep = value;
987         return true;
990 static bool
991 parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error)
993         char *endptr;
995         errno = 0;
996         if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
997                 *valuep = g_ascii_strtoull(s + 2, &endptr, 2);
998         }
999         else {
1000                 *valuep = g_ascii_strtoull(s, &endptr, 0);
1001         }
1003         if (errno == EINVAL || endptr == s || *endptr != '\0') {
1004                 /* This isn't a valid number. */
1005                 if (set_error)
1006                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number.", s);
1007                 return false;
1008         }
1009         if (errno == ERANGE) {
1010                 if (set_error)
1011                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is too large to be represented as a 64-bit number.", s);
1012                 return false;
1013         }
1014         if (errno != 0) {
1015                 // Should not happen
1016                 if (set_error)
1017                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number (%s).", s, g_strerror(errno));
1018                 return false;
1019         }
1021         return true;
1024 static bool
1025 parse_double(dfsyntax_t *dfs, const char *s, double *valuep)
1027         char *endptr = NULL;
1029         errno = 0;
1030         *valuep = g_ascii_strtod(s, &endptr);
1032         if (endptr == s || *endptr != '\0') {
1033                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid floating-point number.", s);
1034                 return false;
1035         }
1036         if (errno == ERANGE) {
1037                 if (*valuep == HUGE_VAL) {
1038                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point overflow.", s);
1039                 }
1040                 else {
1041                         dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point underflow.", s);
1042                 }
1043                 return false;
1044         }
1045         if (errno != 0) {
1046                 // Should not happen
1047                 dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc,
1048                                 "\"%s\" is not a valid floating-point number (%s).",
1049                                 s, g_strerror(errno));
1050                 return false;
1051         }
1053         return true;