1 /* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
2 Copyright (C) 1994-2024 Free Software Foundation, Inc.
4 This file is part of GDB.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /*----------------------------------------------------------------------*/
21 /* The converted version of this file is to be included in ada-exp.y, */
22 /* the Ada parser for gdb. The function yylex obtains characters from */
23 /* the global pointer lexptr. It returns a syntactic category for */
24 /* each successive token and places a semantic value into yylval */
25 /* (ada-lval), defined by the parser. */
28 NUM10 ({DIG}({DIG}|_)*)
30 NUM16 ({HEXDIG}({HEXDIG}|_)*)
33 ID ({LETTER}({LETTER}|{DIG}|[\x80-\xff])*|"<"{LETTER}({LETTER}|{DIG})*">")
36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
37 OPER ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
42 /* This must agree with COMPLETE_CHAR below. See the comment there
43 for the explanation. */
49 #include "diagnostics.h"
51 /* Some old versions of flex (2.5.x) generate code that uses the "register"
52 keyword, which compilers warn about, because it is not allowed in ISO
55 DIAGNOSTIC_IGNORE_REGISTER
57 #define NUMERAL_WIDTH 256
58 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
60 static void canonicalizeNumeral (char *s1, const char *);
61 static struct stoken processString (const char*, int);
62 static int processInt (struct parser_state *, const char *, const char *,
64 static int processReal (struct parser_state *, const char *);
65 static struct stoken processId (const char *, int);
66 static int processAttribute (const char *);
67 static int find_dot_all (const char *);
68 static void rewind_to_char (int);
71 #define YY_DECL static int yylex ( void )
73 /* Flex generates a static function "input" which is not used.
74 Defining YY_NO_INPUT comments it out. */
77 /* The character we use to represent the completion point. */
78 #define COMPLETE_CHAR '\001'
81 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
82 if ( *pstate->lexptr == '\000' ) \
84 if (pstate->parse_completion && !ada_parser->returned_complete) \
86 ada_parser->returned_complete = true; \
87 *(BUF) = COMPLETE_CHAR; \
95 *(BUF) = *pstate->lexptr == COMPLETE_CHAR ? ' ' : *pstate->lexptr; \
97 pstate->lexptr += 1; \
102 %option case-insensitive interactive nodefault noyywrap
110 "--".* { yyterminate(); }
113 char numbuf[NUMERAL_WIDTH];
114 canonicalizeNumeral (numbuf, yytext);
115 char *e_ptr = strrchr (numbuf, 'e');
117 return processInt (pstate, nullptr, numbuf, e_ptr + 1);
121 char numbuf[NUMERAL_WIDTH];
122 canonicalizeNumeral (numbuf, yytext);
123 return processInt (pstate, NULL, numbuf, NULL);
126 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
127 char numbuf[NUMERAL_WIDTH];
128 canonicalizeNumeral (numbuf, yytext);
129 char *e_ptr = strrchr (numbuf, 'e');
131 return processInt (pstate, numbuf,
132 strchr (numbuf, '#') + 1,
136 /* The "llf" is a gdb extension to allow a floating-point
137 constant to be written in some other base. The
138 floating-point number is formed by reinterpreting the
139 bytes, allowing direct control over the bits. */
140 {NUM10}(l{0,2}f)?"#"{HEXDIG}({HEXDIG}|_)*"#" {
141 char numbuf[NUMERAL_WIDTH];
142 canonicalizeNumeral (numbuf, yytext);
143 return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
148 char numbuf[NUMERAL_WIDTH];
149 canonicalizeNumeral (numbuf, yytext+2);
150 return processInt (pstate, "16#", numbuf, NULL);
154 {NUM10}"."{NUM10}{EXP} {
155 char numbuf[NUMERAL_WIDTH];
156 canonicalizeNumeral (numbuf, yytext);
157 return processReal (pstate, numbuf);
161 char numbuf[NUMERAL_WIDTH];
162 canonicalizeNumeral (numbuf, yytext);
163 return processReal (pstate, numbuf);
166 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
167 error (_("Based real literals not implemented yet."));
170 {NUM10}"#"{NUM16}"."{NUM16}"#" {
171 error (_("Based real literals not implemented yet."));
174 <INITIAL>"'"({GRAPHIC}|\")"'" {
175 yylval.typed_char.val = yytext[1];
176 yylval.typed_char.type = type_for_char (pstate, yytext[1]);
180 <INITIAL>"'[\""{HEXDIG}{2,}"\"]'" {
181 ULONGEST v = strtoulst (yytext+3, nullptr, 16);
182 yylval.typed_char.val = v;
183 yylval.typed_char.type = type_for_char (pstate, v);
187 /* Note that we don't handle bracket sequences of more than 2
188 digits here. Currently there's no support for wide or
189 wide-wide strings. */
190 \"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\" {
191 yylval.sval = processString (yytext+1, yyleng-2);
196 error (_("ill-formed or non-terminated string literal"));
201 rewind_to_char ('i');
206 rewind_to_char ('t');
210 thread{WHITE}+{DIG} {
211 /* This keyword signals the end of the expression and
212 will be processed separately. */
213 rewind_to_char ('t');
220 and { return _AND_; }
221 delta { return DELTA; }
222 else { return ELSE; }
228 null { return NULL_PTR; }
230 others { return OTHERS; }
232 then { return THEN; }
233 with { return WITH; }
236 /* BOOLEAN "KEYWORDS" */
238 /* True and False are not keywords in Ada, but rather enumeration constants.
239 However, the boolean type is no longer represented as an enum, so True
240 and False are no longer defined in symbol tables. We compromise by
241 making them keywords (when bare). */
243 true { return TRUEKEYWORD; }
244 false { return FALSEKEYWORD; }
248 {TICK}([a-z][a-z_]*)?{COMPLETE}? { BEGIN INITIAL; return processAttribute (yytext); }
252 "=>" { return ARROW; }
253 ".." { return DOTDOT; }
254 "**" { return STARSTAR; }
255 ":=" { return ASSIGN; }
256 "/=" { return NOTEQUAL; }
260 <BEFORE_QUAL_QUOTE>"'"/{NOT_COMPLETE} { BEGIN INITIAL; return '\''; }
262 [-&*+{}@/:<>=|;\[\]] { return yytext[0]; }
264 "," { if (ada_parser->paren_depth == 0 && pstate->comma_terminates)
266 rewind_to_char (',');
273 "(" { ada_parser->paren_depth += 1; return '('; }
274 ")" { if (ada_parser->paren_depth == 0)
276 rewind_to_char (')');
281 ada_parser->paren_depth -= 1;
286 "."{WHITE}*{ID}{COMPLETE}? {
287 yylval.sval = processId (yytext+1, yyleng-1);
288 if (yytext[yyleng - 1] == COMPLETE_CHAR)
293 "."{WHITE}*{COMPLETE} {
294 yylval.sval.ptr = "";
295 yylval.sval.length = 0;
299 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'"|{COMPLETE})? {
300 int all_posn = find_dot_all (yytext);
302 if (all_posn == -1 && yytext[yyleng-1] == '\'')
304 BEGIN BEFORE_QUAL_QUOTE;
307 else if (all_posn >= 0)
309 bool is_completion = yytext[yyleng - 1] == COMPLETE_CHAR;
310 yylval.sval = processId (yytext, yyleng);
311 return is_completion ? NAME_COMPLETE : NAME;
315 /* GDB EXPRESSION CONSTRUCTS */
317 "'"[^']+"'"{WHITE}*:: {
319 yylval.sval = processId (yytext, yyleng);
323 "::" { return COLONCOLON; }
325 /* REGISTERS AND GDB CONVENIENCE VARIABLES */
327 "$"({LETTER}|{DIG}|"$")* {
328 yylval.sval.ptr = yytext;
329 yylval.sval.length = yyleng;
330 return DOLLAR_VARIABLE;
333 /* CATCH-ALL ERROR CASE */
335 . { error (_("Invalid character '%s' in expression."), yytext); }
339 /* Initialize the lexer for processing a new expression. */
342 lexer_init (FILE *inp)
349 /* Copy S2 to S1, removing all underscores, and downcasing all letters. */
352 canonicalizeNumeral (char *s1, const char *s2)
354 for (; *s2 != '\000'; s2 += 1)
365 /* Interprets the prefix of NUM that consists of digits of the given BASE
366 as an integer of that BASE, with the string EXP as an exponent.
367 Puts value in yylval, and returns INT, if the string is valid. Causes
368 an error if the number is improperly formatted. BASE, if NULL, defaults
369 to "10", and EXP to "1". The EXP does not contain a leading 'e' or 'E'.
373 processInt (struct parser_state *par_state, const char *base0,
374 const char *num0, const char *exp0)
378 /* For the based literal with an "f" prefix, we'll return a
379 floating-point number. This counts the number of "l"s seen,
380 to decide the width of the floating-point number to return. -1
382 int floating_point_l_count = -1;
389 base = strtol (base0, &end_of_base, 10);
390 if (base < 2 || base > 16)
391 error (_("Invalid base: %d."), base);
392 while (*end_of_base == 'l')
394 ++floating_point_l_count;
397 /* This assertion is ensured by the pattern. */
398 gdb_assert (floating_point_l_count == -1 || *end_of_base == 'f');
399 if (*end_of_base == 'f')
402 ++floating_point_l_count;
404 /* This assertion is ensured by the pattern. */
405 gdb_assert (*end_of_base == '#');
411 exp = strtol(exp0, (char **) NULL, 10);
414 while (isxdigit (*num0))
416 int dig = fromhex (*num0);
418 error (_("Invalid digit `%c' in based literal"), *num0);
430 if (floating_point_l_count > -1)
432 struct type *fp_type;
433 if (floating_point_l_count == 0)
434 fp_type = language_lookup_primitive_type (par_state->language (),
435 par_state->gdbarch (),
437 else if (floating_point_l_count == 1)
438 fp_type = language_lookup_primitive_type (par_state->language (),
439 par_state->gdbarch (),
443 /* This assertion is ensured by the pattern. */
444 gdb_assert (floating_point_l_count == 2);
445 fp_type = language_lookup_primitive_type (par_state->language (),
446 par_state->gdbarch (),
450 yylval.typed_val_float.type = fp_type;
451 result.write (gdb::make_array_view (yylval.typed_val_float.val,
453 type_byte_order (fp_type),
459 const gdb_mpz *value = ada_parser->push_integer (std::move (result));
461 int int_bits = gdbarch_int_bit (par_state->gdbarch ());
462 int long_bits = gdbarch_long_bit (par_state->gdbarch ());
463 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
465 if (fits_in_type (1, *value, int_bits, true))
466 yylval.typed_val.type = parse_type (par_state)->builtin_int;
467 else if (fits_in_type (1, *value, long_bits, true))
468 yylval.typed_val.type = parse_type (par_state)->builtin_long;
469 else if (fits_in_type (1, *value, long_bits, false))
470 yylval.typed_val.type
471 = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
472 else if (fits_in_type (1, *value, long_long_bits, true))
473 yylval.typed_val.type = parse_type (par_state)->builtin_long_long;
474 else if (fits_in_type (1, *value, long_long_bits, false))
475 yylval.typed_val.type
476 = builtin_type (par_state->gdbarch ())->builtin_unsigned_long_long;
477 else if (fits_in_type (1, *value, 128, true))
478 yylval.typed_val.type
479 = language_lookup_primitive_type (par_state->language (),
480 par_state->gdbarch (),
481 "long_long_long_integer");
482 else if (fits_in_type (1, *value, 128, false))
483 yylval.typed_val.type
484 = language_lookup_primitive_type (par_state->language (),
485 par_state->gdbarch (),
486 "unsigned_long_long_long_integer");
488 error (_("Integer literal out of range"));
490 yylval.typed_val.val = value;
495 processReal (struct parser_state *par_state, const char *num0)
497 yylval.typed_val_float.type = parse_type (par_state)->builtin_long_double;
499 bool parsed = parse_float (num0, strlen (num0),
500 yylval.typed_val_float.type,
501 yylval.typed_val_float.val);
507 /* Return a canonicalized version of NAME0[0..LEN-1]. The
508 resulting string is valid until the next call to ada_parse. If
509 NAME0 contains the substring "___", it is assumed to be already
510 encoded and the resulting name is equal to it. Similarly, if the name
511 starts with '<', it is copied verbatim. Otherwise, it differs
513 + Characters between '...' are transferred verbatim to the result.
514 + Trailing "'" characters in quoted sequences are removed (a leading quote is
515 preserved to indicate that the name is not to be GNAT-encoded).
516 + Unquoted whitespace is removed.
517 + Unquoted alphabetic characters are mapped to lower case.
518 Result is returned as a struct stoken, but for convenience, the string
519 is also null-terminated. Result string valid until the next call of
523 processId (const char *name0, int len)
525 char *name = (char *) obstack_alloc (&ada_parser->temp_space, len + 11);
527 struct stoken result;
530 while (len > 0 && isspace (name0[len-1]))
533 if (name0[0] == '<' || strstr (name0, "___") != NULL)
535 strncpy (name, name0, len);
541 bool in_quotes = false;
545 if (name0[i0] == COMPLETE_CHAR)
551 name[i++] = name0[i0++];
552 else if (isalnum (name0[i0]))
554 name[i] = tolower (name0[i0]);
557 else if (isspace (name0[i0]))
559 else if (name0[i0] == '\'')
561 /* Copy the starting quote, but not the ending quote. */
563 name[i++] = name0[i0++];
564 in_quotes = !in_quotes;
567 name[i++] = name0[i0++];
575 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
576 with special hex character notations replaced with characters.
577 Result valid until the next call to ada_parse. */
580 processString (const char *text, int len)
584 const char *lim = text + len;
585 struct stoken result;
587 q = (char *) obstack_alloc (&ada_parser->temp_space, len);
592 if (p[0] == '[' && p[1] == '"' && p+2 < lim)
594 if (p[2] == '"') /* "...["""]... */
602 ULONGEST chr = strtoulst (p + 2, &end, 16);
604 error (_("wide strings are not yet supported"));
614 result.length = q - result.ptr;
618 /* Returns the position within STR of the '.' in a
619 '.{WHITE}*all' component of a dotted name, or -1 if there is none.
620 Note: we actually don't need this routine, since 'all' can never be an
621 Ada identifier. Thus, looking up foo.all or foo.all.x as a name
622 must fail, and will eventually be interpreted as (foo).all or
623 (foo).all.x. However, this does avoid an extraneous lookup. */
626 find_dot_all (const char *str)
630 for (i = 0; str[i] != '\000'; i++)
637 while (isspace (str[i]));
639 if (strncasecmp (str + i, "all", 3) == 0
640 && !isalnum (str[i + 3]) && str[i + 3] != '_')
646 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
650 subseqMatch (const char *subseq, const char *str)
652 if (subseq[0] == '\0')
654 else if (str[0] == '\0')
656 else if (tolower (subseq[0]) == tolower (str[0]))
657 return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
659 return subseqMatch (subseq, str+1);
663 static const struct { const char *name; int code; }
665 { "address", TICK_ADDRESS },
666 { "unchecked_access", TICK_ACCESS },
667 { "unrestricted_access", TICK_ACCESS },
668 { "access", TICK_ACCESS },
669 { "first", TICK_FIRST },
670 { "last", TICK_LAST },
671 { "length", TICK_LENGTH },
674 { "modulus", TICK_MODULUS },
675 { "object_size", TICK_OBJECT_SIZE },
677 { "range", TICK_RANGE },
678 { "size", TICK_SIZE },
681 { "enum_rep", TICK_ENUM_REP },
682 { "enum_val", TICK_ENUM_VAL },
685 /* Return the syntactic code corresponding to the attribute name or
689 processAttribute (const char *str)
691 gdb_assert (*str == '\'');
693 while (isspace (*str))
696 int len = strlen (str);
697 if (len > 0 && str[len - 1] == COMPLETE_CHAR)
699 /* This is enforced by YY_INPUT. */
700 gdb_assert (pstate->parse_completion);
701 yylval.sval.ptr = obstack_strndup (&ada_parser->temp_space,
703 yylval.sval.length = len - 1;
704 return TICK_COMPLETE;
707 for (const auto &item : attributes)
708 if (strcasecmp (str, item.name) == 0)
711 std::optional<int> found;
712 for (const auto &item : attributes)
713 if (subseqMatch (str, item.name))
715 if (!found.has_value ())
718 error (_("ambiguous attribute name: `%s'"), str);
720 if (!found.has_value ())
721 error (_("unrecognized attribute: `%s'"), str);
727 ada_tick_completer::complete (struct expression *exp,
728 completion_tracker &tracker)
730 completion_list output;
731 for (const auto &item : attributes)
733 if (strncasecmp (item.name, m_name.c_str (), m_name.length ()) == 0)
734 output.emplace_back (xstrdup (item.name));
736 tracker.add_completions (std::move (output));
740 /* Back up lexptr by yyleng and then to the rightmost occurrence of
741 character CH, case-folded (there must be one). WARNING: since
742 lexptr points to the next input character that Flex has not yet
743 transferred to its internal buffer, the use of this function
744 depends on the assumption that Flex calls YY_INPUT only when it is
745 logically necessary to do so (thus, there is no reading ahead
746 farther than needed to identify the next token.) */
749 rewind_to_char (int ch)
751 pstate->lexptr -= yyleng;
752 while (toupper (*pstate->lexptr) != toupper (ch))
757 /* Dummy definition to suppress warnings about unused static definitions. */
758 typedef void (*dummy_function) ();
759 dummy_function ada_flex_use[] =
761 (dummy_function) yyunput