More updated translations
[binutils-gdb.git] / gdb / rust-parse.c
blob490adb378b36d76a957b9c730e077480d8668dc8
/* Rust expression parsing for GDB, the GNU debugger.

   Copyright (C) 2016-2024 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

21 #include "block.h"
22 #include "charset.h"
23 #include "cp-support.h"
24 #include "gdbsupport/gdb_obstack.h"
25 #include "gdbsupport/gdb_regex.h"
26 #include "rust-lang.h"
27 #include "parser-defs.h"
28 #include "gdbsupport/selftest.h"
29 #include "value.h"
30 #include "gdbarch.h"
31 #include "rust-exp.h"
32 #include "inferior.h"
34 using namespace expr;
/* A regular expression for matching Rust numbers.  The text is built
   from adjacent string literals so that each alternative can carry
   its own comment.  The FLOAT_TYPE1, FLOAT_TYPE2, INT_TEXT and
   INT_TYPE macros name the subexpression indices of interest; they
   must be kept in sync with the parenthesized groups below.  */

static const char number_regex_text[] =
  /* subexpression 1: allows use of alternation, otherwise uninteresting */
  "^("
  /* First comes floating point.  */
  /* Recognize number after the decimal point, with optional
     exponent and optional type suffix.
     subexpression 2: allows "?", otherwise uninteresting
     subexpression 3: if present, type suffix
  */
  "[0-9][0-9_]*\\.[0-9][0-9_]*([eE][-+]?[0-9][0-9_]*)?(f32|f64)?"
#define FLOAT_TYPE1 3
  "|"
  /* Recognize exponent without decimal point, with optional type
     suffix.
     subexpression 4: if present, type suffix
  */
#define FLOAT_TYPE2 4
  "[0-9][0-9_]*[eE][-+]?[0-9][0-9_]*(f32|f64)?"
  "|"
  /* "23." is a valid floating point number, but "23.e5" and
     "23.f32" are not.  So, handle the trailing-. case
     separately.  */
  "[0-9][0-9_]*\\."
  "|"
  /* Finally come integers.
     subexpression 5: text of integer
     subexpression 6: if present, type suffix
     subexpression 7: allows use of alternation, otherwise uninteresting
  */
#define INT_TEXT 5
#define INT_TYPE 6
  "(0x[a-fA-F0-9_]+|0o[0-7_]+|0b[01_]+|[0-9][0-9_]*)"
  "([iu](size|8|16|32|64|128))?"
  ")";

/* The number of subexpressions to allocate space for, including the
   "0th" whole match subexpression.  The regex above has seven
   parenthesized groups, hence eight slots.  */
#define NUM_SUBEXPRESSIONS 8

/* The compiled number-matching regex.  */

static regex_t number_regex;
/* The kinds of tokens.  Note that single-character tokens are
   represented by themselves, so for instance '[' is a token.  The
   value 0 is used by the lexer to signal the end of the input (or a
   token that terminates the parse).  */
enum token_type : int
{
  /* Make sure to start after any ASCII character.  */
  GDBVAR = 256,
  IDENT,
  COMPLETE,
  INTEGER,
  DECIMAL_INTEGER,
  STRING,
  BYTESTRING,
  FLOAT,
  COMPOUND_ASSIGN,

  /* Keyword tokens.  */
  KW_AS,
  KW_IF,
  KW_TRUE,
  KW_FALSE,
  KW_SUPER,
  KW_SELF,
  KW_MUT,
  KW_EXTERN,
  KW_CONST,
  KW_FN,
  KW_SIZEOF,

  /* Operator tokens.  */
  DOTDOT,
  DOTDOTEQ,
  OROR,
  ANDAND,
  EQEQ,
  NOTEQ,
  LTEQ,
  GTEQ,
  LSH,
  RSH,
  COLONCOLON,
  ARROW,
};
125 /* A typed integer constant. */
127 struct typed_val_int
129 gdb_mpz val;
130 struct type *type;
133 /* A typed floating point constant. */
135 struct typed_val_float
137 float_data val;
138 struct type *type;
141 /* A struct of this type is used to describe a token. */
143 struct token_info
145 const char *name;
146 int value;
147 enum exp_opcode opcode;
150 /* Identifier tokens. */
152 static const struct token_info identifier_tokens[] =
154 { "as", KW_AS, OP_NULL },
155 { "false", KW_FALSE, OP_NULL },
156 { "if", 0, OP_NULL },
157 { "mut", KW_MUT, OP_NULL },
158 { "const", KW_CONST, OP_NULL },
159 { "self", KW_SELF, OP_NULL },
160 { "super", KW_SUPER, OP_NULL },
161 { "true", KW_TRUE, OP_NULL },
162 { "extern", KW_EXTERN, OP_NULL },
163 { "fn", KW_FN, OP_NULL },
164 { "sizeof", KW_SIZEOF, OP_NULL },
167 /* Operator tokens, sorted longest first. */
169 static const struct token_info operator_tokens[] =
171 { ">>=", COMPOUND_ASSIGN, BINOP_RSH },
172 { "<<=", COMPOUND_ASSIGN, BINOP_LSH },
174 { "<<", LSH, OP_NULL },
175 { ">>", RSH, OP_NULL },
176 { "&&", ANDAND, OP_NULL },
177 { "||", OROR, OP_NULL },
178 { "==", EQEQ, OP_NULL },
179 { "!=", NOTEQ, OP_NULL },
180 { "<=", LTEQ, OP_NULL },
181 { ">=", GTEQ, OP_NULL },
182 { "+=", COMPOUND_ASSIGN, BINOP_ADD },
183 { "-=", COMPOUND_ASSIGN, BINOP_SUB },
184 { "*=", COMPOUND_ASSIGN, BINOP_MUL },
185 { "/=", COMPOUND_ASSIGN, BINOP_DIV },
186 { "%=", COMPOUND_ASSIGN, BINOP_REM },
187 { "&=", COMPOUND_ASSIGN, BINOP_BITWISE_AND },
188 { "|=", COMPOUND_ASSIGN, BINOP_BITWISE_IOR },
189 { "^=", COMPOUND_ASSIGN, BINOP_BITWISE_XOR },
190 { "..=", DOTDOTEQ, OP_NULL },
192 { "::", COLONCOLON, OP_NULL },
193 { "..", DOTDOT, OP_NULL },
194 { "->", ARROW, OP_NULL }
197 /* An instance of this is created before parsing, and destroyed when
198 parsing is finished. */
200 struct rust_parser
202 explicit rust_parser (struct parser_state *state)
203 : pstate (state)
207 DISABLE_COPY_AND_ASSIGN (rust_parser);
209 /* Return the parser's language. */
210 const struct language_defn *language () const
212 return pstate->language ();
215 /* Return the parser's gdbarch. */
216 struct gdbarch *arch () const
218 return pstate->gdbarch ();
221 /* A helper to look up a Rust type, or fail. This only works for
222 types defined by rust_language_arch_info. */
224 struct type *get_type (const char *name)
226 struct type *type;
228 type = language_lookup_primitive_type (language (), arch (), name);
229 if (type == NULL)
230 error (_("Could not find Rust type %s"), name);
231 return type;
234 std::string crate_name (const std::string &name);
235 std::string super_name (const std::string &ident, unsigned int n_supers);
237 int lex_character ();
238 int lex_decimal_integer ();
239 int lex_number ();
240 int lex_string ();
241 int lex_identifier ();
242 uint32_t lex_hex (int min, int max);
243 uint32_t lex_escape (bool is_byte);
244 int lex_operator ();
245 int lex_one_token (bool decimal_only);
246 void push_back (char c);
248 /* The main interface to lexing. Lexes one token and updates the
249 internal state. DECIMAL_ONLY is true in the special case where
250 we want to tell the lexer not to parse a number as a float, but
251 instead only as a decimal integer. See parse_field. */
252 void lex (bool decimal_only = false)
254 current_token = lex_one_token (decimal_only);
257 /* Assuming the current token is TYPE, lex the next token.
258 DECIMAL_ONLY is passed to 'lex', which see. */
259 void assume (int type, bool decimal_only = false)
261 gdb_assert (current_token == type);
262 lex (decimal_only);
265 /* Require the single-character token C, and lex the next token; or
266 throw an exception. */
267 void require (char type)
269 if (current_token != type)
270 error (_("'%c' expected"), type);
271 lex ();
274 /* Entry point for all parsing. */
275 operation_up parse_entry_point ()
277 lex ();
278 operation_up result = parse_expr ();
279 if (current_token != 0)
280 error (_("Syntax error near '%s'"), pstate->prev_lexptr);
281 return result;
284 operation_up parse_tuple ();
285 operation_up parse_array ();
286 operation_up name_to_operation (const std::string &name);
287 operation_up parse_struct_expr (struct type *type);
288 operation_up parse_binop (bool required);
289 operation_up parse_range ();
290 operation_up parse_expr ();
291 operation_up parse_sizeof ();
292 operation_up parse_addr ();
293 operation_up parse_field (operation_up &&);
294 operation_up parse_index (operation_up &&);
295 std::vector<operation_up> parse_paren_args ();
296 operation_up parse_call (operation_up &&);
297 std::vector<struct type *> parse_type_list ();
298 std::vector<struct type *> parse_maybe_type_list ();
299 struct type *parse_array_type ();
300 struct type *parse_slice_type ();
301 struct type *parse_pointer_type ();
302 struct type *parse_function_type ();
303 struct type *parse_tuple_type ();
304 struct type *parse_type ();
305 std::string parse_path (bool for_expr);
306 operation_up parse_string ();
307 operation_up parse_tuple_struct (struct type *type);
308 operation_up parse_path_expr ();
309 operation_up parse_atom (bool required);
311 void update_innermost_block (struct block_symbol sym);
312 struct block_symbol lookup_symbol (const char *name,
313 const struct block *block,
314 const domain_search_flags domain);
315 struct type *rust_lookup_type (const char *name);
317 /* Clear some state. This is only used for testing. */
318 #if GDB_SELF_TEST
319 void reset (const char *input)
321 pstate->prev_lexptr = nullptr;
322 pstate->lexptr = input;
323 paren_depth = 0;
324 current_token = 0;
325 current_int_val = {};
326 current_float_val = {};
327 current_string_val = {};
328 current_opcode = OP_NULL;
330 #endif /* GDB_SELF_TEST */
332 /* Return the token's string value as a string. */
333 std::string get_string () const
335 return std::string (current_string_val.ptr, current_string_val.length);
338 /* A pointer to this is installed globally. */
339 auto_obstack obstack;
341 /* The parser state gdb gave us. */
342 struct parser_state *pstate;
344 /* Depth of parentheses. */
345 int paren_depth = 0;
347 /* The current token's type. */
348 int current_token = 0;
349 /* The current token's payload, if any. */
350 typed_val_int current_int_val {};
351 typed_val_float current_float_val {};
352 struct stoken current_string_val {};
353 enum exp_opcode current_opcode = OP_NULL;
355 /* When completing, this may be set to the field operation to
356 complete. */
357 operation_up completion_op;
360 /* Return an string referring to NAME, but relative to the crate's
361 name. */
363 std::string
364 rust_parser::crate_name (const std::string &name)
366 std::string crate = rust_crate_for_block (pstate->expression_context_block);
368 if (crate.empty ())
369 error (_("Could not find crate for current location"));
370 return "::" + crate + "::" + name;
373 /* Return a string referring to a "super::" qualified name. IDENT is
374 the base name and N_SUPERS is how many "super::"s were provided.
375 N_SUPERS can be zero. */
377 std::string
378 rust_parser::super_name (const std::string &ident, unsigned int n_supers)
380 const char *scope = "";
381 if (pstate->expression_context_block != nullptr)
382 scope = pstate->expression_context_block->scope ();
383 int offset;
385 if (scope[0] == '\0')
386 error (_("Couldn't find namespace scope for self::"));
388 if (n_supers > 0)
390 int len;
391 std::vector<int> offsets;
392 unsigned int current_len;
394 current_len = cp_find_first_component (scope);
395 while (scope[current_len] != '\0')
397 offsets.push_back (current_len);
398 gdb_assert (scope[current_len] == ':');
399 /* The "::". */
400 current_len += 2;
401 current_len += cp_find_first_component (scope
402 + current_len);
405 len = offsets.size ();
406 if (n_supers >= len)
407 error (_("Too many super:: uses from '%s'"), scope);
409 offset = offsets[len - n_supers];
411 else
412 offset = strlen (scope);
414 return "::" + std::string (scope, offset) + "::" + ident;
417 /* A helper to appropriately munge NAME and BLOCK depending on the
418 presence of a leading "::". */
420 static void
421 munge_name_and_block (const char **name, const struct block **block)
423 /* If it is a global reference, skip the current block in favor of
424 the static block. */
425 if (startswith (*name, "::"))
427 *name += 2;
428 *block = (*block)->static_block ();
432 /* Like lookup_symbol, but handles Rust namespace conventions, and
433 doesn't require field_of_this_result. */
435 struct block_symbol
436 rust_parser::lookup_symbol (const char *name, const struct block *block,
437 const domain_search_flags domain)
439 struct block_symbol result;
441 munge_name_and_block (&name, &block);
443 result = ::lookup_symbol (name, block, domain, NULL);
444 if (result.symbol != NULL)
445 update_innermost_block (result);
446 return result;
449 /* Look up a type, following Rust namespace conventions. */
451 struct type *
452 rust_parser::rust_lookup_type (const char *name)
454 struct block_symbol result;
455 struct type *type;
457 const struct block *block = pstate->expression_context_block;
458 munge_name_and_block (&name, &block);
460 result = ::lookup_symbol (name, block, SEARCH_TYPE_DOMAIN, nullptr);
461 if (result.symbol != NULL)
463 update_innermost_block (result);
464 return result.symbol->type ();
467 type = lookup_typename (language (), name, NULL, 1);
468 if (type != NULL)
469 return type;
471 /* Last chance, try a built-in type. */
472 return language_lookup_primitive_type (language (), arch (), name);
475 /* A helper that updates the innermost block as appropriate. */
477 void
478 rust_parser::update_innermost_block (struct block_symbol sym)
480 if (symbol_read_needs_frame (sym.symbol))
481 pstate->block_tracker->update (sym);
484 /* Lex a hex number with at least MIN digits and at most MAX
485 digits. */
487 uint32_t
488 rust_parser::lex_hex (int min, int max)
490 uint32_t result = 0;
491 int len = 0;
492 /* We only want to stop at MAX if we're lexing a byte escape. */
493 int check_max = min == max;
495 while ((check_max ? len <= max : 1)
496 && ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
497 || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
498 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')))
500 result *= 16;
501 if (pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
502 result = result + 10 + pstate->lexptr[0] - 'a';
503 else if (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
504 result = result + 10 + pstate->lexptr[0] - 'A';
505 else
506 result = result + pstate->lexptr[0] - '0';
507 ++pstate->lexptr;
508 ++len;
511 if (len < min)
512 error (_("Not enough hex digits seen"));
513 if (len > max)
515 gdb_assert (min != max);
516 error (_("Overlong hex escape"));
519 return result;
522 /* Lex an escape. IS_BYTE is true if we're lexing a byte escape;
523 otherwise we're lexing a character escape. */
525 uint32_t
526 rust_parser::lex_escape (bool is_byte)
528 uint32_t result;
530 gdb_assert (pstate->lexptr[0] == '\\');
531 ++pstate->lexptr;
532 switch (pstate->lexptr[0])
534 case 'x':
535 ++pstate->lexptr;
536 result = lex_hex (2, 2);
537 break;
539 case 'u':
540 if (is_byte)
541 error (_("Unicode escape in byte literal"));
542 ++pstate->lexptr;
543 if (pstate->lexptr[0] != '{')
544 error (_("Missing '{' in Unicode escape"));
545 ++pstate->lexptr;
546 result = lex_hex (1, 6);
547 /* Could do range checks here. */
548 if (pstate->lexptr[0] != '}')
549 error (_("Missing '}' in Unicode escape"));
550 ++pstate->lexptr;
551 break;
553 case 'n':
554 result = '\n';
555 ++pstate->lexptr;
556 break;
557 case 'r':
558 result = '\r';
559 ++pstate->lexptr;
560 break;
561 case 't':
562 result = '\t';
563 ++pstate->lexptr;
564 break;
565 case '\\':
566 result = '\\';
567 ++pstate->lexptr;
568 break;
569 case '0':
570 result = '\0';
571 ++pstate->lexptr;
572 break;
573 case '\'':
574 result = '\'';
575 ++pstate->lexptr;
576 break;
577 case '"':
578 result = '"';
579 ++pstate->lexptr;
580 break;
582 default:
583 error (_("Invalid escape \\%c in literal"), pstate->lexptr[0]);
586 return result;
589 /* A helper for lex_character. Search forward for the closing single
590 quote, then convert the bytes from the host charset to UTF-32. */
592 static uint32_t
593 lex_multibyte_char (const char *text, int *len)
595 /* Only look a maximum of 5 bytes for the closing quote. This is
596 the maximum for UTF-8. */
597 int quote;
598 gdb_assert (text[0] != '\'');
599 for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
601 *len = quote;
602 /* The caller will issue an error. */
603 if (text[quote] == '\0')
604 return 0;
606 auto_obstack result;
607 convert_between_encodings (host_charset (), HOST_UTF32,
608 (const gdb_byte *) text,
609 quote, 1, &result, translit_none);
611 int size = obstack_object_size (&result);
612 if (size > 4)
613 error (_("overlong character literal"));
614 uint32_t value;
615 memcpy (&value, obstack_finish (&result), size);
616 return value;
619 /* Lex a character constant. */
622 rust_parser::lex_character ()
624 bool is_byte = false;
625 uint32_t value;
627 if (pstate->lexptr[0] == 'b')
629 is_byte = true;
630 ++pstate->lexptr;
632 gdb_assert (pstate->lexptr[0] == '\'');
633 ++pstate->lexptr;
634 if (pstate->lexptr[0] == '\'')
635 error (_("empty character literal"));
636 else if (pstate->lexptr[0] == '\\')
637 value = lex_escape (is_byte);
638 else
640 int len;
641 value = lex_multibyte_char (&pstate->lexptr[0], &len);
642 pstate->lexptr += len;
645 if (pstate->lexptr[0] != '\'')
646 error (_("Unterminated character literal"));
647 ++pstate->lexptr;
649 current_int_val.val = value;
650 current_int_val.type = get_type (is_byte ? "u8" : "char");
652 return INTEGER;
/* Return the offset of the double quote if STR looks like the start
   of a raw string, or 0 if STR does not start a raw string.  A raw
   string start is 'r', then any number of '#', then '"'; since the
   'r' is always present the offset is at least 1 on success.  */

static int
starts_raw_string (const char *str)
{
  const char *save = str;

  if (str[0] != 'r')
    return 0;
  ++str;
  while (str[0] == '#')
    ++str;
  if (str[0] == '"')
    return str - save;
  return 0;
}
673 /* Return true if STR looks like the end of a raw string that had N
674 hashes at the start. */
676 static bool
677 ends_raw_string (const char *str, int n)
679 gdb_assert (str[0] == '"');
680 for (int i = 0; i < n; ++i)
681 if (str[i + 1] != '#')
682 return false;
683 return true;
686 /* Lex a string constant. */
689 rust_parser::lex_string ()
691 int is_byte = pstate->lexptr[0] == 'b';
692 int raw_length;
694 if (is_byte)
695 ++pstate->lexptr;
696 raw_length = starts_raw_string (pstate->lexptr);
697 pstate->lexptr += raw_length;
698 gdb_assert (pstate->lexptr[0] == '"');
699 ++pstate->lexptr;
701 while (1)
703 uint32_t value;
705 if (raw_length > 0)
707 if (pstate->lexptr[0] == '"' && ends_raw_string (pstate->lexptr,
708 raw_length - 1))
710 /* Exit with lexptr pointing after the final "#". */
711 pstate->lexptr += raw_length;
712 break;
714 else if (pstate->lexptr[0] == '\0')
715 error (_("Unexpected EOF in string"));
717 value = pstate->lexptr[0] & 0xff;
718 if (is_byte && value > 127)
719 error (_("Non-ASCII value in raw byte string"));
720 obstack_1grow (&obstack, value);
722 ++pstate->lexptr;
724 else if (pstate->lexptr[0] == '"')
726 /* Make sure to skip the quote. */
727 ++pstate->lexptr;
728 break;
730 else if (pstate->lexptr[0] == '\\')
732 value = lex_escape (is_byte);
734 if (is_byte)
735 obstack_1grow (&obstack, value);
736 else
737 convert_between_encodings (HOST_UTF32, "UTF-8",
738 (gdb_byte *) &value,
739 sizeof (value), sizeof (value),
740 &obstack, translit_none);
742 else if (pstate->lexptr[0] == '\0')
743 error (_("Unexpected EOF in string"));
744 else
746 value = pstate->lexptr[0] & 0xff;
747 if (is_byte && value > 127)
748 error (_("Non-ASCII value in byte string"));
749 obstack_1grow (&obstack, value);
750 ++pstate->lexptr;
754 current_string_val.length = obstack_object_size (&obstack);
755 current_string_val.ptr = (const char *) obstack_finish (&obstack);
756 return is_byte ? BYTESTRING : STRING;
/* Return true if STRING starts with whitespace followed by a digit.
   "Whitespace" here means one or more spaces or tabs; a digit with
   no whitespace before it does not count.  */

static bool
space_then_number (const char *string)
{
  const char *p = string;

  while (p[0] == ' ' || p[0] == '\t')
    ++p;
  if (p == string)
    return false;

  return *p >= '0' && *p <= '9';
}
/* Return true if C can start an identifier: an ASCII letter, '_',
   '$', or any byte with the high bit set.  */

static bool
rust_identifier_start_p (char c)
{
  return ((c >= 'a' && c <= 'z')
          || (c >= 'A' && c <= 'Z')
          || c == '_'
          || c == '$'
          /* Allow any non-ASCII character as an identifier.  There
             doesn't seem to be a need to be picky about this.  */
          || (c & 0x80) != 0);
}
788 /* Lex an identifier. */
791 rust_parser::lex_identifier ()
793 unsigned int length;
794 const struct token_info *token;
795 int is_gdb_var = pstate->lexptr[0] == '$';
797 bool is_raw = false;
798 if (pstate->lexptr[0] == 'r'
799 && pstate->lexptr[1] == '#'
800 && rust_identifier_start_p (pstate->lexptr[2]))
802 is_raw = true;
803 pstate->lexptr += 2;
806 const char *start = pstate->lexptr;
807 gdb_assert (rust_identifier_start_p (pstate->lexptr[0]));
809 ++pstate->lexptr;
811 /* Allow any non-ASCII character here. This "handles" UTF-8 by
812 passing it through. */
813 while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
814 || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
815 || pstate->lexptr[0] == '_'
816 || (is_gdb_var && pstate->lexptr[0] == '$')
817 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
818 || (pstate->lexptr[0] & 0x80) != 0)
819 ++pstate->lexptr;
822 length = pstate->lexptr - start;
823 token = NULL;
824 if (!is_raw)
826 for (const auto &candidate : identifier_tokens)
828 if (length == strlen (candidate.name)
829 && strncmp (candidate.name, start, length) == 0)
831 token = &candidate;
832 break;
837 if (token != NULL)
839 if (token->value == 0)
841 /* Leave the terminating token alone. */
842 pstate->lexptr = start;
843 return 0;
846 else if (token == NULL
847 && !is_raw
848 && (strncmp (start, "thread", length) == 0
849 || strncmp (start, "task", length) == 0)
850 && space_then_number (pstate->lexptr))
852 /* "task" or "thread" followed by a number terminates the
853 parse, per gdb rules. */
854 pstate->lexptr = start;
855 return 0;
858 if (token == NULL || (pstate->parse_completion && pstate->lexptr[0] == '\0'))
860 current_string_val.length = length;
861 current_string_val.ptr = start;
864 if (pstate->parse_completion && pstate->lexptr[0] == '\0')
866 /* Prevent rustyylex from returning two COMPLETE tokens. */
867 pstate->prev_lexptr = pstate->lexptr;
868 return COMPLETE;
871 if (token != NULL)
872 return token->value;
873 if (is_gdb_var)
874 return GDBVAR;
875 return IDENT;
878 /* Lex an operator. */
881 rust_parser::lex_operator ()
883 const struct token_info *token = NULL;
885 for (const auto &candidate : operator_tokens)
887 if (strncmp (candidate.name, pstate->lexptr,
888 strlen (candidate.name)) == 0)
890 pstate->lexptr += strlen (candidate.name);
891 token = &candidate;
892 break;
896 if (token != NULL)
898 current_opcode = token->opcode;
899 return token->value;
902 return *pstate->lexptr++;
905 /* Lex a decimal integer. */
908 rust_parser::lex_decimal_integer ()
910 gdb_assert (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9');
912 std::string copy;
913 while (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
915 copy.push_back (pstate->lexptr[0]);
916 ++pstate->lexptr;
919 /* No need to set the value's type in this situation. */
920 current_int_val.val.set (copy.c_str (), 10);
922 return DECIMAL_INTEGER;
925 /* Lex a number. */
928 rust_parser::lex_number ()
930 regmatch_t subexps[NUM_SUBEXPRESSIONS];
931 int match;
932 bool is_integer = false;
933 bool implicit_i32 = false;
934 const char *type_name = NULL;
935 struct type *type;
936 int end_index;
937 int type_index = -1;
939 match = regexec (&number_regex, pstate->lexptr, ARRAY_SIZE (subexps),
940 subexps, 0);
941 /* Failure means the regexp is broken. */
942 gdb_assert (match == 0);
944 if (subexps[INT_TEXT].rm_so != -1)
946 /* Integer part matched. */
947 is_integer = true;
948 end_index = subexps[INT_TEXT].rm_eo;
949 if (subexps[INT_TYPE].rm_so == -1)
951 type_name = "i32";
952 implicit_i32 = true;
954 else
955 type_index = INT_TYPE;
957 else if (subexps[FLOAT_TYPE1].rm_so != -1)
959 /* Found floating point type suffix. */
960 end_index = subexps[FLOAT_TYPE1].rm_so;
961 type_index = FLOAT_TYPE1;
963 else if (subexps[FLOAT_TYPE2].rm_so != -1)
965 /* Found floating point type suffix. */
966 end_index = subexps[FLOAT_TYPE2].rm_so;
967 type_index = FLOAT_TYPE2;
969 else
971 /* Any other floating point match. */
972 end_index = subexps[0].rm_eo;
973 type_name = "f64";
976 /* We need a special case if the final character is ".". In this
977 case we might need to parse an integer. For example, "23.f()" is
978 a request for a trait method call, not a syntax error involving
979 the floating point number "23.". */
980 gdb_assert (subexps[0].rm_eo > 0);
981 if (pstate->lexptr[subexps[0].rm_eo - 1] == '.')
983 const char *next = skip_spaces (&pstate->lexptr[subexps[0].rm_eo]);
985 if (rust_identifier_start_p (*next) || *next == '.')
987 --subexps[0].rm_eo;
988 is_integer = true;
989 end_index = subexps[0].rm_eo;
990 type_name = "i32";
991 implicit_i32 = true;
995 /* Compute the type name if we haven't already. */
996 std::string type_name_holder;
997 if (type_name == NULL)
999 gdb_assert (type_index != -1);
1000 type_name_holder = std::string ((pstate->lexptr
1001 + subexps[type_index].rm_so),
1002 (subexps[type_index].rm_eo
1003 - subexps[type_index].rm_so));
1004 type_name = type_name_holder.c_str ();
1007 /* Look up the type. */
1008 type = get_type (type_name);
1010 /* Copy the text of the number and remove the "_"s. */
1011 std::string number;
1012 for (int i = 0; i < end_index && pstate->lexptr[i]; ++i)
1014 if (pstate->lexptr[i] != '_')
1015 number.push_back (pstate->lexptr[i]);
1018 /* Advance past the match. */
1019 pstate->lexptr += subexps[0].rm_eo;
1021 /* Parse the number. */
1022 if (is_integer)
1024 int radix = 10;
1025 int offset = 0;
1027 if (number[0] == '0')
1029 if (number[1] == 'x')
1030 radix = 16;
1031 else if (number[1] == 'o')
1032 radix = 8;
1033 else if (number[1] == 'b')
1034 radix = 2;
1035 if (radix != 10)
1036 offset = 2;
1039 if (!current_int_val.val.set (number.c_str () + offset, radix))
1041 /* Shouldn't be possible. */
1042 error (_("Invalid integer"));
1044 if (implicit_i32)
1046 static gdb_mpz sixty_three_bit = gdb_mpz::pow (2, 63);
1047 static gdb_mpz thirty_one_bit = gdb_mpz::pow (2, 31);
1049 if (current_int_val.val >= sixty_three_bit)
1050 type = get_type ("i128");
1051 else if (current_int_val.val >= thirty_one_bit)
1052 type = get_type ("i64");
1055 current_int_val.type = type;
1057 else
1059 current_float_val.type = type;
1060 bool parsed = parse_float (number.c_str (), number.length (),
1061 current_float_val.type,
1062 current_float_val.val.data ());
1063 gdb_assert (parsed);
1066 return is_integer ? INTEGER : FLOAT;
1069 /* The lexer. */
1072 rust_parser::lex_one_token (bool decimal_only)
1074 /* Skip all leading whitespace. */
1075 while (pstate->lexptr[0] == ' '
1076 || pstate->lexptr[0] == '\t'
1077 || pstate->lexptr[0] == '\r'
1078 || pstate->lexptr[0] == '\n')
1079 ++pstate->lexptr;
1081 /* If we hit EOF and we're completing, then return COMPLETE -- maybe
1082 we're completing an empty string at the end of a field_expr.
1083 But, we don't want to return two COMPLETE tokens in a row. */
1084 if (pstate->lexptr[0] == '\0' && pstate->lexptr == pstate->prev_lexptr)
1085 return 0;
1086 pstate->prev_lexptr = pstate->lexptr;
1087 if (pstate->lexptr[0] == '\0')
1089 if (pstate->parse_completion)
1091 current_string_val.length =0;
1092 current_string_val.ptr = "";
1093 return COMPLETE;
1095 return 0;
1098 if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
1099 return decimal_only ? lex_decimal_integer () : lex_number ();
1100 else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'')
1101 return lex_character ();
1102 else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"')
1103 return lex_string ();
1104 else if (pstate->lexptr[0] == 'b' && starts_raw_string (pstate->lexptr + 1))
1105 return lex_string ();
1106 else if (starts_raw_string (pstate->lexptr))
1107 return lex_string ();
1108 else if (rust_identifier_start_p (pstate->lexptr[0]))
1109 return lex_identifier ();
1110 else if (pstate->lexptr[0] == '"')
1111 return lex_string ();
1112 else if (pstate->lexptr[0] == '\'')
1113 return lex_character ();
1114 else if (pstate->lexptr[0] == '}' || pstate->lexptr[0] == ']')
1116 /* Falls through to lex_operator. */
1117 --paren_depth;
1119 else if (pstate->lexptr[0] == '(' || pstate->lexptr[0] == '{')
1121 /* Falls through to lex_operator. */
1122 ++paren_depth;
1124 else if (pstate->lexptr[0] == ',' && pstate->comma_terminates
1125 && paren_depth == 0)
1126 return 0;
1128 return lex_operator ();
1131 /* Push back a single character to be re-lexed. */
1133 void
1134 rust_parser::push_back (char c)
1136 /* Can't be called before any lexing. */
1137 gdb_assert (pstate->prev_lexptr != NULL);
1139 --pstate->lexptr;
1140 gdb_assert (*pstate->lexptr == c);
1145 /* Parse a tuple or paren expression. */
1147 operation_up
1148 rust_parser::parse_tuple ()
1150 assume ('(');
1152 if (current_token == ')')
1154 lex ();
1155 struct type *unit = get_type ("()");
1156 return make_operation<long_const_operation> (unit, 0);
1159 operation_up expr = parse_expr ();
1160 if (current_token == ')')
1162 /* Parenthesized expression. */
1163 lex ();
1164 return make_operation<rust_parenthesized_operation> (std::move (expr));
1167 std::vector<operation_up> ops;
1168 ops.push_back (std::move (expr));
1169 while (current_token != ')')
1171 if (current_token != ',')
1172 error (_("',' or ')' expected"));
1173 lex ();
1175 /* A trailing "," is ok. */
1176 if (current_token != ')')
1177 ops.push_back (parse_expr ());
1180 assume (')');
1182 error (_("Tuple expressions not supported yet"));
1185 /* Parse an array expression. */
1187 operation_up
1188 rust_parser::parse_array ()
1190 assume ('[');
1192 if (current_token == KW_MUT)
1193 lex ();
1195 operation_up result;
1196 operation_up expr = parse_expr ();
1197 if (current_token == ';')
1199 lex ();
1200 operation_up rhs = parse_expr ();
1201 result = make_operation<rust_array_operation> (std::move (expr),
1202 std::move (rhs));
1204 else if (current_token == ',' || current_token == ']')
1206 std::vector<operation_up> ops;
1207 ops.push_back (std::move (expr));
1208 while (current_token != ']')
1210 if (current_token != ',')
1211 error (_("',' or ']' expected"));
1212 lex ();
1213 ops.push_back (parse_expr ());
1215 ops.shrink_to_fit ();
1216 int len = ops.size () - 1;
1217 result = make_operation<array_operation> (0, len, std::move (ops));
1219 else
1220 error (_("',', ';', or ']' expected"));
1222 require (']');
1224 return result;
1227 /* Turn a name into an operation. */
1229 operation_up
1230 rust_parser::name_to_operation (const std::string &name)
1232 struct block_symbol sym = lookup_symbol (name.c_str (),
1233 pstate->expression_context_block,
1234 SEARCH_VFT);
1235 if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
1236 return make_operation<var_value_operation> (sym);
1238 struct type *type = nullptr;
1240 if (sym.symbol != nullptr)
1242 gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
1243 type = sym.symbol->type ();
1245 if (type == nullptr)
1246 type = rust_lookup_type (name.c_str ());
1247 if (type == nullptr)
1248 error (_("No symbol '%s' in current context"), name.c_str ());
1250 if (type->code () == TYPE_CODE_STRUCT && type->num_fields () == 0)
1252 /* A unit-like struct. */
1253 operation_up result (new rust_aggregate_operation (type, {}, {}));
1254 return result;
1256 else
1257 return make_operation<type_operation> (type);
1260 /* Parse a struct expression. */
1262 operation_up
1263 rust_parser::parse_struct_expr (struct type *type)
1265 assume ('{');
1267 if (type->code () != TYPE_CODE_STRUCT
1268 || rust_tuple_type_p (type)
1269 || rust_tuple_struct_type_p (type))
1270 error (_("Struct expression applied to non-struct type"));
1272 std::vector<std::pair<std::string, operation_up>> field_v;
1273 while (current_token != '}' && current_token != DOTDOT)
1275 if (current_token != IDENT)
1276 error (_("'}', '..', or identifier expected"));
1278 std::string name = get_string ();
1279 lex ();
1281 operation_up expr;
1282 if (current_token == ',' || current_token == '}'
1283 || current_token == DOTDOT)
1284 expr = name_to_operation (name);
1285 else
1287 require (':');
1288 expr = parse_expr ();
1290 field_v.emplace_back (std::move (name), std::move (expr));
1292 /* A trailing "," is ok. */
1293 if (current_token == ',')
1294 lex ();
1297 operation_up others;
1298 if (current_token == DOTDOT)
1300 lex ();
1301 others = parse_expr ();
1304 require ('}');
1306 return make_operation<rust_aggregate_operation> (type,
1307 std::move (others),
1308 std::move (field_v));
1311 /* Used by the operator precedence parser. */
1312 struct rustop_item
1314 rustop_item (int token_, int precedence_, enum exp_opcode opcode_,
1315 operation_up &&op_)
1316 : token (token_),
1317 precedence (precedence_),
1318 opcode (opcode_),
1319 op (std::move (op_))
1323 /* The token value. */
1324 int token;
1325 /* Precedence of this operator. */
1326 int precedence;
1327 /* This is used only for assign-modify. */
1328 enum exp_opcode opcode;
1329 /* The right hand side of this operation. */
1330 operation_up op;
1333 /* An operator precedence parser for binary operations, including
1334 "as". */
1336 operation_up
1337 rust_parser::parse_binop (bool required)
1339 /* All the binary operators. Each one is of the form
1340 OPERATION(TOKEN, PRECEDENCE, TYPE)
1341 TOKEN is the corresponding operator token.
1342 PRECEDENCE is a value indicating relative precedence.
1343 TYPE is the operation type corresponding to the operator.
1344 Assignment operations are handled specially, not via this
1345 table; they have precedence 0. */
1346 #define ALL_OPS \
1347 OPERATION ('*', 10, mul_operation) \
1348 OPERATION ('/', 10, div_operation) \
1349 OPERATION ('%', 10, rem_operation) \
1350 OPERATION ('@', 9, repeat_operation) \
1351 OPERATION ('+', 8, add_operation) \
1352 OPERATION ('-', 8, sub_operation) \
1353 OPERATION (LSH, 7, lsh_operation) \
1354 OPERATION (RSH, 7, rsh_operation) \
1355 OPERATION ('&', 6, bitwise_and_operation) \
1356 OPERATION ('^', 5, bitwise_xor_operation) \
1357 OPERATION ('|', 4, bitwise_ior_operation) \
1358 OPERATION (EQEQ, 3, equal_operation) \
1359 OPERATION (NOTEQ, 3, notequal_operation) \
1360 OPERATION ('<', 3, less_operation) \
1361 OPERATION (LTEQ, 3, leq_operation) \
1362 OPERATION ('>', 3, gtr_operation) \
1363 OPERATION (GTEQ, 3, geq_operation) \
1364 OPERATION (ANDAND, 2, logical_and_operation) \
1365 OPERATION (OROR, 1, logical_or_operation)
1367 #define ASSIGN_PREC 0
1369 operation_up start = parse_atom (required);
1370 if (start == nullptr)
1372 gdb_assert (!required);
1373 return start;
1376 std::vector<rustop_item> operator_stack;
1377 operator_stack.emplace_back (0, -1, OP_NULL, std::move (start));
1379 while (true)
1381 int this_token = current_token;
1382 enum exp_opcode compound_assign_op = OP_NULL;
1383 int precedence = -2;
1385 switch (this_token)
1387 #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1388 case TOKEN: \
1389 precedence = PRECEDENCE; \
1390 lex (); \
1391 break;
1393 ALL_OPS
1395 #undef OPERATION
1397 case COMPOUND_ASSIGN:
1398 compound_assign_op = current_opcode;
1399 [[fallthrough]];
1400 case '=':
1401 precedence = ASSIGN_PREC;
1402 lex ();
1403 break;
1405 /* "as" must be handled specially. */
1406 case KW_AS:
1408 lex ();
1409 rustop_item &lhs = operator_stack.back ();
1410 struct type *type = parse_type ();
1411 lhs.op = make_operation<unop_cast_operation> (std::move (lhs.op),
1412 type);
1414 /* Bypass the rest of the loop. */
1415 continue;
1417 default:
1418 /* Arrange to pop the entire stack. */
1419 precedence = -2;
1420 break;
1423 /* Make sure that assignments are right-associative while other
1424 operations are left-associative. */
1425 while ((precedence == ASSIGN_PREC
1426 ? precedence < operator_stack.back ().precedence
1427 : precedence <= operator_stack.back ().precedence)
1428 && operator_stack.size () > 1)
1430 rustop_item rhs = std::move (operator_stack.back ());
1431 operator_stack.pop_back ();
1433 rustop_item &lhs = operator_stack.back ();
1435 switch (rhs.token)
1437 #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1438 case TOKEN: \
1439 lhs.op = make_operation<TYPE> (std::move (lhs.op), \
1440 std::move (rhs.op)); \
1441 break;
1443 ALL_OPS
1445 #undef OPERATION
1447 case '=':
1448 case COMPOUND_ASSIGN:
1450 if (rhs.token == '=')
1451 lhs.op = (make_operation<assign_operation>
1452 (std::move (lhs.op), std::move (rhs.op)));
1453 else
1454 lhs.op = (make_operation<assign_modify_operation>
1455 (rhs.opcode, std::move (lhs.op),
1456 std::move (rhs.op)));
1458 struct type *unit_type = get_type ("()");
1460 operation_up nil (new long_const_operation (unit_type, 0));
1461 lhs.op = (make_operation<comma_operation>
1462 (std::move (lhs.op), std::move (nil)));
1464 break;
1466 default:
1467 gdb_assert_not_reached ("bad binary operator");
1471 if (precedence == -2)
1472 break;
1474 operator_stack.emplace_back (this_token, precedence, compound_assign_op,
1475 parse_atom (true));
1478 gdb_assert (operator_stack.size () == 1);
1479 return std::move (operator_stack[0].op);
1480 #undef ALL_OPS
1483 /* Parse a range expression. */
1485 operation_up
1486 rust_parser::parse_range ()
1488 enum range_flag kind = (RANGE_HIGH_BOUND_DEFAULT
1489 | RANGE_LOW_BOUND_DEFAULT);
1491 operation_up lhs;
1492 if (current_token != DOTDOT && current_token != DOTDOTEQ)
1494 lhs = parse_binop (true);
1495 kind &= ~RANGE_LOW_BOUND_DEFAULT;
1498 if (current_token == DOTDOT)
1499 kind |= RANGE_HIGH_BOUND_EXCLUSIVE;
1500 else if (current_token != DOTDOTEQ)
1501 return lhs;
1502 lex ();
1504 /* A "..=" range requires a high bound, but otherwise it is
1505 optional. */
1506 operation_up rhs = parse_binop ((kind & RANGE_HIGH_BOUND_EXCLUSIVE) == 0);
1507 if (rhs != nullptr)
1508 kind &= ~RANGE_HIGH_BOUND_DEFAULT;
1510 return make_operation<rust_range_operation> (kind,
1511 std::move (lhs),
1512 std::move (rhs));
1515 /* Parse an expression. */
1517 operation_up
1518 rust_parser::parse_expr ()
1520 return parse_range ();
1523 /* Parse a sizeof expression. */
1525 operation_up
1526 rust_parser::parse_sizeof ()
1528 assume (KW_SIZEOF);
1530 require ('(');
1531 operation_up result = make_operation<unop_sizeof_operation> (parse_expr ());
1532 require (')');
1533 return result;
1536 /* Parse an address-of operation. */
1538 operation_up
1539 rust_parser::parse_addr ()
1541 assume ('&');
1543 if (current_token == KW_MUT)
1544 lex ();
1546 return make_operation<rust_unop_addr_operation> (parse_atom (true));
1549 /* Parse a field expression. */
1551 operation_up
1552 rust_parser::parse_field (operation_up &&lhs)
1554 assume ('.', true);
1556 operation_up result;
1557 switch (current_token)
1559 case IDENT:
1560 case COMPLETE:
1562 bool is_complete = current_token == COMPLETE;
1563 auto struct_op = new rust_structop (std::move (lhs), get_string ());
1564 lex ();
1565 if (is_complete)
1567 completion_op.reset (struct_op);
1568 pstate->mark_struct_expression (struct_op);
1569 /* Throw to the outermost level of the parser. */
1570 error (_("not really an error"));
1572 result.reset (struct_op);
1574 break;
1576 case DECIMAL_INTEGER:
1578 int idx = current_int_val.val.as_integer<int> ();
1579 result = make_operation<rust_struct_anon> (idx, std::move (lhs));
1580 lex ();
1582 break;
1584 default:
1585 error (_("field name expected"));
1588 return result;
1591 /* Parse an index expression. */
1593 operation_up
1594 rust_parser::parse_index (operation_up &&lhs)
1596 assume ('[');
1597 operation_up rhs = parse_expr ();
1598 require (']');
1600 return make_operation<rust_subscript_operation> (std::move (lhs),
1601 std::move (rhs));
1604 /* Parse a sequence of comma-separated expressions in parens. */
1606 std::vector<operation_up>
1607 rust_parser::parse_paren_args ()
1609 assume ('(');
1611 std::vector<operation_up> args;
1612 while (current_token != ')')
1614 if (!args.empty ())
1616 if (current_token != ',')
1617 error (_("',' or ')' expected"));
1618 lex ();
1621 args.push_back (parse_expr ());
1624 assume (')');
1626 return args;
1629 /* Parse the parenthesized part of a function call. */
1631 operation_up
1632 rust_parser::parse_call (operation_up &&lhs)
1634 std::vector<operation_up> args = parse_paren_args ();
1636 return make_operation<funcall_operation> (std::move (lhs),
1637 std::move (args));
1640 /* Parse a list of types. */
1642 std::vector<struct type *>
1643 rust_parser::parse_type_list ()
1645 std::vector<struct type *> result;
1646 result.push_back (parse_type ());
1647 while (current_token == ',')
1649 lex ();
1650 result.push_back (parse_type ());
1652 return result;
1655 /* Parse a possibly-empty list of types, surrounded in parens. */
1657 std::vector<struct type *>
1658 rust_parser::parse_maybe_type_list ()
1660 assume ('(');
1661 std::vector<struct type *> types;
1662 if (current_token != ')')
1663 types = parse_type_list ();
1664 require (')');
1665 return types;
1668 /* Parse an array type. */
1670 struct type *
1671 rust_parser::parse_array_type ()
1673 assume ('[');
1674 struct type *elt_type = parse_type ();
1675 require (';');
1677 if (current_token != INTEGER)
1678 error (_("integer expected"));
1679 ULONGEST val = current_int_val.val.as_integer<ULONGEST> ();
1680 lex ();
1681 require (']');
1683 return lookup_array_range_type (elt_type, 0, val - 1);
1686 /* Parse a slice type. */
1688 struct type *
1689 rust_parser::parse_slice_type ()
1691 assume ('&');
1693 /* Handle &str specially. This is an important type in Rust. While
1694 the compiler does emit the "&str" type in the DWARF, just "str"
1695 itself isn't always available -- but it's handy if this works
1696 seamlessly. */
1697 if (current_token == IDENT && get_string () == "str")
1699 lex ();
1700 return rust_slice_type ("&str", get_type ("u8"), get_type ("usize"));
1703 bool is_slice = current_token == '[';
1704 if (is_slice)
1705 lex ();
1707 struct type *target = parse_type ();
1709 if (is_slice)
1711 require (']');
1712 return rust_slice_type ("&[*gdb*]", target, get_type ("usize"));
1715 /* For now we treat &x and *x identically. */
1716 return lookup_pointer_type (target);
1719 /* Parse a pointer type. */
1721 struct type *
1722 rust_parser::parse_pointer_type ()
1724 assume ('*');
1726 if (current_token == KW_MUT || current_token == KW_CONST)
1727 lex ();
1729 struct type *target = parse_type ();
1730 /* For the time being we ignore mut/const. */
1731 return lookup_pointer_type (target);
1734 /* Parse a function type. */
1736 struct type *
1737 rust_parser::parse_function_type ()
1739 assume (KW_FN);
1741 if (current_token != '(')
1742 error (_("'(' expected"));
1744 std::vector<struct type *> types = parse_maybe_type_list ();
1746 if (current_token != ARROW)
1747 error (_("'->' expected"));
1748 lex ();
1750 struct type *result_type = parse_type ();
1752 struct type **argtypes = nullptr;
1753 if (!types.empty ())
1754 argtypes = types.data ();
1756 result_type = lookup_function_type_with_arguments (result_type,
1757 types.size (),
1758 argtypes);
1759 return lookup_pointer_type (result_type);
1762 /* Parse a tuple type. */
1764 struct type *
1765 rust_parser::parse_tuple_type ()
1767 std::vector<struct type *> types = parse_maybe_type_list ();
1769 auto_obstack obstack;
1770 obstack_1grow (&obstack, '(');
1771 for (int i = 0; i < types.size (); ++i)
1773 std::string type_name = type_to_string (types[i]);
1775 if (i > 0)
1776 obstack_1grow (&obstack, ',');
1777 obstack_grow_str (&obstack, type_name.c_str ());
1780 obstack_grow_str0 (&obstack, ")");
1781 const char *name = (const char *) obstack_finish (&obstack);
1783 /* We don't allow creating new tuple types (yet), but we do allow
1784 looking up existing tuple types. */
1785 struct type *result = rust_lookup_type (name);
1786 if (result == nullptr)
1787 error (_("could not find tuple type '%s'"), name);
1789 return result;
1792 /* Parse a type. */
1794 struct type *
1795 rust_parser::parse_type ()
1797 switch (current_token)
1799 case '[':
1800 return parse_array_type ();
1801 case '&':
1802 return parse_slice_type ();
1803 case '*':
1804 return parse_pointer_type ();
1805 case KW_FN:
1806 return parse_function_type ();
1807 case '(':
1808 return parse_tuple_type ();
1809 case KW_SELF:
1810 case KW_SUPER:
1811 case COLONCOLON:
1812 case KW_EXTERN:
1813 case IDENT:
1815 std::string path = parse_path (false);
1816 struct type *result = rust_lookup_type (path.c_str ());
1817 if (result == nullptr)
1818 error (_("No type name '%s' in current context"), path.c_str ());
1819 return result;
1821 default:
1822 error (_("type expected"));
1826 /* Parse a path. */
1828 std::string
1829 rust_parser::parse_path (bool for_expr)
1831 unsigned n_supers = 0;
1832 int first_token = current_token;
1834 switch (current_token)
1836 case KW_SELF:
1837 lex ();
1838 if (current_token != COLONCOLON)
1839 return "self";
1840 lex ();
1841 [[fallthrough]];
1842 case KW_SUPER:
1843 while (current_token == KW_SUPER)
1845 ++n_supers;
1846 lex ();
1847 if (current_token != COLONCOLON)
1848 error (_("'::' expected"));
1849 lex ();
1851 break;
1853 case COLONCOLON:
1854 lex ();
1855 break;
1857 case KW_EXTERN:
1858 /* This is a gdb extension to make it possible to refer to items
1859 in other crates. It just bypasses adding the current crate
1860 to the front of the name. */
1861 lex ();
1862 break;
1865 if (current_token != IDENT)
1866 error (_("identifier expected"));
1867 std::string path = get_string ();
1868 bool saw_ident = true;
1869 lex ();
1871 /* The condition here lets us enter the loop even if we see
1872 "ident<...>". */
1873 while (current_token == COLONCOLON || current_token == '<')
1875 if (current_token == COLONCOLON)
1877 lex ();
1878 saw_ident = false;
1880 if (current_token == IDENT)
1882 path = path + "::" + get_string ();
1883 lex ();
1884 saw_ident = true;
1886 else if (current_token == COLONCOLON)
1888 /* The code below won't detect this scenario. */
1889 error (_("unexpected '::'"));
1893 if (current_token != '<')
1894 continue;
1896 /* Expression use name::<...>, whereas types use name<...>. */
1897 if (for_expr)
1899 /* Expressions use "name::<...>", so if we saw an identifier
1900 after the "::", we ignore the "<" here. */
1901 if (saw_ident)
1902 break;
1904 else
1906 /* Types use "name<...>", so we need to have seen the
1907 identifier. */
1908 if (!saw_ident)
1909 break;
1912 lex ();
1913 std::vector<struct type *> types = parse_type_list ();
1914 if (current_token == '>')
1915 lex ();
1916 else if (current_token == RSH)
1918 push_back ('>');
1919 lex ();
1921 else
1922 error (_("'>' expected"));
1924 path += "<";
1925 for (int i = 0; i < types.size (); ++i)
1927 if (i > 0)
1928 path += ",";
1929 path += type_to_string (types[i]);
1931 path += ">";
1932 break;
1935 switch (first_token)
1937 case KW_SELF:
1938 case KW_SUPER:
1939 return super_name (path, n_supers);
1941 case COLONCOLON:
1942 return crate_name (path);
1944 case KW_EXTERN:
1945 return "::" + path;
1947 case IDENT:
1948 return path;
1950 default:
1951 gdb_assert_not_reached ("missing case in path parsing");
1955 /* Handle the parsing for a string expression. */
1957 operation_up
1958 rust_parser::parse_string ()
1960 gdb_assert (current_token == STRING);
1962 /* Wrap the raw string in the &str struct. */
1963 struct type *type = rust_lookup_type ("&str");
1964 if (type == nullptr)
1965 error (_("Could not find type '&str'"));
1967 std::vector<std::pair<std::string, operation_up>> field_v;
1969 size_t len = current_string_val.length;
1970 operation_up str = make_operation<string_operation> (get_string ());
1971 operation_up addr
1972 = make_operation<rust_unop_addr_operation> (std::move (str));
1973 field_v.emplace_back ("data_ptr", std::move (addr));
1975 struct type *valtype = get_type ("usize");
1976 operation_up lenop = make_operation<long_const_operation> (valtype, len);
1977 field_v.emplace_back ("length", std::move (lenop));
1979 return make_operation<rust_aggregate_operation> (type,
1980 operation_up (),
1981 std::move (field_v));
1984 /* Parse a tuple struct expression. */
1986 operation_up
1987 rust_parser::parse_tuple_struct (struct type *type)
1989 std::vector<operation_up> args = parse_paren_args ();
1991 std::vector<std::pair<std::string, operation_up>> field_v (args.size ());
1992 for (int i = 0; i < args.size (); ++i)
1993 field_v[i] = { string_printf ("__%d", i), std::move (args[i]) };
1995 return (make_operation<rust_aggregate_operation>
1996 (type, operation_up (), std::move (field_v)));
1999 /* Parse a path expression. */
2001 operation_up
2002 rust_parser::parse_path_expr ()
2004 std::string path = parse_path (true);
2006 if (current_token == '{')
2008 struct type *type = rust_lookup_type (path.c_str ());
2009 if (type == nullptr)
2010 error (_("Could not find type '%s'"), path.c_str ());
2012 return parse_struct_expr (type);
2014 else if (current_token == '(')
2016 struct type *type = rust_lookup_type (path.c_str ());
2017 /* If this is actually a tuple struct expression, handle it
2018 here. If it is a call, it will be handled elsewhere. */
2019 if (type != nullptr)
2021 if (!rust_tuple_struct_type_p (type))
2022 error (_("Type %s is not a tuple struct"), path.c_str ());
2023 return parse_tuple_struct (type);
2027 return name_to_operation (path);
2030 /* Parse an atom. "Atom" isn't a Rust term, but this refers to a
2031 single unitary item in the grammar; but here including some unary
2032 prefix and postfix expressions. */
2034 operation_up
2035 rust_parser::parse_atom (bool required)
2037 operation_up result;
2039 switch (current_token)
2041 case '(':
2042 result = parse_tuple ();
2043 break;
2045 case '[':
2046 result = parse_array ();
2047 break;
2049 case INTEGER:
2050 result = make_operation<long_const_operation> (current_int_val.type,
2051 current_int_val.val);
2052 lex ();
2053 break;
2055 case FLOAT:
2056 result = make_operation<float_const_operation> (current_float_val.type,
2057 current_float_val.val);
2058 lex ();
2059 break;
2061 case STRING:
2062 result = parse_string ();
2063 lex ();
2064 break;
2066 case BYTESTRING:
2067 result = make_operation<string_operation> (get_string ());
2068 lex ();
2069 break;
2071 case KW_TRUE:
2072 case KW_FALSE:
2073 result = make_operation<bool_operation> (current_token == KW_TRUE);
2074 lex ();
2075 break;
2077 case GDBVAR:
2078 /* This is kind of a hacky approach. */
2080 pstate->push_dollar (current_string_val);
2081 result = pstate->pop ();
2082 lex ();
2084 break;
2086 case KW_SELF:
2087 case KW_SUPER:
2088 case COLONCOLON:
2089 case KW_EXTERN:
2090 case IDENT:
2091 result = parse_path_expr ();
2092 break;
2094 case '*':
2095 lex ();
2096 result = make_operation<rust_unop_ind_operation> (parse_atom (true));
2097 break;
2098 case '+':
2099 lex ();
2100 result = make_operation<unary_plus_operation> (parse_atom (true));
2101 break;
2102 case '-':
2103 lex ();
2104 result = make_operation<unary_neg_operation> (parse_atom (true));
2105 break;
2106 case '!':
2107 lex ();
2108 result = make_operation<rust_unop_compl_operation> (parse_atom (true));
2109 break;
2110 case KW_SIZEOF:
2111 result = parse_sizeof ();
2112 break;
2113 case '&':
2114 result = parse_addr ();
2115 break;
2117 default:
2118 if (!required)
2119 return {};
2120 error (_("unexpected token"));
2123 /* Now parse suffixes. */
2124 while (true)
2126 switch (current_token)
2128 case '.':
2129 result = parse_field (std::move (result));
2130 break;
2132 case '[':
2133 result = parse_index (std::move (result));
2134 break;
2136 case '(':
2137 result = parse_call (std::move (result));
2138 break;
2140 default:
2141 return result;
2148 /* The parser as exposed to gdb. */
2151 rust_language::parser (struct parser_state *state) const
2153 rust_parser parser (state);
2155 operation_up result;
2158 result = parser.parse_entry_point ();
2160 catch (const gdb_exception &exc)
2162 if (state->parse_completion)
2164 result = std::move (parser.completion_op);
2165 if (result == nullptr)
2166 throw;
2168 else
2169 throw;
2172 state->set_operation (std::move (result));
2174 return 0;
2179 #if GDB_SELF_TEST
2181 /* A test helper that lexes a string, expecting a single token. */
2183 static void
2184 rust_lex_test_one (rust_parser *parser, const char *input, int expected)
2186 int token;
2188 parser->reset (input);
2190 token = parser->lex_one_token (false);
2191 SELF_CHECK (token == expected);
2193 if (token)
2195 token = parser->lex_one_token (false);
2196 SELF_CHECK (token == 0);
2200 /* Test that INPUT lexes as the integer VALUE. */
2202 static void
2203 rust_lex_int_test (rust_parser *parser, const char *input,
2204 ULONGEST value, int kind)
2206 rust_lex_test_one (parser, input, kind);
2207 SELF_CHECK (parser->current_int_val.val == value);
2210 /* Test that INPUT throws an exception with text ERR. */
2212 static void
2213 rust_lex_exception_test (rust_parser *parser, const char *input,
2214 const char *err)
2218 /* The "kind" doesn't matter. */
2219 rust_lex_test_one (parser, input, DECIMAL_INTEGER);
2220 SELF_CHECK (0);
2222 catch (const gdb_exception_error &except)
2224 SELF_CHECK (strcmp (except.what (), err) == 0);
2228 /* Test that INPUT lexes as the identifier, string, or byte-string
2229 VALUE. KIND holds the expected token kind. */
2231 static void
2232 rust_lex_stringish_test (rust_parser *parser, const char *input,
2233 const char *value, int kind)
2235 rust_lex_test_one (parser, input, kind);
2236 SELF_CHECK (parser->get_string () == value);
2239 /* Helper to test that a string parses as a given token sequence. */
2241 static void
2242 rust_lex_test_sequence (rust_parser *parser, const char *input, int len,
2243 const int expected[])
2245 parser->reset (input);
2247 for (int i = 0; i < len; ++i)
2249 int token = parser->lex_one_token (false);
2250 SELF_CHECK (token == expected[i]);
2254 /* Tests for an integer-parsing corner case. */
2256 static void
2257 rust_lex_test_trailing_dot (rust_parser *parser)
2259 const int expected1[] = { INTEGER, '.', IDENT, '(', ')', 0 };
2260 const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 };
2261 const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 };
2262 const int expected4[] = { INTEGER, DOTDOT, INTEGER, 0 };
2264 rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1);
2265 rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2),
2266 expected2);
2267 rust_lex_test_sequence (parser, "23.==()", ARRAY_SIZE (expected3),
2268 expected3);
2269 rust_lex_test_sequence (parser, "23..25", ARRAY_SIZE (expected4), expected4);
2272 /* Tests of completion. */
2274 static void
2275 rust_lex_test_completion (rust_parser *parser)
2277 const int expected[] = { IDENT, '.', COMPLETE, 0 };
2279 parser->pstate->parse_completion = true;
2281 rust_lex_test_sequence (parser, "something.wha", ARRAY_SIZE (expected),
2282 expected);
2283 rust_lex_test_sequence (parser, "something.", ARRAY_SIZE (expected),
2284 expected);
2286 parser->pstate->parse_completion = false;
2289 /* Test pushback. */
2291 static void
2292 rust_lex_test_push_back (rust_parser *parser)
2294 int token;
2296 parser->reset (">>=");
2298 token = parser->lex_one_token (false);
2299 SELF_CHECK (token == COMPOUND_ASSIGN);
2300 SELF_CHECK (parser->current_opcode == BINOP_RSH);
2302 parser->push_back ('=');
2304 token = parser->lex_one_token (false);
2305 SELF_CHECK (token == '=');
2307 token = parser->lex_one_token (false);
2308 SELF_CHECK (token == 0);
2311 /* Unit test the lexer. */
2313 static void
2314 rust_lex_tests (void)
2316 /* Set up dummy "parser", so that rust_type works. */
2317 parser_state ps (language_def (language_rust), current_inferior ()->arch (),
2318 nullptr, 0, 0, nullptr, 0, nullptr);
2319 rust_parser parser (&ps);
2321 rust_lex_test_one (&parser, "", 0);
2322 rust_lex_test_one (&parser, " \t \n \r ", 0);
2323 rust_lex_test_one (&parser, "thread 23", 0);
2324 rust_lex_test_one (&parser, "task 23", 0);
2325 rust_lex_test_one (&parser, "th 104", 0);
2326 rust_lex_test_one (&parser, "ta 97", 0);
2328 rust_lex_int_test (&parser, "'z'", 'z', INTEGER);
2329 rust_lex_int_test (&parser, "'\\xff'", 0xff, INTEGER);
2330 rust_lex_int_test (&parser, "'\\u{1016f}'", 0x1016f, INTEGER);
2331 rust_lex_int_test (&parser, "b'z'", 'z', INTEGER);
2332 rust_lex_int_test (&parser, "b'\\xfe'", 0xfe, INTEGER);
2333 rust_lex_int_test (&parser, "b'\\xFE'", 0xfe, INTEGER);
2334 rust_lex_int_test (&parser, "b'\\xfE'", 0xfe, INTEGER);
2336 /* Test all escapes in both modes. */
2337 rust_lex_int_test (&parser, "'\\n'", '\n', INTEGER);
2338 rust_lex_int_test (&parser, "'\\r'", '\r', INTEGER);
2339 rust_lex_int_test (&parser, "'\\t'", '\t', INTEGER);
2340 rust_lex_int_test (&parser, "'\\\\'", '\\', INTEGER);
2341 rust_lex_int_test (&parser, "'\\0'", '\0', INTEGER);
2342 rust_lex_int_test (&parser, "'\\''", '\'', INTEGER);
2343 rust_lex_int_test (&parser, "'\\\"'", '"', INTEGER);
2345 rust_lex_int_test (&parser, "b'\\n'", '\n', INTEGER);
2346 rust_lex_int_test (&parser, "b'\\r'", '\r', INTEGER);
2347 rust_lex_int_test (&parser, "b'\\t'", '\t', INTEGER);
2348 rust_lex_int_test (&parser, "b'\\\\'", '\\', INTEGER);
2349 rust_lex_int_test (&parser, "b'\\0'", '\0', INTEGER);
2350 rust_lex_int_test (&parser, "b'\\''", '\'', INTEGER);
2351 rust_lex_int_test (&parser, "b'\\\"'", '"', INTEGER);
2353 rust_lex_exception_test (&parser, "'z", "Unterminated character literal");
2354 rust_lex_exception_test (&parser, "b'\\x0'", "Not enough hex digits seen");
2355 rust_lex_exception_test (&parser, "b'\\u{0}'",
2356 "Unicode escape in byte literal");
2357 rust_lex_exception_test (&parser, "'\\x0'", "Not enough hex digits seen");
2358 rust_lex_exception_test (&parser, "'\\u0'", "Missing '{' in Unicode escape");
2359 rust_lex_exception_test (&parser, "'\\u{0", "Missing '}' in Unicode escape");
2360 rust_lex_exception_test (&parser, "'\\u{0000007}", "Overlong hex escape");
2361 rust_lex_exception_test (&parser, "'\\u{}", "Not enough hex digits seen");
2362 rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal");
2363 rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal");
2365 rust_lex_int_test (&parser, "23", 23, INTEGER);
2366 rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER);
2367 rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER);
2368 rust_lex_int_test (&parser, "23usize", 23, INTEGER);
2369 rust_lex_int_test (&parser, "23i32", 23, INTEGER);
2370 rust_lex_int_test (&parser, "0x1_f", 0x1f, INTEGER);
2371 rust_lex_int_test (&parser, "0b1_101011__", 0x6b, INTEGER);
2372 rust_lex_int_test (&parser, "0o001177i64", 639, INTEGER);
2373 rust_lex_int_test (&parser, "0x123456789u64", 0x123456789ull, INTEGER);
2375 rust_lex_test_trailing_dot (&parser);
2377 rust_lex_test_one (&parser, "23.", FLOAT);
2378 rust_lex_test_one (&parser, "23.99f32", FLOAT);
2379 rust_lex_test_one (&parser, "23e7", FLOAT);
2380 rust_lex_test_one (&parser, "23E-7", FLOAT);
2381 rust_lex_test_one (&parser, "23e+7", FLOAT);
2382 rust_lex_test_one (&parser, "23.99e+7f64", FLOAT);
2383 rust_lex_test_one (&parser, "23.82f32", FLOAT);
2385 rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT);
2386 rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT);
2387 rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
2388 rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
2390 const int expected1[] = { IDENT, INTEGER, 0 };
2391 rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
2392 expected1);
2393 const int expected2[] = { IDENT, '#', 0 };
2394 rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2);
2396 rust_lex_stringish_test (&parser, "\"string\"", "string", STRING);
2397 rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING);
2398 rust_lex_stringish_test (&parser, "\"str\\\"ing\"", "str\"ing", STRING);
2399 rust_lex_stringish_test (&parser, "r\"str\\ing\"", "str\\ing", STRING);
2400 rust_lex_stringish_test (&parser, "r#\"str\\ting\"#", "str\\ting", STRING);
2401 rust_lex_stringish_test (&parser, "r###\"str\\\"ing\"###", "str\\\"ing",
2402 STRING);
2404 rust_lex_stringish_test (&parser, "b\"string\"", "string", BYTESTRING);
2405 rust_lex_stringish_test (&parser, "b\"\x73tring\"", "string", BYTESTRING);
2406 rust_lex_stringish_test (&parser, "b\"str\\\"ing\"", "str\"ing", BYTESTRING);
2407 rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring",
2408 BYTESTRING);
2410 for (const auto &candidate : identifier_tokens)
2411 rust_lex_test_one (&parser, candidate.name, candidate.value);
2413 for (const auto &candidate : operator_tokens)
2414 rust_lex_test_one (&parser, candidate.name, candidate.value);
2416 rust_lex_test_completion (&parser);
2417 rust_lex_test_push_back (&parser);
2420 #endif /* GDB_SELF_TEST */
2424 void _initialize_rust_exp ();
2425 void
2426 _initialize_rust_exp ()
2428 int code = regcomp (&number_regex, number_regex_text, REG_EXTENDED);
2429 /* If the regular expression was incorrect, it was a programming
2430 error. */
2431 gdb_assert (code == 0);
2433 #if GDB_SELF_TEST
2434 selftests::register_test ("rust-lex", rust_lex_tests);
2435 #endif