More updated translations
[binutils-gdb.git] / gdb / cp-name-parser.y
blob9d0085d27f4605108afad485491bf6abd2eeedd7
1 /* YACC parser for C++ names, for GDB.
3 Copyright (C) 2003-2024 Free Software Foundation, Inc.
5 Parts of the lexer are based on c-exp.y from GDB.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 /* Note that malloc's and realloc's in this file are transformed to
23 xmalloc and xrealloc respectively by the same sed command in the
24 makefile that remaps any other malloc/realloc inserted by the parser
25 generator. Doing this with #defines and trying to control the interaction
26 with include files (<malloc.h> and <stdlib.h> for example) just became
27 too messy, particularly when such includes can be inserted at random
28 times by the parser generator. */
30 /* The Bison manual says that %pure-parser is deprecated, but we use
31 it anyway because it also works with Byacc. That is also why
32 this uses %lex-param and %parse-param rather than the simpler
33 %param -- Byacc does not support the latter. */
34 %pure-parser
35 %lex-param {struct cpname_state *state}
36 %parse-param {struct cpname_state *state}
41 #include <unistd.h>
42 #include "gdbsupport/gdb-safe-ctype.h"
43 #include "demangle.h"
44 #include "cp-support.h"
45 #include "c-support.h"
46 #include "parser-defs.h"
47 #include "gdbsupport/selftest.h"
49 #define GDB_YY_REMAP_PREFIX cpname
50 #include "yy-remap.h"
/* Semantic value type (YYSTYPE) shared by the lexer and the grammar
   actions.  The member used for a given symbol is the <tag> named in
   its %type or %token declaration.  */
54 %union
/* A single demangler tree node.  */
56 struct demangle_component *comp;
/* A partially built tree: COMP is the root and LAST is the address
   of the link that the actions store the next component through.  */
57 struct nested {
58 struct demangle_component *comp;
59 struct demangle_component **last;
60 } nested;
/* Like "nested", but LAST is the last node itself rather than the
   address of a link; the actions reach its child with d_right.  */
61 struct {
62 struct demangle_component *comp, *last;
63 } nested1;
/* Value of the abstract declarator rules.  FN holds a function
   argument list that has not been attached yet, START a trailing
   function-local name, and FOLD_FLAG is set by the parenthesized
   form of direct_abstract_declarator.  */
64 struct {
65 struct demangle_component *comp, **last;
66 struct nested fn;
67 struct demangle_component *start;
68 int fold_flag;
69 } abstract;
/* Integer payload: QUAL_* / INT_* masks and DEMANGLER_SPECIAL.  */
70 int lval;
/* Operator spelling carried by ASSIGN_MODIFY.  */
71 const char *opname;
/* State of one parse.  A pointer to it is handed to both the parser
   and the lexer through the %parse-param and %lex-param directives,
   and the grammar actions call its member functions as "state->...".  */
76 struct cpname_state
78 /* LEXPTR is the current pointer into our lex buffer. PREV_LEXPTR
79 is the start of the last token lexed, only used for diagnostics.
80 ERROR_LEXPTR is the first place an error occurred. GLOBAL_ERRMSG
81 is the first error message encountered. */
83 const char *lexptr, *prev_lexptr, *error_lexptr, *global_errmsg;
/* Owner of the obstack that d_grab allocates components from.  */
85 demangle_parse_info *demangle_info;
87 /* The parse tree created by the parser is stored here after a
88 successful parse. */
90 struct demangle_component *global_result;
/* Allocate one new component on DEMANGLE_INFO's obstack.  */
92 struct demangle_component *d_grab ();
94 /* Helper functions. These wrap the demangler tree interface,
95 handle allocation from our global store, and return the allocated
96 component. */
98 struct demangle_component *fill_comp (enum demangle_component_type d_type,
99 struct demangle_component *lhs,
100 struct demangle_component *rhs);
102 struct demangle_component *make_operator (const char *name, int args);
104 struct demangle_component *make_dtor (enum gnu_v3_dtor_kinds kind,
105 struct demangle_component *name);
107 struct demangle_component *make_builtin_type (const char *name);
109 struct demangle_component *make_name (const char *name, int len);
/* Apply the QUAL_* mask QUALIFIERS to LHS; IS_METHOD selects the
   "_THIS" qualifier components.  */
111 struct demangle_component *d_qualify (struct demangle_component *lhs,
112 int qualifiers, int is_method);
/* Return the builtin type described by the INT_* mask FLAGS.  */
114 struct demangle_component *d_int_type (int flags);
/* Expression builders used by the "exp" rules; NAME is the operator
   spelling.  Their definitions are outside this view.  */
116 struct demangle_component *d_unary (const char *name,
117 struct demangle_component *lhs);
119 struct demangle_component *d_binary (const char *name,
120 struct demangle_component *lhs,
121 struct demangle_component *rhs);
/* NOTE(review): defined outside this view; presumably converts the
   LEN characters at P into a literal stored through LVALP.  */
123 int parse_number (const char *p, int len, int parsed_float, YYSTYPE *lvalp);
126 struct demangle_component *
127 cpname_state::d_grab ()
129 return obstack_new<demangle_component> (&demangle_info->obstack);
/* Both groups below are single-bit values so that the grammar can
   combine them with "|" (see the qualifiers and int_seq rules).  */
132 /* Flags passed to d_qualify. */
134 #define QUAL_CONST 1
135 #define QUAL_RESTRICT 2
136 #define QUAL_VOLATILE 4
138 /* Flags passed to d_int_type. */
140 #define INT_CHAR (1 << 0)
141 #define INT_SHORT (1 << 1)
142 #define INT_LONG (1 << 2)
/* Never produced by int_part directly; int_seq adds it when INT_LONG
   is seen a second time.  */
143 #define INT_LLONG (1 << 3)
145 #define INT_SIGNED (1 << 4)
146 #define INT_UNSIGNED (1 << 5)
148 /* Helper functions. These wrap the demangler tree interface, handle
149 allocation from our global store, and return the allocated component. */
151 struct demangle_component *
152 cpname_state::fill_comp (enum demangle_component_type d_type,
153 struct demangle_component *lhs,
154 struct demangle_component *rhs)
156 struct demangle_component *ret = d_grab ();
157 int i;
159 i = cplus_demangle_fill_component (ret, d_type, lhs, rhs);
160 gdb_assert (i);
162 return ret;
165 struct demangle_component *
166 cpname_state::make_operator (const char *name, int args)
168 struct demangle_component *ret = d_grab ();
169 int i;
171 i = cplus_demangle_fill_operator (ret, name, args);
172 gdb_assert (i);
174 return ret;
177 struct demangle_component *
178 cpname_state::make_dtor (enum gnu_v3_dtor_kinds kind,
179 struct demangle_component *name)
181 struct demangle_component *ret = d_grab ();
182 int i;
184 i = cplus_demangle_fill_dtor (ret, kind, name);
185 gdb_assert (i);
187 return ret;
190 struct demangle_component *
191 cpname_state::make_builtin_type (const char *name)
193 struct demangle_component *ret = d_grab ();
194 int i;
196 i = cplus_demangle_fill_builtin_type (ret, name);
197 gdb_assert (i);
199 return ret;
202 struct demangle_component *
203 cpname_state::make_name (const char *name, int len)
205 struct demangle_component *ret = d_grab ();
206 int i;
208 i = cplus_demangle_fill_name (ret, name, len);
209 gdb_assert (i);
211 return ret;
/* Accessors for the two children of a binary component.  They expand
   to lvalues: the grammar actions both assign through them and take
   their address.  */
214 #define d_left(dc) (dc)->u.s_binary.left
215 #define d_right(dc) (dc)->u.s_binary.right
/* Lexer and error reporter used by the generated parser; both take
   the per-parse state.  Their definitions are outside this view.  */
217 static int yylex (YYSTYPE *, cpname_state *);
218 static void yyerror (cpname_state *, const char *);
/* Symbol declarations.  The <tag> on each line names the %union
   member that carries the symbol's semantic value; symbols declared
   without a tag carry no value.  */
221 %type <comp> exp exp1 type start start_opt oper colon_name
222 %type <comp> unqualified_name colon_ext_name
223 %type <comp> templ template_arg
224 %type <comp> builtin_type
225 %type <comp> typespec_2 array_indicator
226 %type <comp> colon_ext_only ext_only_name
228 %type <comp> demangler_special function conversion_op
229 %type <nested> conversion_op_name
231 %type <abstract> abstract_declarator direct_abstract_declarator
232 %type <abstract> abstract_declarator_fn
233 %type <nested> declarator direct_declarator function_arglist
235 %type <nested> declarator_1 direct_declarator_1
237 %type <nested> template_params function_args
238 %type <nested> ptr_operator
240 %type <nested1> nested_name
242 %type <lval> qualifier qualifiers qualifiers_opt
244 %type <lval> int_part int_seq
/* Literals and identifiers arrive from the lexer as ready-made
   components.  */
246 %token <comp> INT
247 %token <comp> FLOAT
249 %token <comp> NAME
250 %type <comp> name
252 %token STRUCT CLASS UNION ENUM SIZEOF UNSIGNED COLONCOLON
253 %token TEMPLATE
254 %token ERROR
255 %token NEW DELETE OPERATOR
256 %token STATIC_CAST REINTERPRET_CAST DYNAMIC_CAST
258 /* Special type cases, put in to allow the parser to distinguish different
259 legal basetypes. */
260 %token SIGNED_KEYWORD LONG SHORT INT_KEYWORD CONST_KEYWORD VOLATILE_KEYWORD DOUBLE_KEYWORD BOOL
261 %token ELLIPSIS RESTRICT VOID FLOAT_KEYWORD CHAR WCHAR_T
/* The value is the operator spelling, passed on to make_operator.  */
263 %token <opname> ASSIGN_MODIFY
265 /* C++ */
266 %token TRUEKEYWORD
267 %token FALSEKEYWORD
269 /* Non-C++ things we get from the demangler. */
/* The value is cast to enum demangle_component_type by the
   demangler_special rule.  */
270 %token <lval> DEMANGLER_SPECIAL
271 %token CONSTRUCTION_VTABLE CONSTRUCTION_IN
273 /* Precedence declarations. */
/* The declarations run from lowest precedence (NAME) to highest
   (COLONCOLON), as the individual comments below rely on.  */
275 /* Give NAME lower precedence than COLONCOLON, so that nested_name will
276 associate greedily. */
277 %nonassoc NAME
279 /* Give NEW and DELETE lower precedence than ']', because we can not
280 have an array of type operator new. This causes NEW '[' to be
281 parsed as operator new[]. */
282 %nonassoc NEW DELETE
284 /* Give VOID higher precedence than NAME. Then we can use %prec NAME
285 to prefer (VOID) to (function_args). */
286 %nonassoc VOID
288 /* Give VOID lower precedence than ')' for similar reasons. */
289 %nonassoc ')'
/* Expression operators.  */
291 %left ','
292 %right '=' ASSIGN_MODIFY
293 %right '?'
294 %left OROR
295 %left ANDAND
296 %left '|'
297 %left '^'
298 %left '&'
299 %left EQUAL NOTEQUAL
300 %left '<' '>' LEQ GEQ SPACESHIP
301 %left LSH RSH
302 %left '@'
303 %left '+' '-'
304 %left '*' '/' '%'
/* UNARY is referenced only through %prec in the exp rules of this
   view.  */
305 %right UNARY INCREMENT DECREMENT
307 /* We don't need a precedence for '(' in this reduced grammar, and it
308 can mask some unpleasant bugs, so disable it for now. */
310 %right ARROW '.' '[' /* '(' */
311 %left COLONCOLON
/* Grammar entry point: record the tree built for "start" in the
   per-parse state.  */
316 result : start
318 state->global_result = $1;
320 /* Avoid warning about "yynerrs" being unused. */
321 (void) yynerrs;
/* A complete name: a type, a demangler-special form, or a function.
   No explicit action is attached to any alternative here.  */
325 start : type
327 | demangler_special
329 | function
/* Optional "::start" suffix.  Yields NULL when absent, which the
   users of this rule test before building a LOCAL_NAME.  */
333 start_opt : /* */
334 { $$ = NULL; }
335 | COLONCOLON start
336 { $$ = $2; }
/* A function name, optionally followed by "::" and a function-local
   name.  Every alternative yields a single tree in $$; a trailing
   local name wraps the result in DEMANGLE_COMPONENT_LOCAL_NAME.  */
339 function
340 /* Function with a return type. declarator_1 is used to prevent
341 ambiguity with the next rule. */
342 : typespec_2 declarator_1
/* Store the return type through the slot the declarator left open.  */
343 { $$ = $2.comp;
344 *$2.last = $1;
347 /* Function without a return type. We need to use typespec_2
348 to prevent conflicts from qualifiers_opt - harmless. The
349 start_opt is used to handle "function-local" variables and
350 types. */
351 | typespec_2 function_arglist start_opt
352 { $$ = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME,
353 $1, $2.comp);
354 if ($3)
355 $$ = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME,
356 $$, $3);
/* Same as above for names that can only be extended names
   (operators and destructors).  */
358 | colon_ext_only function_arglist start_opt
359 { $$ = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
360 if ($3) $$ = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $3); }
/* Conversion operator with no declarator and no argument list.  */
362 | conversion_op_name start_opt
363 { $$ = $1.comp;
364 if ($2) $$ = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $2); }
/* Conversion operator followed by an abstract declarator, an argument
   list, or both.  */
365 | conversion_op_name abstract_declarator_fn
366 { if ($2.last)
368 /* First complete the abstract_declarator's type using
369 the typespec from the conversion_op_name. */
370 *$2.last = *$1.last;
371 /* Then complete the conversion_op_name with the type. */
372 *$1.last = $2.comp;
374 /* If we have an arglist, build a function type. */
375 if ($2.fn.comp)
376 $$ = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1.comp, $2.fn.comp);
377 else
378 $$ = $1.comp;
379 if ($2.start) $$ = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $2.start);
/* Demangler-only forms.  The value of DEMANGLER_SPECIAL is used
   directly as the component type to build.  */
383 demangler_special
384 : DEMANGLER_SPECIAL start
385 { $$ = state->fill_comp ((enum demangle_component_type) $1, $2, NULL); }
386 | CONSTRUCTION_VTABLE start CONSTRUCTION_IN start
387 { $$ = state->fill_comp (DEMANGLE_COMPONENT_CONSTRUCTION_VTABLE, $2, $4); }
/* "operator" followed by an operator token.  Each alternative builds
   the component with make_operator, passing the operator's spelling
   and the ARGS value to record for it.  make_operator asserts that
   the demangler accepts the pair, so the spellings (including any
   trailing space) must be kept exactly as written.  */
390 oper : OPERATOR NEW
392 /* Match the whitespacing of cplus_demangle_operators.
393 It would abort on unrecognized string otherwise. */
394 $$ = state->make_operator ("new", 3);
396 | OPERATOR DELETE
398 /* Match the whitespacing of cplus_demangle_operators.
399 It would abort on unrecognized string otherwise. */
400 $$ = state->make_operator ("delete ", 1);
402 | OPERATOR NEW '[' ']'
404 /* Match the whitespacing of cplus_demangle_operators.
405 It would abort on unrecognized string otherwise. */
406 $$ = state->make_operator ("new[]", 3);
408 | OPERATOR DELETE '[' ']'
410 /* Match the whitespacing of cplus_demangle_operators.
411 It would abort on unrecognized string otherwise. */
412 $$ = state->make_operator ("delete[] ", 1);
414 | OPERATOR '+'
415 { $$ = state->make_operator ("+", 2); }
416 | OPERATOR '-'
417 { $$ = state->make_operator ("-", 2); }
418 | OPERATOR '*'
419 { $$ = state->make_operator ("*", 2); }
420 | OPERATOR '/'
421 { $$ = state->make_operator ("/", 2); }
422 | OPERATOR '%'
423 { $$ = state->make_operator ("%", 2); }
424 | OPERATOR '^'
425 { $$ = state->make_operator ("^", 2); }
426 | OPERATOR '&'
427 { $$ = state->make_operator ("&", 2); }
428 | OPERATOR '|'
429 { $$ = state->make_operator ("|", 2); }
430 | OPERATOR '~'
431 { $$ = state->make_operator ("~", 1); }
432 | OPERATOR '!'
433 { $$ = state->make_operator ("!", 1); }
434 | OPERATOR '='
435 { $$ = state->make_operator ("=", 2); }
436 | OPERATOR '<'
437 { $$ = state->make_operator ("<", 2); }
438 | OPERATOR '>'
439 { $$ = state->make_operator (">", 2); }
/* The lexer supplies the spelling of the compound assignment.  */
440 | OPERATOR ASSIGN_MODIFY
441 { $$ = state->make_operator ($2, 2); }
442 | OPERATOR LSH
443 { $$ = state->make_operator ("<<", 2); }
444 | OPERATOR RSH
445 { $$ = state->make_operator (">>", 2); }
446 | OPERATOR EQUAL
447 { $$ = state->make_operator ("==", 2); }
448 | OPERATOR NOTEQUAL
449 { $$ = state->make_operator ("!=", 2); }
450 | OPERATOR LEQ
451 { $$ = state->make_operator ("<=", 2); }
452 | OPERATOR GEQ
453 { $$ = state->make_operator (">=", 2); }
454 | OPERATOR SPACESHIP
455 { $$ = state->make_operator ("<=>", 2); }
456 | OPERATOR ANDAND
457 { $$ = state->make_operator ("&&", 2); }
458 | OPERATOR OROR
459 { $$ = state->make_operator ("||", 2); }
460 | OPERATOR INCREMENT
461 { $$ = state->make_operator ("++", 1); }
462 | OPERATOR DECREMENT
463 { $$ = state->make_operator ("--", 1); }
464 | OPERATOR ','
465 { $$ = state->make_operator (",", 2); }
466 | OPERATOR ARROW '*'
467 { $$ = state->make_operator ("->*", 2); }
468 | OPERATOR ARROW
469 { $$ = state->make_operator ("->", 2); }
470 | OPERATOR '(' ')'
471 { $$ = state->make_operator ("()", 2); }
472 | OPERATOR '[' ']'
473 { $$ = state->make_operator ("[]", 2); }
476 /* Conversion operators. We don't try to handle some of
477 the wackier demangler output for function pointers,
478 since it's not clear that it's parseable. */
/* The target type becomes the left child of the CONVERSION
   component; conversion_op_name exposes that slot as "last".  */
479 conversion_op
480 : OPERATOR typespec_2
481 { $$ = state->fill_comp (DEMANGLE_COMPONENT_CONVERSION, $2, NULL); }
/* A conversion operator, optionally scoped by a nested name and/or a
   leading "::".  COMP is the full name; LAST is the address of the
   conversion's target-type slot, so that the "function" rule can
   extend the type with a following abstract declarator.  */
484 conversion_op_name
485 : nested_name conversion_op
486 { $$.comp = $1.comp;
487 d_right ($1.last) = $2;
488 $$.last = &d_left ($2);
490 | conversion_op
491 { $$.comp = $1;
492 $$.last = &d_left ($1);
494 | COLONCOLON nested_name conversion_op
495 { $$.comp = $2.comp;
496 d_right ($2.last) = $3;
497 $$.last = &d_left ($3);
499 | COLONCOLON conversion_op
500 { $$.comp = $2;
501 $$.last = &d_left ($2);
505 /* DEMANGLE_COMPONENT_NAME */
506 /* This accepts certain invalid placements of '~'. */
/* An operator name, an operator template instance, or a destructor
   name.  Destructors are always built as gnu_v3_complete_object_dtor.  */
507 unqualified_name: oper
508 | oper '<' template_params '>'
509 { $$ = state->fill_comp (DEMANGLE_COMPONENT_TEMPLATE, $1, $3.comp); }
510 | '~' NAME
511 { $$ = state->make_dtor (gnu_v3_complete_object_dtor, $2); }
514 /* This rule is used in name and nested_name, and expanded inline there
515 for efficiency. */
/* NOTE(review): "template" is not declared as a symbol anywhere in
   this view (the grammar uses "templ"), and nothing here refers to
   scope_id, so this rule is presumably kept only as commented-out
   documentation in the real file -- confirm that the comment
   delimiters around it are intact.  */
517 scope_id : NAME
518 | template
/* A name with an optional leading "::".  The leading "::" is dropped
   from the resulting tree.  */
522 colon_name : name
523 | COLONCOLON name
524 { $$ = $2; }
527 /* DEMANGLE_COMPONENT_QUAL_NAME */
528 /* DEMANGLE_COMPONENT_CTOR / DEMANGLE_COMPONENT_DTOR ? */
/* A possibly qualified, possibly templated identifier.  When a
   nested_name prefix is present, the final identifier is stored as
   the right child of the prefix's last QUAL_NAME node.  */
529 name : nested_name NAME %prec NAME
530 { $$ = $1.comp; d_right ($1.last) = $2; }
531 | NAME %prec NAME
532 | nested_name templ %prec NAME
533 { $$ = $1.comp; d_right ($1.last) = $2; }
534 | templ %prec NAME
/* Any name a declarator may introduce: an ordinary name or an
   extended (operator / destructor) name.  */
537 colon_ext_name : colon_name
538 | colon_ext_only
/* An extended name with an optional leading "::", which is dropped
   from the resulting tree.  */
541 colon_ext_only : ext_only_name
542 | COLONCOLON ext_only_name
543 { $$ = $2; }
/* An unqualified_name, optionally scoped by a nested_name; the
   unqualified name becomes the right child of the last QUAL_NAME.  */
546 ext_only_name : nested_name unqualified_name
547 { $$ = $1.comp; d_right ($1.last) = $2; }
548 | unqualified_name
/* One or more "scope::" prefixes, built as a right-leaning chain of
   QUAL_NAME nodes.  COMP is the head of the chain and LAST is its
   final node, whose right child is still NULL and is filled in by
   whatever rule consumes the prefix.  */
551 nested_name : NAME COLONCOLON
552 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_QUAL_NAME, $1, NULL);
553 $$.last = $$.comp;
555 | nested_name NAME COLONCOLON
556 { $$.comp = $1.comp;
557 d_right ($1.last) = state->fill_comp (DEMANGLE_COMPONENT_QUAL_NAME, $2, NULL);
558 $$.last = d_right ($1.last);
560 | templ COLONCOLON
561 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_QUAL_NAME, $1, NULL);
562 $$.last = $$.comp;
564 | nested_name templ COLONCOLON
565 { $$.comp = $1.comp;
566 d_right ($1.last) = state->fill_comp (DEMANGLE_COMPONENT_QUAL_NAME, $2, NULL);
567 $$.last = d_right ($1.last);
571 /* DEMANGLE_COMPONENT_TEMPLATE */
572 /* DEMANGLE_COMPONENT_TEMPLATE_ARGLIST */
/* A template instance: the template's name on the left, its argument
   list on the right.  */
573 templ : NAME '<' template_params '>'
574 { $$ = state->fill_comp (DEMANGLE_COMPONENT_TEMPLATE, $1, $3.comp); }
/* Comma-separated template arguments, built as a chain of
   TEMPLATE_ARGLIST nodes linked through their right children.  LAST
   is the address of the final node's right link, where the next
   argument is appended.  */
577 template_params : template_arg
578 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TEMPLATE_ARGLIST, $1, NULL);
579 $$.last = &d_right ($$.comp); }
580 | template_params ',' template_arg
581 { $$.comp = $1.comp;
582 *$1.last = state->fill_comp (DEMANGLE_COMPONENT_TEMPLATE_ARGLIST, $3, NULL);
583 $$.last = &d_right (*$1.last);
587 /* "type" is inlined into template_arg and function_args. */
589 /* Also an integral constant-expression of integral type, and a
590 pointer to member (?) */
/* One template argument: a type, the address of an entity (built as
   a unary "&" expression), an expression, or a function.  */
591 template_arg : typespec_2
592 | typespec_2 abstract_declarator
/* Store the base type through the declarator's open slot.  */
593 { $$ = $2.comp;
594 *$2.last = $1;
596 | '&' start
597 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY, state->make_operator ("&", 1), $2); }
598 | '&' '(' start ')'
599 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY, state->make_operator ("&", 1), $3); }
600 | exp
601 | function
/* A non-empty parameter list, built as a chain of ARGLIST nodes
   linked through their right children.  LAST is the address of the
   final node's right link.  When a parameter has an abstract
   declarator, the base type is first stored through the declarator's
   open slot and the completed declarator becomes the parameter.  */
604 function_args : typespec_2
605 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_ARGLIST, $1, NULL);
606 $$.last = &d_right ($$.comp);
608 | typespec_2 abstract_declarator
609 { *$2.last = $1;
610 $$.comp = state->fill_comp (DEMANGLE_COMPONENT_ARGLIST, $2.comp, NULL);
611 $$.last = &d_right ($$.comp);
613 | function_args ',' typespec_2
614 { *$1.last = state->fill_comp (DEMANGLE_COMPONENT_ARGLIST, $3, NULL);
615 $$.comp = $1.comp;
616 $$.last = &d_right (*$1.last);
618 | function_args ',' typespec_2 abstract_declarator
619 { *$4.last = $3;
620 *$1.last = state->fill_comp (DEMANGLE_COMPONENT_ARGLIST, $4.comp, NULL);
621 $$.comp = $1.comp;
622 $$.last = &d_right (*$1.last);
/* A trailing "..." is represented as a builtin type named "...".  */
624 | function_args ',' ELLIPSIS
625 { *$1.last
626 = state->fill_comp (DEMANGLE_COMPONENT_ARGLIST,
627 state->make_builtin_type ("..."),
628 NULL);
629 $$.comp = $1.comp;
630 $$.last = &d_right (*$1.last);
/* A parenthesized parameter list with optional trailing qualifiers.
   Builds a FUNCTION_TYPE whose left child (the return type) is left
   NULL; LAST is taken as the address of that slot before d_qualify
   wraps the function type, so it keeps pointing into the
   FUNCTION_TYPE node.  "(void)" and "()" both give a NULL parameter
   list.  d_qualify is called with IS_METHOD set.  */
634 function_arglist: '(' function_args ')' qualifiers_opt %prec NAME
635 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, $2.comp);
636 $$.last = &d_left ($$.comp);
637 $$.comp = state->d_qualify ($$.comp, $4, 1); }
638 | '(' VOID ')' qualifiers_opt
639 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, NULL);
640 $$.last = &d_left ($$.comp);
641 $$.comp = state->d_qualify ($$.comp, $4, 1); }
642 | '(' ')' qualifiers_opt
643 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, NULL);
644 $$.last = &d_left ($$.comp);
645 $$.comp = state->d_qualify ($$.comp, $3, 1); }
648 /* Should do something about DEMANGLE_COMPONENT_VENDOR_TYPE_QUAL */
/* Optional qualifier list; an empty list is the mask 0.  */
649 qualifiers_opt : /* epsilon */
650 { $$ = 0; }
651 | qualifiers
/* Map a single cv-qualifier keyword to its QUAL_* bit.  */
654 qualifier : RESTRICT
655 { $$ = QUAL_RESTRICT; }
656 | VOLATILE_KEYWORD
657 { $$ = QUAL_VOLATILE; }
658 | CONST_KEYWORD
659 { $$ = QUAL_CONST; }
/* One or more qualifiers, OR-ed into a single QUAL_* mask.  A
   repeated qualifier therefore has no additional effect.  */
662 qualifiers : qualifier
663 | qualifier qualifiers
664 { $$ = $1 | $2; }
667 /* This accepts all sorts of invalid constructions and produces
668 invalid output for them - an error would be better. */
/* Map one integer-type keyword to its INT_* bit.  Plain "int"
   contributes no bit.  */
670 int_part : INT_KEYWORD
671 { $$ = 0; }
672 | SIGNED_KEYWORD
673 { $$ = INT_SIGNED; }
674 | UNSIGNED
675 { $$ = INT_UNSIGNED; }
676 | CHAR
677 { $$ = INT_CHAR; }
678 | LONG
679 { $$ = INT_LONG; }
680 | SHORT
681 { $$ = INT_SHORT; }
/* A sequence of integer-type keywords, OR-ed into one INT_* mask.
   When INT_LONG is present on both sides (a second "long"), the
   result is the left mask plus INT_LLONG.  */
684 int_seq : int_part
685 | int_seq int_part
686 { $$ = $1 | $2; if ($1 & $2 & INT_LONG) $$ = $1 | INT_LLONG; }
/* A builtin type.  Integer types go through d_int_type with the
   collected INT_* mask; the rest are looked up by their spelling.  */
689 builtin_type : int_seq
690 { $$ = state->d_int_type ($1); }
691 | FLOAT_KEYWORD
692 { $$ = state->make_builtin_type ("float"); }
693 | DOUBLE_KEYWORD
694 { $$ = state->make_builtin_type ("double"); }
695 | LONG DOUBLE_KEYWORD
696 { $$ = state->make_builtin_type ("long double"); }
697 | BOOL
698 { $$ = state->make_builtin_type ("bool"); }
699 | WCHAR_T
700 { $$ = state->make_builtin_type ("wchar_t"); }
701 | VOID
702 { $$ = state->make_builtin_type ("void"); }
/* A pointer, reference, rvalue-reference or pointer-to-member
   operator.  COMP is the new component (wrapped in qualifiers where
   the alternative accepts them) and LAST is the address of the slot
   that will receive the type being pointed or referred to.  LAST is
   taken before d_qualify is applied, so it points into the
   POINTER / PTRMEM_TYPE node itself.  */
705 ptr_operator : '*' qualifiers_opt
706 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_POINTER, NULL, NULL);
707 $$.last = &d_left ($$.comp);
708 $$.comp = state->d_qualify ($$.comp, $2, 0); }
709 /* g++ seems to allow qualifiers after the reference? */
710 | '&'
711 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_REFERENCE, NULL, NULL);
712 $$.last = &d_left ($$.comp); }
713 | ANDAND
714 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_RVALUE_REFERENCE, NULL, NULL);
715 $$.last = &d_left ($$.comp); }
/* Pointer to member: the class is the left child and the member type
   goes into the right child.  */
716 | nested_name '*' qualifiers_opt
717 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_PTRMEM_TYPE, $1.comp, NULL);
718 /* Convert the innermost DEMANGLE_COMPONENT_QUAL_NAME to a DEMANGLE_COMPONENT_NAME. */
/* This is done by copying the node's left child over the node.  */
719 *$1.last = *d_left ($1.last);
720 $$.last = &d_right ($$.comp);
721 $$.comp = state->d_qualify ($$.comp, $3, 0); }
722 | COLONCOLON nested_name '*' qualifiers_opt
723 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_PTRMEM_TYPE, $2.comp, NULL);
724 /* Convert the innermost DEMANGLE_COMPONENT_QUAL_NAME to a DEMANGLE_COMPONENT_NAME. */
725 *$2.last = *d_left ($2.last);
726 $$.last = &d_right ($$.comp);
727 $$.comp = state->d_qualify ($$.comp, $4, 0); }
/* "[]" or "[N]".  The dimension, if any, is the left child of the
   ARRAY_TYPE; the right child is left NULL and is used by the
   declarator rules as the slot for the element type.  */
730 array_indicator : '[' ']'
731 { $$ = state->fill_comp (DEMANGLE_COMPONENT_ARRAY_TYPE, NULL, NULL); }
732 | '[' INT ']'
733 { $$ = state->fill_comp (DEMANGLE_COMPONENT_ARRAY_TYPE, $2, NULL); }
736 /* Details of this approach inspired by the G++ < 3.4 parser. */
738 /* This rule is only used in typespec_2, and expanded inline there for
739 efficiency. */
/* NOTE(review): nothing in this view refers to "typespec" and it has
   no %type declaration, so this rule is presumably kept only as
   commented-out documentation in the real file -- confirm that the
   comment delimiters around it are intact.  */
741 typespec : builtin_type
742 | colon_name
/* A builtin type or (optionally "::"-prefixed) name with qualifiers
   before it, after it, or both.  Leading and trailing qualifier masks
   are merged with "|" and applied with d_qualify (IS_METHOD = 0).  */
746 typespec_2 : builtin_type qualifiers
747 { $$ = state->d_qualify ($1, $2, 0); }
748 | builtin_type
749 | qualifiers builtin_type qualifiers
750 { $$ = state->d_qualify ($2, $1 | $3, 0); }
751 | qualifiers builtin_type
752 { $$ = state->d_qualify ($2, $1, 0); }
754 | name qualifiers
755 { $$ = state->d_qualify ($1, $2, 0); }
756 | name
757 | qualifiers name qualifiers
758 { $$ = state->d_qualify ($2, $1 | $3, 0); }
759 | qualifiers name
760 { $$ = state->d_qualify ($2, $1, 0); }
762 | COLONCOLON name qualifiers
763 { $$ = state->d_qualify ($2, $3, 0); }
764 | COLONCOLON name
765 { $$ = $2; }
766 | qualifiers COLONCOLON name qualifiers
767 { $$ = state->d_qualify ($3, $1 | $4, 0); }
768 | qualifiers COLONCOLON name
769 { $$ = state->d_qualify ($3, $1, 0); }
/* A declarator with no name.  COMP is the outermost component built
   so far and LAST the address of the slot awaiting the base type.  A
   function argument list that a direct_abstract_declarator left
   pending in FN is attached here, and FN is always cleared in the
   result.  The statement order in these actions is significant.  */
772 abstract_declarator
773 : ptr_operator
774 { $$.comp = $1.comp; $$.last = $1.last;
775 $$.fn.comp = NULL; $$.fn.last = NULL; }
776 | ptr_operator abstract_declarator
/* Attach any pending argument list first, then hang the pointer
   operator off the open slot and continue from its own slot.  */
777 { $$ = $2; $$.fn.comp = NULL; $$.fn.last = NULL;
778 if ($2.fn.comp) { $$.last = $2.fn.last; *$2.last = $2.fn.comp; }
779 *$$.last = $1.comp;
780 $$.last = $1.last; }
781 | direct_abstract_declarator
782 { $$.fn.comp = NULL; $$.fn.last = NULL;
783 if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
/* The non-pointer part of an abstract declarator: a parenthesized
   declarator, optionally followed by argument lists and array
   indicators.  FOLD_FLAG is set only by the parenthesized form; when
   it is set, a following argument list is attached immediately,
   otherwise the list is parked in FN for the enclosing rule.  The
   statement order in these actions is significant.  */
787 direct_abstract_declarator
788 : '(' abstract_declarator ')'
789 { $$ = $2; $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 1;
790 if ($2.fn.comp) { $$.last = $2.fn.last; *$2.last = $2.fn.comp; }
792 | direct_abstract_declarator function_arglist
793 { $$.fold_flag = 0;
794 if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
795 if ($1.fold_flag)
797 *$$.last = $2.comp;
798 $$.last = $2.last;
800 else
801 $$.fn = $2;
803 | direct_abstract_declarator array_indicator
804 { $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 0;
805 if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
806 *$1.last = $2;
807 $$.last = &d_right ($2);
809 | array_indicator
810 { $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 0;
811 $$.comp = $1;
812 $$.last = &d_right ($1);
814 /* G++ has the following except for () and (type). Then
815 (type) is handled in regcast_or_absdcl and () is handled
816 in fcast_or_absdcl.
818 However, this is only useful for function types, and
819 generates reduce/reduce conflicts with direct_declarator.
820 We're interested in pointer-to-function types, and in
821 functions, but not in function types - so leave this
822 out. */
823 /* | function_arglist */
/* Abstract declarator variant used after a conversion operator name
   (see the "function" rule).  Unlike abstract_declarator it keeps a
   pending argument list in FN, may carry a function-local name in
   START, and may have COMP and LAST both NULL when only an argument
   list was seen.  */
826 abstract_declarator_fn
827 : ptr_operator
828 { $$.comp = $1.comp; $$.last = $1.last;
829 $$.fn.comp = NULL; $$.fn.last = NULL; $$.start = NULL; }
830 | ptr_operator abstract_declarator_fn
/* $2.last is NULL when $2 was only an argument list; the pointer
   operator then becomes the whole declarator.  */
831 { $$ = $2;
832 if ($2.last)
833 *$$.last = $1.comp;
834 else
835 $$.comp = $1.comp;
836 $$.last = $1.last;
838 | direct_abstract_declarator
839 { $$.comp = $1.comp; $$.last = $1.last; $$.fn = $1.fn; $$.start = NULL; }
840 | direct_abstract_declarator function_arglist COLONCOLON start
841 { $$.start = $4;
842 if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
843 if ($1.fold_flag)
845 *$$.last = $2.comp;
846 $$.last = $2.last;
848 else
849 $$.fn = $2;
851 | function_arglist start_opt
852 { $$.fn = $1;
853 $$.start = $2;
854 $$.comp = NULL; $$.last = NULL;
/* A complete type: a type specifier, optionally wrapped by an
   abstract declarator whose open slot receives the specifier.  */
858 type : typespec_2
859 | typespec_2 abstract_declarator
860 { $$ = $2.comp;
861 *$2.last = $1;
/* A named declarator.  A leading pointer operator is stored through
   the inner declarator's open slot, and the operator's own slot
   becomes the new open slot.  */
865 declarator : ptr_operator declarator
866 { $$.comp = $2.comp;
867 $$.last = $1.last;
868 *$2.last = $1.comp; }
869 | direct_declarator
/* The non-pointer part of a named declarator.  The name starts a
   TYPED_NAME whose right child is the open slot; each following
   argument list or array indicator is stored through the current
   slot and supplies the next one.  */
872 direct_declarator
873 : '(' declarator ')'
874 { $$ = $2; }
875 | direct_declarator function_arglist
876 { $$.comp = $1.comp;
877 *$1.last = $2.comp;
878 $$.last = $2.last;
880 | direct_declarator array_indicator
881 { $$.comp = $1.comp;
882 *$1.last = $2;
883 $$.last = &d_right ($2);
885 | colon_ext_name
886 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, NULL);
887 $$.last = &d_right ($$.comp);
891 /* These are similar to declarator and direct_declarator except that they
892 do not permit ( colon_ext_name ), which is ambiguous with a function
893 argument list. They also don't permit a few other forms with redundant
894 parentheses around the colon_ext_name; any colon_ext_name in parentheses
895 must be followed by an argument list or an array indicator, or preceded
896 by a pointer. */
897 declarator_1 : ptr_operator declarator_1
898 { $$.comp = $2.comp;
899 $$.last = $1.last;
900 *$2.last = $1.comp; }
901 | colon_ext_name
902 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, NULL);
903 $$.last = &d_right ($$.comp);
905 | direct_declarator_1
907 /* Function local variable or type. The typespec to
908 our left is the type of the containing function.
909 This should be OK, because function local types
910 can not be templates, so the return types of their
911 members will not be mangled. If they are, hopefully
912 they'll end up to the right of the ::. */
913 | colon_ext_name function_arglist COLONCOLON start
/* LAST keeps pointing at the enclosing function's return-type slot
   even though COMP is then wrapped in a LOCAL_NAME.  */
914 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
915 $$.last = $2.last;
916 $$.comp = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$.comp, $4);
918 | direct_declarator_1 function_arglist COLONCOLON start
919 { $$.comp = $1.comp;
920 *$1.last = $2.comp;
921 $$.last = $2.last;
922 $$.comp = state->fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$.comp, $4);
/* Counterpart of direct_declarator for declarator_1.  A parenthesized
   declarator must start with a pointer operator, and a bare name must
   be followed directly by an argument list or an array indicator.  */
926 direct_declarator_1
927 : '(' ptr_operator declarator ')'
928 { $$.comp = $3.comp;
929 $$.last = $2.last;
930 *$3.last = $2.comp; }
931 | direct_declarator_1 function_arglist
932 { $$.comp = $1.comp;
933 *$1.last = $2.comp;
934 $$.last = $2.last;
936 | direct_declarator_1 array_indicator
937 { $$.comp = $1.comp;
938 *$1.last = $2;
939 $$.last = &d_right ($2);
941 | colon_ext_name function_arglist
942 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
943 $$.last = $2.last;
945 | colon_ext_name array_indicator
946 { $$.comp = state->fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2);
947 $$.last = &d_right ($2);
/* Parenthesized expressions.  "exp1" is the set of forms that are
   accepted only inside parentheses: any "exp", a ">" comparison, and
   an address-of applied to a name.  */
951 exp : '(' exp1 ')'
952 { $$ = $2; }
955 /* Silly trick. Only allow '>' when parenthesized, in order to
956 handle conflict with templates. */
957 exp1 : exp
960 exp1 : exp '>' exp
961 { $$ = state->d_binary (">", $1, $3); }
964 /* References. Not allowed everywhere in template parameters, only
965 at the top level, but treat them as expressions in case they are wrapped
966 in parentheses. */
967 exp1 : '&' start
968 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY, state->make_operator ("&", 1), $2); }
969 | '&' '(' start ')'
970 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY, state->make_operator ("&", 1), $3); }
973 /* Expressions, not including the comma operator. */
/* Prefix operators; %prec UNARY gives them the precedence declared
   for UNARY rather than that of the binary "-".  */
974 exp : '-' exp %prec UNARY
975 { $$ = state->d_unary ("-", $2); }
978 exp : '!' exp %prec UNARY
979 { $$ = state->d_unary ("!", $2); }
982 exp : '~' exp %prec UNARY
983 { $$ = state->d_unary ("~", $2); }
986 /* Casts. First your normal C-style cast. If exp is a LITERAL, just change
987 its type. */
989 exp : '(' type ')' exp %prec UNARY
/* A literal keeps its node and only has its left child (its type)
   replaced; anything else becomes UNARY (CAST (type), exp).  */
990 { if ($4->type == DEMANGLE_COMPONENT_LITERAL
991 || $4->type == DEMANGLE_COMPONENT_LITERAL_NEG)
993 $$ = $4;
994 d_left ($4) = $2;
996 else
997 $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY,
998 state->fill_comp (DEMANGLE_COMPONENT_CAST, $2, NULL),
999 $4);
1003 /* Mangling does not differentiate between these, so we don't need to
1004 either. */
/* All three named casts build the same UNARY (CAST (type), exp)
   tree.  */
1005 exp : STATIC_CAST '<' type '>' '(' exp1 ')' %prec UNARY
1006 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY,
1007 state->fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
1008 $6);
1012 exp : DYNAMIC_CAST '<' type '>' '(' exp1 ')' %prec UNARY
1013 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY,
1014 state->fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
1015 $6);
1019 exp : REINTERPRET_CAST '<' type '>' '(' exp1 ')' %prec UNARY
1020 { $$ = state->fill_comp (DEMANGLE_COMPONENT_UNARY,
1021 state->fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
1022 $6);
1026 /* Another form of C++-style cast is "type ( exp1 )". This creates too many
1027 conflicts to support. For a while we supported the simpler
1028 "typespec_2 ( exp1 )", but that conflicts with "& ( start )" as a
1029 reference, deep within the wilderness of abstract declarators:
1030 Qux<int(&(*))> vs Qux<int(&(var))>, a shift-reduce conflict at the
1031 innermost left parenthesis. So we do not support function-like casts.
1032 Fortunately they never appear in demangler output. */
1034 /* TO INVESTIGATE: ._0 style anonymous names; anonymous namespaces */
1036 /* Binary operators in order of decreasing precedence. */
/* Each rule passes the operator's spelling and both operands to
   d_binary.  ">" is absent here; it is accepted only inside
   parentheses, through exp1.  */
1038 exp : exp '*' exp
1039 { $$ = state->d_binary ("*", $1, $3); }
1042 exp : exp '/' exp
1043 { $$ = state->d_binary ("/", $1, $3); }
1046 exp : exp '%' exp
1047 { $$ = state->d_binary ("%", $1, $3); }
1050 exp : exp '+' exp
1051 { $$ = state->d_binary ("+", $1, $3); }
1054 exp : exp '-' exp
1055 { $$ = state->d_binary ("-", $1, $3); }
1058 exp : exp LSH exp
1059 { $$ = state->d_binary ("<<", $1, $3); }
1062 exp : exp RSH exp
1063 { $$ = state->d_binary (">>", $1, $3); }
1066 exp : exp EQUAL exp
1067 { $$ = state->d_binary ("==", $1, $3); }
1070 exp : exp NOTEQUAL exp
1071 { $$ = state->d_binary ("!=", $1, $3); }
1074 exp : exp LEQ exp
1075 { $$ = state->d_binary ("<=", $1, $3); }
1078 exp : exp GEQ exp
1079 { $$ = state->d_binary (">=", $1, $3); }
1082 exp : exp SPACESHIP exp
1083 { $$ = state->d_binary ("<=>", $1, $3); }
1086 exp : exp '<' exp
1087 { $$ = state->d_binary ("<", $1, $3); }
1090 exp : exp '&' exp
1091 { $$ = state->d_binary ("&", $1, $3); }
1094 exp : exp '^' exp
1095 { $$ = state->d_binary ("^", $1, $3); }
1098 exp : exp '|' exp
1099 { $$ = state->d_binary ("|", $1, $3); }
1102 exp : exp ANDAND exp
1103 { $$ = state->d_binary ("&&", $1, $3); }
1106 exp : exp OROR exp
1107 { $$ = state->d_binary ("||", $1, $3); }
1110 /* Not 100% sure these are necessary, but they're harmless. */
/* Member access is built like any other binary operator, with the
   member's NAME component as the right operand.  */
1111 exp : exp ARROW NAME
1112 { $$ = state->d_binary ("->", $1, $3); }
1115 exp : exp '.' NAME
1116 { $$ = state->d_binary (".", $1, $3); }
/* Conditional expression: TRINARY ("?", ARG1 (cond, ARG2 (then,
   else))).  */
1119 exp : exp '?' exp ':' exp %prec '?'
1120 { $$ = state->fill_comp (DEMANGLE_COMPONENT_TRINARY, state->make_operator ("?", 3),
1121 state->fill_comp (DEMANGLE_COMPONENT_TRINARY_ARG1, $1,
1122 state->fill_comp (DEMANGLE_COMPONENT_TRINARY_ARG2, $3, $5)));
/* Numeric literals are already components when they arrive from the
   lexer.  */
1126 exp : INT
1129 /* Not generally allowed. */
1130 exp : FLOAT
1133 exp : SIZEOF '(' type ')' %prec UNARY
1135 /* Match the whitespacing of cplus_demangle_operators.
1136 It would abort on unrecognized string otherwise. */
1137 $$ = state->d_unary ("sizeof ", $3);
1141 /* C++. */
/* "true" and "false" become LITERAL components of builtin type
   "bool"; the names "1" and "0" are built for them.
   NOTE(review): the last argument and the end of each fill_comp call
   below are not visible in this view; presumably I is passed as the
   literal's value -- confirm against the full file.  */
1142 exp : TRUEKEYWORD
1143 { struct demangle_component *i;
1144 i = state->make_name ("1", 1);
1145 $$ = state->fill_comp (DEMANGLE_COMPONENT_LITERAL,
1146 state->make_builtin_type ( "bool"),
1151 exp : FALSEKEYWORD
1152 { struct demangle_component *i;
1153 i = state->make_name ("0", 1);
1154 $$ = state->fill_comp (DEMANGLE_COMPONENT_LITERAL,
1155 state->make_builtin_type ("bool"),
1160 /* end of C++. */
1164 /* Apply QUALIFIERS to LHS and return a qualified component. IS_METHOD
1165 is set if LHS is a method, in which case the qualifiers are logically
1166 applied to "this". We apply qualifiers in a consistent order; LHS
1167 may already be qualified; duplicate qualifiers are not created. */
1169 struct demangle_component *
1170 cpname_state::d_qualify (struct demangle_component *lhs, int qualifiers,
1171 int is_method)
1173 struct demangle_component **inner_p;
1174 enum demangle_component_type type;
1176 /* For now the order is CONST (innermost), VOLATILE, RESTRICT. */
1178 #define HANDLE_QUAL(TYPE, MTYPE, QUAL) \
1179 if ((qualifiers & QUAL) && (type != TYPE) && (type != MTYPE)) \
1181 *inner_p = fill_comp (is_method ? MTYPE : TYPE, \
1182 *inner_p, NULL); \
1183 inner_p = &d_left (*inner_p); \
1184 type = (*inner_p)->type; \
1186 else if (type == TYPE || type == MTYPE) \
1188 inner_p = &d_left (*inner_p); \
1189 type = (*inner_p)->type; \
1192 inner_p = &lhs;
1194 type = (*inner_p)->type;
1196 HANDLE_QUAL (DEMANGLE_COMPONENT_RESTRICT, DEMANGLE_COMPONENT_RESTRICT_THIS, QUAL_RESTRICT);
1197 HANDLE_QUAL (DEMANGLE_COMPONENT_VOLATILE, DEMANGLE_COMPONENT_VOLATILE_THIS, QUAL_VOLATILE);
1198 HANDLE_QUAL (DEMANGLE_COMPONENT_CONST, DEMANGLE_COMPONENT_CONST_THIS, QUAL_CONST);
1200 return lhs;
1203 /* Return a builtin type corresponding to FLAGS. */
1205 struct demangle_component *
1206 cpname_state::d_int_type (int flags)
1208 const char *name;
1210 switch (flags)
1212 case INT_SIGNED | INT_CHAR:
1213 name = "signed char";
1214 break;
1215 case INT_CHAR:
1216 name = "char";
1217 break;
1218 case INT_UNSIGNED | INT_CHAR:
1219 name = "unsigned char";
1220 break;
1221 case 0:
1222 case INT_SIGNED:
1223 name = "int";
1224 break;
1225 case INT_UNSIGNED:
1226 name = "unsigned int";
1227 break;
1228 case INT_LONG:
1229 case INT_SIGNED | INT_LONG:
1230 name = "long";
1231 break;
1232 case INT_UNSIGNED | INT_LONG:
1233 name = "unsigned long";
1234 break;
1235 case INT_SHORT:
1236 case INT_SIGNED | INT_SHORT:
1237 name = "short";
1238 break;
1239 case INT_UNSIGNED | INT_SHORT:
1240 name = "unsigned short";
1241 break;
1242 case INT_LLONG | INT_LONG:
1243 case INT_SIGNED | INT_LLONG | INT_LONG:
1244 name = "long long";
1245 break;
1246 case INT_UNSIGNED | INT_LLONG | INT_LONG:
1247 name = "unsigned long long";
1248 break;
1249 default:
1250 return NULL;
1253 return make_builtin_type (name);
1256 /* Wrapper to create a unary operation. */
1258 struct demangle_component *
1259 cpname_state::d_unary (const char *name, struct demangle_component *lhs)
1261 return fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator (name, 1), lhs);
1264 /* Wrapper to create a binary operation. */
1266 struct demangle_component *
1267 cpname_state::d_binary (const char *name, struct demangle_component *lhs,
1268 struct demangle_component *rhs)
1270 return fill_comp (DEMANGLE_COMPONENT_BINARY, make_operator (name, 2),
1271 fill_comp (DEMANGLE_COMPONENT_BINARY_ARGS, lhs, rhs));
1274 /* Find the end of a symbol name starting at LEXPTR. */
1276 static const char *
1277 symbol_end (const char *lexptr)
1279 const char *p = lexptr;
1281 while (*p && (c_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
1282 p++;
1284 return p;
1287 /* Take care of parsing a number (anything that starts with a digit).
1288 The number starts at P and contains LEN characters. Store the result in
1289 YYLVAL. */
1292 cpname_state::parse_number (const char *p, int len, int parsed_float,
1293 YYSTYPE *lvalp)
1295 int unsigned_p = 0;
1297 /* Number of "L" suffixes encountered. */
1298 int long_p = 0;
1300 struct demangle_component *type, *name;
1301 enum demangle_component_type literal_type;
1303 if (p[0] == '-')
1305 literal_type = DEMANGLE_COMPONENT_LITERAL_NEG;
1306 p++;
1307 len--;
1309 else
1310 literal_type = DEMANGLE_COMPONENT_LITERAL;
1312 if (parsed_float)
1314 /* It's a float since it contains a point or an exponent. */
1315 char c;
1317 /* The GDB lexer checks the result of scanf at this point. Not doing
1318 this leaves our error checking slightly weaker but only for invalid
1319 data. */
1321 /* See if it has `f' or `l' suffix (float or long double). */
1323 c = TOLOWER (p[len - 1]);
1325 if (c == 'f')
1327 len--;
1328 type = make_builtin_type ("float");
1330 else if (c == 'l')
1332 len--;
1333 type = make_builtin_type ("long double");
1335 else if (ISDIGIT (c) || c == '.')
1336 type = make_builtin_type ("double");
1337 else
1338 return ERROR;
1340 name = make_name (p, len);
1341 lvalp->comp = fill_comp (literal_type, type, name);
1343 return FLOAT;
1346 /* Note that we do not automatically generate unsigned types. This
1347 can't be done because we don't have access to the gdbarch
1348 here. */
1350 int base = 10;
1351 if (len > 1 && p[0] == '0')
1353 if (p[1] == 'x' || p[1] == 'X')
1355 base = 16;
1356 p += 2;
1357 len -= 2;
1359 else if (p[1] == 'b' || p[1] == 'B')
1361 base = 2;
1362 p += 2;
1363 len -= 2;
1365 else if (p[1] == 'd' || p[1] == 'D' || p[1] == 't' || p[1] == 'T')
1367 /* Apparently gdb extensions. */
1368 base = 10;
1369 p += 2;
1370 len -= 2;
1372 else
1373 base = 8;
1376 long_p = 0;
1377 unsigned_p = 0;
1378 while (len > 0)
1380 if (p[len - 1] == 'l' || p[len - 1] == 'L')
1382 len--;
1383 long_p++;
1384 continue;
1386 if (p[len - 1] == 'u' || p[len - 1] == 'U')
1388 len--;
1389 unsigned_p++;
1390 continue;
1392 break;
1395 /* Use gdb_mpz here in case a 128-bit value appears. */
1396 gdb_mpz value (0);
1397 for (int off = 0; off < len; ++off)
1399 int dig;
1400 if (ISDIGIT (p[off]))
1401 dig = p[off] - '0';
1402 else
1403 dig = TOLOWER (p[off]) - 'a' + 10;
1404 if (dig >= base)
1405 return ERROR;
1406 value *= base;
1407 value += dig;
1410 std::string printed = value.str ();
1411 const char *copy = obstack_strdup (&demangle_info->obstack, printed);
1413 if (long_p == 0)
1415 if (unsigned_p)
1416 type = make_builtin_type ("unsigned int");
1417 else
1418 type = make_builtin_type ("int");
1420 else if (long_p == 1)
1422 if (unsigned_p)
1423 type = make_builtin_type ("unsigned long");
1424 else
1425 type = make_builtin_type ("long");
1427 else
1429 if (unsigned_p)
1430 type = make_builtin_type ("unsigned long long");
1431 else
1432 type = make_builtin_type ("long long");
1435 name = make_name (copy, strlen (copy));
1436 lvalp->comp = fill_comp (literal_type, type, name);
1438 return INT;
/* Letters that form a simple escape after a backslash, and the host
   characters they stand for.  The two arrays are indexed in
   parallel.  */
static const char backslashable[] = "abefnrtv";
static const char represented[] = "\a\b\e\f\n\r\t\v";

/* Translate the backslash the way we would in the host character set.
   If HOST_CHAR is one of the letters in BACKSLASHABLE, store the
   character it represents in *TARGET_CHAR and return 1.  Otherwise
   return 0 and leave *TARGET_CHAR untouched.  */
static int
c_parse_backslash (int host_char, int *target_char)
{
  /* strchr treats the terminating NUL as part of the searched string,
     so a NUL HOST_CHAR would otherwise be reported as a valid
     escape.  */
  if (host_char == 0)
    return 0;

  const char *ix = strchr (backslashable, host_char);
  if (ix == NULL)
    return 0;

  *target_char = represented[ix - backslashable];
  return 1;
}

/* Parse a C escape sequence.  STRING_PTR points to a variable
   containing a pointer to the string to parse.  That pointer
   should point to the character after the \.  That pointer
   is updated past the characters we use.  The value of the
   escape sequence is returned.

   A negative value (-2) means the sequence \ newline was seen,
   which is supposed to be equivalent to nothing at all.

   If \ (or \^) is followed by a null character, we return 0 and leave
   the string pointer pointing at the null character, so the caller
   never reads past the end of the string.

   If \ is followed by 000, we return 0 and leave the string pointer
   after the zeros.  A value of 0 does not mean end of string.  */

static int
cp_parse_escape (const char **string_ptr)
{
  int target_char;
  int c = *(*string_ptr)++;

  if (c == 0)
    {
      /* End of input right after the backslash: back up onto the
         terminator.  */
      (*string_ptr)--;
      return 0;
    }

  if (c_parse_backslash (c, &target_char))
    return target_char;

  switch (c)
    {
    case '\n':
      return -2;

    case '^':
      {
        c = *(*string_ptr)++;

        if (c == 0)
          {
            /* "\^" at the very end of the input.  */
            (*string_ptr)--;
            return 0;
          }

        if (c == '?')
          return 0177;
        else if (c == '\\')
          target_char = cp_parse_escape (string_ptr);
        else
          target_char = c;

        /* Now target_char is something like `c', and we want to find
           its control-character equivalent.  */
        target_char = target_char & 037;

        return target_char;
      }

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      {
        /* Octal escape: the digit already read plus at most two
           more.  */
        int i = c - '0';
        int count = 0;
        while (++count < 3)
          {
            c = (**string_ptr);
            if (c >= '0' && c <= '7')
              {
                (*string_ptr)++;
                i *= 8;
                i += c - '0';
              }
            else
              {
                break;
              }
          }
        return i;
      }

    default:
      /* Any other character stands for itself.  */
      return c;
    }
}
/* Lexer helpers.  All three expand inside yylex and rely on its
   locals STATE, LVALP and (for HANDLE_SPECIAL) TOKSTART.  Each one
   returns from yylex when it matches and does nothing otherwise.  */

/* If the input at TOKSTART begins with the literal TEXT, consume it
   and return DEMANGLER_SPECIAL with COMP as the semantic value.  */
#define HANDLE_SPECIAL(text, comp)                              \
  do                                                            \
    {                                                           \
      if (startswith (tokstart, text))                          \
        {                                                       \
          state->lexptr = tokstart + sizeof (text) - 1;         \
          lvalp->lval = comp;                                   \
          return DEMANGLER_SPECIAL;                             \
        }                                                       \
    }                                                           \
  while (0)

/* The first character is already known to match.  If the second one
   matches OP as well, consume both and return TOKEN with OP as the
   operator name.  */
#define HANDLE_TOKEN2(op, token)                                \
  do                                                            \
    {                                                           \
      if (state->lexptr[1] == op[1])                            \
        {                                                       \
          state->lexptr += 2;                                   \
          lvalp->opname = op;                                   \
          return token;                                         \
        }                                                       \
    }                                                           \
  while (0)

/* Three-character variant of HANDLE_TOKEN2.  */
#define HANDLE_TOKEN3(op, token)                                \
  do                                                            \
    {                                                           \
      if (state->lexptr[1] == op[1] && state->lexptr[2] == op[2]) \
        {                                                       \
          state->lexptr += 3;                                   \
          lvalp->opname = op;                                   \
          return token;                                         \
        }                                                       \
    }                                                           \
  while (0)
1561 /* Read one token, getting characters through LEXPTR. */
1563 static int
1564 yylex (YYSTYPE *lvalp, cpname_state *state)
1566 int c;
1567 int namelen;
1568 const char *tokstart;
1569 char *copy;
1571 retry:
1572 state->prev_lexptr = state->lexptr;
1573 tokstart = state->lexptr;
1575 switch (c = *tokstart)
1577 case 0:
1578 return 0;
1580 case ' ':
1581 case '\t':
1582 case '\n':
1583 state->lexptr++;
1584 goto retry;
1586 case '\'':
1587 /* We either have a character constant ('0' or '\177' for example)
1588 or we have a quoted symbol reference ('foo(int,int)' in C++
1589 for example). */
1590 state->lexptr++;
1591 c = *state->lexptr++;
1592 if (c == '\\')
1593 c = cp_parse_escape (&state->lexptr);
1594 else if (c == '\'')
1596 yyerror (state, _("empty character constant"));
1597 return ERROR;
1600 /* We over-allocate here, but it doesn't really matter . */
1601 copy = (char *) obstack_alloc (&state->demangle_info->obstack, 30);
1602 xsnprintf (copy, 30, "%d", c);
1604 c = *state->lexptr++;
1605 if (c != '\'')
1607 yyerror (state, _("invalid character constant"));
1608 return ERROR;
1611 lvalp->comp
1612 = state->fill_comp (DEMANGLE_COMPONENT_LITERAL,
1613 state->make_builtin_type ("char"),
1614 state->make_name (copy, strlen (copy)));
1616 return INT;
1618 case '(':
1619 if (startswith (tokstart, "(anonymous namespace)"))
1621 state->lexptr += 21;
1622 lvalp->comp = state->make_name ("(anonymous namespace)",
1623 sizeof "(anonymous namespace)" - 1);
1624 return NAME;
1626 [[fallthrough]];
1628 case ')':
1629 case ',':
1630 state->lexptr++;
1631 return c;
1633 case '.':
1634 if (state->lexptr[1] == '.' && state->lexptr[2] == '.')
1636 state->lexptr += 3;
1637 return ELLIPSIS;
1640 /* Might be a floating point number. */
1641 if (state->lexptr[1] < '0' || state->lexptr[1] > '9')
1642 goto symbol; /* Nope, must be a symbol. */
1644 goto try_number;
1646 case '-':
1647 HANDLE_TOKEN2 ("-=", ASSIGN_MODIFY);
1648 HANDLE_TOKEN2 ("--", DECREMENT);
1649 HANDLE_TOKEN2 ("->", ARROW);
1651 /* For construction vtables. This is kind of hokey. */
1652 if (startswith (tokstart, "-in-"))
1654 state->lexptr += 4;
1655 return CONSTRUCTION_IN;
1658 if (state->lexptr[1] < '0' || state->lexptr[1] > '9')
1660 state->lexptr++;
1661 return '-';
1664 try_number:
1665 [[fallthrough]];
1666 case '0':
1667 case '1':
1668 case '2':
1669 case '3':
1670 case '4':
1671 case '5':
1672 case '6':
1673 case '7':
1674 case '8':
1675 case '9':
1677 /* It's a number. */
1678 int got_dot = 0, got_e = 0, toktype;
1679 const char *p = tokstart;
1680 int hex = 0;
1682 if (c == '-')
1683 p++;
1685 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1687 p += 2;
1688 hex = 1;
1690 else if (c == '0' && (p[1]=='t' || p[1]=='T' || p[1]=='d' || p[1]=='D'))
1692 p += 2;
1693 hex = 0;
1696 /* If the token includes the C++14 digits separator, we make a
1697 copy so that we don't have to handle the separator in
1698 parse_number. */
1699 std::optional<std::string> no_tick;
1700 for (;; ++p)
1702 /* This test includes !hex because 'e' is a valid hex digit
1703 and thus does not indicate a floating point number when
1704 the radix is hex. */
1705 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1706 got_dot = got_e = 1;
1707 /* This test does not include !hex, because a '.' always indicates
1708 a decimal floating point number regardless of the radix.
1710 NOTE drow/2005-03-09: This comment is not accurate in C99;
1711 however, it's not clear that all the floating point support
1712 in this file is doing any good here. */
1713 else if (!got_dot && *p == '.')
1714 got_dot = 1;
1715 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1716 && (*p == '-' || *p == '+'))
1718 /* This is the sign of the exponent, not the end of
1719 the number. */
1721 /* C++14 allows a separator. */
1722 else if (*p == '\'')
1724 if (!no_tick.has_value ())
1725 no_tick.emplace (tokstart, p);
1726 continue;
1728 /* We will take any letters or digits. parse_number will
1729 complain if past the radix, or if L or U are not final. */
1730 else if (! ISALNUM (*p))
1731 break;
1732 if (no_tick.has_value ())
1733 no_tick->push_back (*p);
1735 if (no_tick.has_value ())
1736 toktype = state->parse_number (no_tick->c_str (),
1737 no_tick->length (),
1738 got_dot|got_e, lvalp);
1739 else
1740 toktype = state->parse_number (tokstart, p - tokstart,
1741 got_dot|got_e, lvalp);
1742 if (toktype == ERROR)
1744 yyerror (state, _("invalid number"));
1745 return ERROR;
1747 state->lexptr = p;
1748 return toktype;
1751 case '+':
1752 HANDLE_TOKEN2 ("+=", ASSIGN_MODIFY);
1753 HANDLE_TOKEN2 ("++", INCREMENT);
1754 state->lexptr++;
1755 return c;
1756 case '*':
1757 HANDLE_TOKEN2 ("*=", ASSIGN_MODIFY);
1758 state->lexptr++;
1759 return c;
1760 case '/':
1761 HANDLE_TOKEN2 ("/=", ASSIGN_MODIFY);
1762 state->lexptr++;
1763 return c;
1764 case '%':
1765 HANDLE_TOKEN2 ("%=", ASSIGN_MODIFY);
1766 state->lexptr++;
1767 return c;
1768 case '|':
1769 HANDLE_TOKEN2 ("|=", ASSIGN_MODIFY);
1770 HANDLE_TOKEN2 ("||", OROR);
1771 state->lexptr++;
1772 return c;
1773 case '&':
1774 HANDLE_TOKEN2 ("&=", ASSIGN_MODIFY);
1775 HANDLE_TOKEN2 ("&&", ANDAND);
1776 state->lexptr++;
1777 return c;
1778 case '^':
1779 HANDLE_TOKEN2 ("^=", ASSIGN_MODIFY);
1780 state->lexptr++;
1781 return c;
1782 case '!':
1783 HANDLE_TOKEN2 ("!=", NOTEQUAL);
1784 state->lexptr++;
1785 return c;
1786 case '<':
1787 HANDLE_TOKEN3 ("<<=", ASSIGN_MODIFY);
1788 HANDLE_TOKEN3 ("<=>", SPACESHIP);
1789 HANDLE_TOKEN2 ("<=", LEQ);
1790 HANDLE_TOKEN2 ("<<", LSH);
1791 state->lexptr++;
1792 return c;
1793 case '>':
1794 HANDLE_TOKEN3 (">>=", ASSIGN_MODIFY);
1795 HANDLE_TOKEN2 (">=", GEQ);
1796 HANDLE_TOKEN2 (">>", RSH);
1797 state->lexptr++;
1798 return c;
1799 case '=':
1800 HANDLE_TOKEN2 ("==", EQUAL);
1801 state->lexptr++;
1802 return c;
1803 case ':':
1804 HANDLE_TOKEN2 ("::", COLONCOLON);
1805 state->lexptr++;
1806 return c;
1808 case '[':
1809 case ']':
1810 case '?':
1811 case '@':
1812 case '~':
1813 case '{':
1814 case '}':
1815 symbol:
1816 state->lexptr++;
1817 return c;
1819 case '"':
1820 /* These can't occur in C++ names. */
1821 yyerror (state, _("unexpected string literal"));
1822 return ERROR;
1825 if (!(c == '_' || c == '$' || c_ident_is_alpha (c)))
1827 /* We must have come across a bad character (e.g. ';'). */
1828 yyerror (state, _("invalid character"));
1829 return ERROR;
1832 /* It's a name. See how long it is. */
1833 namelen = 0;
1835 c = tokstart[++namelen];
1836 while (c_ident_is_alnum (c) || c == '_' || c == '$');
1838 state->lexptr += namelen;
1840 /* Catch specific keywords. Notice that some of the keywords contain
1841 spaces, and are sorted by the length of the first word. They must
1842 all include a trailing space in the string comparison. */
1843 switch (namelen)
1845 case 16:
1846 if (startswith (tokstart, "reinterpret_cast"))
1847 return REINTERPRET_CAST;
1848 break;
1849 case 12:
1850 if (startswith (tokstart, "construction vtable for "))
1852 state->lexptr = tokstart + 24;
1853 return CONSTRUCTION_VTABLE;
1855 if (startswith (tokstart, "dynamic_cast"))
1856 return DYNAMIC_CAST;
1857 break;
1858 case 11:
1859 if (startswith (tokstart, "static_cast"))
1860 return STATIC_CAST;
1861 break;
1862 case 9:
1863 HANDLE_SPECIAL ("covariant return thunk to ", DEMANGLE_COMPONENT_COVARIANT_THUNK);
1864 HANDLE_SPECIAL ("reference temporary for ", DEMANGLE_COMPONENT_REFTEMP);
1865 break;
1866 case 8:
1867 HANDLE_SPECIAL ("typeinfo for ", DEMANGLE_COMPONENT_TYPEINFO);
1868 HANDLE_SPECIAL ("typeinfo fn for ", DEMANGLE_COMPONENT_TYPEINFO_FN);
1869 HANDLE_SPECIAL ("typeinfo name for ", DEMANGLE_COMPONENT_TYPEINFO_NAME);
1870 if (startswith (tokstart, "operator"))
1871 return OPERATOR;
1872 if (startswith (tokstart, "restrict"))
1873 return RESTRICT;
1874 if (startswith (tokstart, "unsigned"))
1875 return UNSIGNED;
1876 if (startswith (tokstart, "template"))
1877 return TEMPLATE;
1878 if (startswith (tokstart, "volatile"))
1879 return VOLATILE_KEYWORD;
1880 break;
1881 case 7:
1882 HANDLE_SPECIAL ("virtual thunk to ", DEMANGLE_COMPONENT_VIRTUAL_THUNK);
1883 if (startswith (tokstart, "wchar_t"))
1884 return WCHAR_T;
1885 break;
1886 case 6:
1887 if (startswith (tokstart, "global constructors keyed to "))
1889 const char *p;
1890 state->lexptr = tokstart + 29;
1891 lvalp->lval = DEMANGLE_COMPONENT_GLOBAL_CONSTRUCTORS;
1892 /* Find the end of the symbol. */
1893 p = symbol_end (state->lexptr);
1894 lvalp->comp = state->make_name (state->lexptr, p - state->lexptr);
1895 state->lexptr = p;
1896 return DEMANGLER_SPECIAL;
1898 if (startswith (tokstart, "global destructors keyed to "))
1900 const char *p;
1901 state->lexptr = tokstart + 28;
1902 lvalp->lval = DEMANGLE_COMPONENT_GLOBAL_DESTRUCTORS;
1903 /* Find the end of the symbol. */
1904 p = symbol_end (state->lexptr);
1905 lvalp->comp = state->make_name (state->lexptr, p - state->lexptr);
1906 state->lexptr = p;
1907 return DEMANGLER_SPECIAL;
1910 HANDLE_SPECIAL ("vtable for ", DEMANGLE_COMPONENT_VTABLE);
1911 if (startswith (tokstart, "delete"))
1912 return DELETE;
1913 if (startswith (tokstart, "struct"))
1914 return STRUCT;
1915 if (startswith (tokstart, "signed"))
1916 return SIGNED_KEYWORD;
1917 if (startswith (tokstart, "sizeof"))
1918 return SIZEOF;
1919 if (startswith (tokstart, "double"))
1920 return DOUBLE_KEYWORD;
1921 break;
1922 case 5:
1923 HANDLE_SPECIAL ("guard variable for ", DEMANGLE_COMPONENT_GUARD);
1924 if (startswith (tokstart, "false"))
1925 return FALSEKEYWORD;
1926 if (startswith (tokstart, "class"))
1927 return CLASS;
1928 if (startswith (tokstart, "union"))
1929 return UNION;
1930 if (startswith (tokstart, "float"))
1931 return FLOAT_KEYWORD;
1932 if (startswith (tokstart, "short"))
1933 return SHORT;
1934 if (startswith (tokstart, "const"))
1935 return CONST_KEYWORD;
1936 break;
1937 case 4:
1938 if (startswith (tokstart, "void"))
1939 return VOID;
1940 if (startswith (tokstart, "bool"))
1941 return BOOL;
1942 if (startswith (tokstart, "char"))
1943 return CHAR;
1944 if (startswith (tokstart, "enum"))
1945 return ENUM;
1946 if (startswith (tokstart, "long"))
1947 return LONG;
1948 if (startswith (tokstart, "true"))
1949 return TRUEKEYWORD;
1950 break;
1951 case 3:
1952 HANDLE_SPECIAL ("VTT for ", DEMANGLE_COMPONENT_VTT);
1953 HANDLE_SPECIAL ("non-virtual thunk to ", DEMANGLE_COMPONENT_THUNK);
1954 if (startswith (tokstart, "new"))
1955 return NEW;
1956 if (startswith (tokstart, "int"))
1957 return INT_KEYWORD;
1958 break;
1959 default:
1960 break;
1963 lvalp->comp = state->make_name (tokstart, namelen);
1964 return NAME;
1967 static void
1968 yyerror (cpname_state *state, const char *msg)
1970 if (state->global_errmsg)
1971 return;
1973 state->error_lexptr = state->prev_lexptr;
1974 state->global_errmsg = msg ? msg : "parse error";
1977 /* See cp-support.h. */
1979 gdb::unique_xmalloc_ptr<char>
1980 cp_comp_to_string (struct demangle_component *result, int estimated_len)
1982 size_t err;
1984 char *res = gdb_cplus_demangle_print (DMGL_PARAMS | DMGL_ANSI,
1985 result, estimated_len, &err);
1986 return gdb::unique_xmalloc_ptr<char> (res);
1989 /* Merge the two parse trees given by DEST and SRC. The parse tree
1990 in SRC is attached to DEST at the node represented by TARGET.
1992 NOTE 1: Since there is no API to merge obstacks, this function does
1993 even attempt to try it. Fortunately, we do not (yet?) need this ability.
1994 The code will assert if SRC->obstack is not empty.
1996 NOTE 2: The string from which SRC was parsed must not be freed, since
1997 this function will place pointers to that string into DEST. */
1999 void
2000 cp_merge_demangle_parse_infos (struct demangle_parse_info *dest,
2001 struct demangle_component *target,
2002 std::unique_ptr<demangle_parse_info> src)
2005 /* Copy the SRC's parse data into DEST. */
2006 *target = *src->tree;
2008 /* Make sure SRC is owned by DEST. */
2009 dest->infos.push_back (std::move (src));
2012 /* Convert a demangled name to a demangle_component tree. On success,
2013 a structure containing the root of the new tree is returned. On
2014 error, NULL is returned, and an error message will be set in
2015 *ERRMSG. */
2017 struct std::unique_ptr<demangle_parse_info>
2018 cp_demangled_name_to_comp (const char *demangled_name,
2019 std::string *errmsg)
2021 cpname_state state;
2023 state.prev_lexptr = state.lexptr = demangled_name;
2024 state.error_lexptr = NULL;
2025 state.global_errmsg = NULL;
2027 auto result = std::make_unique<demangle_parse_info> ();
2028 state.demangle_info = result.get ();
2030 if (yyparse (&state))
2032 if (state.global_errmsg && errmsg)
2033 *errmsg = state.global_errmsg;
2034 return NULL;
2037 result->tree = state.global_result;
2039 return result;
2042 #if GDB_SELF_TEST
2044 static void
2045 should_be_the_same (const char *one, const char *two)
2047 gdb::unique_xmalloc_ptr<char> cpone = cp_canonicalize_string (one);
2048 gdb::unique_xmalloc_ptr<char> cptwo = cp_canonicalize_string (two);
2050 if (cpone != nullptr)
2051 one = cpone.get ();
2052 if (cptwo != nullptr)
2053 two = cptwo.get ();
2055 SELF_CHECK (strcmp (one, two) == 0);
2058 static void
2059 should_parse (const char *name)
2061 std::string err;
2062 auto parsed = cp_demangled_name_to_comp (name, &err);
2063 SELF_CHECK (parsed != nullptr);
2066 static void
2067 canonicalize_tests ()
2069 should_be_the_same ("short int", "short");
2070 should_be_the_same ("int short", "short");
2072 should_be_the_same ("C<(char) 1>::m()", "C<(char) '\\001'>::m()");
2073 should_be_the_same ("x::y::z<1>", "x::y::z<0x01>");
2074 should_be_the_same ("x::y::z<1>", "x::y::z<01>");
2075 should_be_the_same ("x::y::z<(unsigned long long) 1>", "x::y::z<01ull>");
2076 should_be_the_same ("x::y::z<0b111>", "x::y::z<7>");
2077 should_be_the_same ("x::y::z<0b111>", "x::y::z<0t7>");
2078 should_be_the_same ("x::y::z<0b111>", "x::y::z<0D7>");
2080 should_be_the_same ("x::y::z<0xff'ff>", "x::y::z<65535>");
2082 should_be_the_same ("something<void ()>", "something< void() >");
2083 should_be_the_same ("something<void ()>", "something<void (void)>");
2085 should_parse ("void whatever::operator<=><int, int>");
2088 #endif
/* Module initializer.  When GDB is built with self-tests it registers
   the "canonicalize" test; otherwise it does nothing.  */

void _initialize_cp_name_parser ();

void
_initialize_cp_name_parser ()
{
#if GDB_SELF_TEST
  selftests::register_test ("canonicalize", canonicalize_tests);
#endif
}