Document the GDB 10.2 release in gdb/ChangeLog
[binutils-gdb.git] / gdb / go-exp.y
blobaaa65170a0347e279288f8dfd6f0f0116f92c117
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2021 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include "defs.h"
55 #include <ctype.h>
56 #include "expression.h"
57 #include "value.h"
58 #include "parser-defs.h"
59 #include "language.h"
60 #include "c-lang.h"
61 #include "go-lang.h"
62 #include "bfd.h" /* Required by objfiles.h. */
63 #include "symfile.h" /* Required by objfiles.h. */
64 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
65 #include "charset.h"
66 #include "block.h"
67 #include "expop.h"
69 #define parse_type(ps) builtin_type (ps->gdbarch ())
71 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
72 etc). */
73 #define GDB_YY_REMAP_PREFIX go_
74 #include "yy-remap.h"
76 /* The state of the parser, used internally when we are parsing the
77 expression. */
79 static struct parser_state *pstate = NULL;
81 int yyparse (void);
83 static int yylex (void);
85 static void yyerror (const char *);
89 /* Although the yacc "value" of an expression is not used,
90 since the result is stored in the structure being created,
91 other node types do have values. */
93 %union
95 LONGEST lval;
96 struct {
97 LONGEST val;
98 struct type *type;
99 } typed_val_int;
100 struct {
101 gdb_byte val[16];
102 struct type *type;
103 } typed_val_float;
104 struct stoken sval;
105 struct symtoken ssym;
106 struct type *tval;
107 struct typed_stoken tsval;
108 struct ttype tsym;
109 int voidval;
110 enum exp_opcode opcode;
111 struct internalvar *ivar;
112 struct stoken_vector svec;
116 /* YYSTYPE gets defined by %union. */
117 static int parse_number (struct parser_state *,
118 const char *, int, int, YYSTYPE *);
120 using namespace expr;
123 %type <voidval> exp exp1 type_exp start variable lcurly
124 %type <lval> rcurly
125 %type <tval> type
127 %token <typed_val_int> INT
128 %token <typed_val_float> FLOAT
130 /* Both NAME and TYPENAME tokens represent symbols in the input,
131 and both convey their data as strings.
132 But a TYPENAME is a string that happens to be defined as a type
133 or builtin type name (such as int or char)
134 and a NAME is any other symbol.
135 Contexts where this distinction is not important can use the
136 nonterminal "name", which matches either NAME or TYPENAME. */
138 %token <tsval> RAW_STRING
139 %token <tsval> STRING
140 %token <tsval> CHAR
141 %token <ssym> NAME
142 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */
143 %token <voidval> COMPLETE
144 /*%type <sval> name*/
145 %type <svec> string_exp
146 %type <ssym> name_not_typename
148 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
149 but which would parse as a valid number in the current input radix.
150 E.g. "c" when input_radix==16. Depending on the parse, it will be
151 turned into a name or into a number. */
152 %token <ssym> NAME_OR_INT
154 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
155 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
156 %token SIZEOF_KEYWORD
157 %token LEN_KEYWORD CAP_KEYWORD
158 %token NEW_KEYWORD
159 %token IOTA_KEYWORD NIL_KEYWORD
160 %token CONST_KEYWORD
161 %token DOTDOTDOT
162 %token ENTRY
163 %token ERROR
165 /* Special type cases. */
166 %token BYTE_KEYWORD /* An alias of uint8. */
168 %token <sval> DOLLAR_VARIABLE
170 %token <opcode> ASSIGN_MODIFY
172 %left ','
173 %left ABOVE_COMMA
174 %right '=' ASSIGN_MODIFY
175 %right '?'
176 %left OROR
177 %left ANDAND
178 %left '|'
179 %left '^'
180 %left '&'
181 %left ANDNOT
182 %left EQUAL NOTEQUAL
183 %left '<' '>' LEQ GEQ
184 %left LSH RSH
185 %left '@'
186 %left '+' '-'
187 %left '*' '/' '%'
188 %right UNARY INCREMENT DECREMENT
189 %right LEFT_ARROW '.' '[' '('
194 start : exp1
195 | type_exp
198 type_exp: type
199 { pstate->push_new<type_operation> ($1); }
202 /* Expressions, including the comma operator. */
203 exp1 : exp
204 | exp1 ',' exp
205 { pstate->wrap2<comma_operation> (); }
208 /* Expressions, not including the comma operator. */
209 exp : '*' exp %prec UNARY
210 { pstate->wrap<unop_ind_operation> (); }
213 exp : '&' exp %prec UNARY
214 { pstate->wrap<unop_addr_operation> (); }
217 exp : '-' exp %prec UNARY
218 { pstate->wrap<unary_neg_operation> (); }
221 exp : '+' exp %prec UNARY
222 { pstate->wrap<unary_plus_operation> (); }
225 exp : '!' exp %prec UNARY
226 { pstate->wrap<unary_logical_not_operation> (); }
229 exp : '^' exp %prec UNARY
230 { pstate->wrap<unary_complement_operation> (); }
233 exp : exp INCREMENT %prec UNARY
234 { pstate->wrap<postinc_operation> (); }
237 exp : exp DECREMENT %prec UNARY
238 { pstate->wrap<postdec_operation> (); }
241 /* foo->bar is not in Go. May want as a gdb extension. Later. */
243 exp : exp '.' name_not_typename
245 pstate->push_new<structop_operation>
246 (pstate->pop (), copy_name ($3.stoken));
250 exp : exp '.' name_not_typename COMPLETE
252 structop_base_operation *op
253 = new structop_operation (pstate->pop (),
254 copy_name ($3.stoken));
255 pstate->mark_struct_expression (op);
256 pstate->push (operation_up (op));
260 exp : exp '.' COMPLETE
262 structop_base_operation *op
263 = new structop_operation (pstate->pop (), "");
264 pstate->mark_struct_expression (op);
265 pstate->push (operation_up (op));
269 exp : exp '[' exp1 ']'
270 { pstate->wrap2<subscript_operation> (); }
273 exp : exp '('
274 /* This is to save the value of arglist_len
275 being accumulated by an outer function call. */
276 { pstate->start_arglist (); }
277 arglist ')' %prec LEFT_ARROW
279 std::vector<operation_up> args
280 = pstate->pop_vector (pstate->end_arglist ());
281 pstate->push_new<funcall_operation>
282 (pstate->pop (), std::move (args));
286 lcurly : '{'
287 { pstate->start_arglist (); }
290 arglist :
293 arglist : exp
294 { pstate->arglist_len = 1; }
297 arglist : arglist ',' exp %prec ABOVE_COMMA
298 { pstate->arglist_len++; }
301 rcurly : '}'
302 { $$ = pstate->end_arglist () - 1; }
305 exp : lcurly type rcurly exp %prec UNARY
307 pstate->push_new<unop_memval_operation>
308 (pstate->pop (), $2);
312 exp : type '(' exp ')' %prec UNARY
314 pstate->push_new<unop_cast_operation>
315 (pstate->pop (), $1);
319 exp : '(' exp1 ')'
323 /* Binary operators in order of decreasing precedence. */
325 exp : exp '@' exp
326 { pstate->wrap2<repeat_operation> (); }
329 exp : exp '*' exp
330 { pstate->wrap2<mul_operation> (); }
333 exp : exp '/' exp
334 { pstate->wrap2<div_operation> (); }
337 exp : exp '%' exp
338 { pstate->wrap2<rem_operation> (); }
341 exp : exp '+' exp
342 { pstate->wrap2<add_operation> (); }
345 exp : exp '-' exp
346 { pstate->wrap2<sub_operation> (); }
349 exp : exp LSH exp
350 { pstate->wrap2<lsh_operation> (); }
353 exp : exp RSH exp
354 { pstate->wrap2<rsh_operation> (); }
357 exp : exp EQUAL exp
358 { pstate->wrap2<equal_operation> (); }
361 exp : exp NOTEQUAL exp
362 { pstate->wrap2<notequal_operation> (); }
365 exp : exp LEQ exp
366 { pstate->wrap2<leq_operation> (); }
369 exp : exp GEQ exp
370 { pstate->wrap2<geq_operation> (); }
373 exp : exp '<' exp
374 { pstate->wrap2<less_operation> (); }
377 exp : exp '>' exp
378 { pstate->wrap2<gtr_operation> (); }
381 exp : exp '&' exp
382 { pstate->wrap2<bitwise_and_operation> (); }
385 exp : exp '^' exp
386 { pstate->wrap2<bitwise_xor_operation> (); }
389 exp : exp '|' exp
390 { pstate->wrap2<bitwise_ior_operation> (); }
393 exp : exp ANDAND exp
394 { pstate->wrap2<logical_and_operation> (); }
397 exp : exp OROR exp
398 { pstate->wrap2<logical_or_operation> (); }
401 exp : exp '?' exp ':' exp %prec '?'
403 operation_up last = pstate->pop ();
404 operation_up mid = pstate->pop ();
405 operation_up first = pstate->pop ();
406 pstate->push_new<ternop_cond_operation>
407 (std::move (first), std::move (mid),
408 std::move (last));
412 exp : exp '=' exp
413 { pstate->wrap2<assign_operation> (); }
416 exp : exp ASSIGN_MODIFY exp
418 operation_up rhs = pstate->pop ();
419 operation_up lhs = pstate->pop ();
420 pstate->push_new<assign_modify_operation>
421 ($2, std::move (lhs), std::move (rhs));
425 exp : INT
427 pstate->push_new<long_const_operation>
428 ($1.type, $1.val);
432 exp : CHAR
434 struct stoken_vector vec;
435 vec.len = 1;
436 vec.tokens = &$1;
437 pstate->push_c_string ($1.type, &vec);
441 exp : NAME_OR_INT
442 { YYSTYPE val;
443 parse_number (pstate, $1.stoken.ptr,
444 $1.stoken.length, 0, &val);
445 pstate->push_new<long_const_operation>
446 (val.typed_val_int.type,
447 val.typed_val_int.val);
452 exp : FLOAT
454 float_data data;
455 std::copy (std::begin ($1.val), std::end ($1.val),
456 std::begin (data));
457 pstate->push_new<float_const_operation> ($1.type, data);
461 exp : variable
464 exp : DOLLAR_VARIABLE
466 pstate->push_dollar ($1);
470 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
472 /* TODO(dje): Go objects in structs. */
473 /* TODO(dje): What's the right type here? */
474 struct type *size_type
475 = parse_type (pstate)->builtin_unsigned_int;
476 $3 = check_typedef ($3);
477 pstate->push_new<long_const_operation>
478 (size_type, (LONGEST) TYPE_LENGTH ($3));
482 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
484 /* TODO(dje): Go objects in structs. */
485 pstate->wrap<unop_sizeof_operation> ();
488 string_exp:
489 STRING
491 /* We copy the string here, and not in the
492 lexer, to guarantee that we do not leak a
493 string. */
494 /* Note that we NUL-terminate here, but just
495 for convenience. */
496 struct typed_stoken *vec = XNEW (struct typed_stoken);
497 $$.len = 1;
498 $$.tokens = vec;
500 vec->type = $1.type;
501 vec->length = $1.length;
502 vec->ptr = (char *) malloc ($1.length + 1);
503 memcpy (vec->ptr, $1.ptr, $1.length + 1);
506 | string_exp '+' STRING
508 /* Note that we NUL-terminate here, but just
509 for convenience. */
510 char *p;
511 ++$$.len;
512 $$.tokens = XRESIZEVEC (struct typed_stoken,
513 $$.tokens, $$.len);
515 p = (char *) malloc ($3.length + 1);
516 memcpy (p, $3.ptr, $3.length + 1);
518 $$.tokens[$$.len - 1].type = $3.type;
519 $$.tokens[$$.len - 1].length = $3.length;
520 $$.tokens[$$.len - 1].ptr = p;
524 exp : string_exp %prec ABOVE_COMMA
526 int i;
528 /* Always utf8. */
529 pstate->push_c_string (0, &$1);
530 for (i = 0; i < $1.len; ++i)
531 free ($1.tokens[i].ptr);
532 free ($1.tokens);
536 exp : TRUE_KEYWORD
537 { pstate->push_new<bool_operation> ($1); }
540 exp : FALSE_KEYWORD
541 { pstate->push_new<bool_operation> ($1); }
544 variable: name_not_typename ENTRY
545 { struct symbol *sym = $1.sym.symbol;
547 if (sym == NULL
548 || !SYMBOL_IS_ARGUMENT (sym)
549 || !symbol_read_needs_frame (sym))
550 error (_("@entry can be used only for function "
551 "parameters, not for \"%s\""),
552 copy_name ($1.stoken).c_str ());
554 pstate->push_new<var_entry_value_operation> (sym);
558 variable: name_not_typename
559 { struct block_symbol sym = $1.sym;
561 if (sym.symbol)
563 if (symbol_read_needs_frame (sym.symbol))
564 pstate->block_tracker->update (sym);
566 pstate->push_new<var_value_operation> (sym);
568 else if ($1.is_a_field_of_this)
570 /* TODO(dje): Can we get here?
571 E.g., via a mix of c++ and go? */
572 gdb_assert_not_reached ("go with `this' field");
574 else
576 struct bound_minimal_symbol msymbol;
577 std::string arg = copy_name ($1.stoken);
579 msymbol =
580 lookup_bound_minimal_symbol (arg.c_str ());
581 if (msymbol.minsym != NULL)
582 pstate->push_new<var_msym_value_operation>
583 (msymbol);
584 else if (!have_full_symbols ()
585 && !have_partial_symbols ())
586 error (_("No symbol table is loaded. "
587 "Use the \"file\" command."));
588 else
589 error (_("No symbol \"%s\" in current context."),
590 arg.c_str ());
595 /* TODO
596 method_exp: PACKAGENAME '.' name '.' name
602 type /* Implements (approximately): [*] type-specifier */
603 : '*' type
604 { $$ = lookup_pointer_type ($2); }
605 | TYPENAME
606 { $$ = $1.type; }
608 | STRUCT_KEYWORD name
609 { $$ = lookup_struct (copy_name ($2),
610 expression_context_block); }
612 | BYTE_KEYWORD
613 { $$ = builtin_go_type (pstate->gdbarch ())
614 ->builtin_uint8; }
617 /* TODO
618 name : NAME { $$ = $1.stoken; }
619 | TYPENAME { $$ = $1.stoken; }
620 | NAME_OR_INT { $$ = $1.stoken; }
624 name_not_typename
625 : NAME
626 /* These would be useful if name_not_typename was useful, but it is just
627 a fake for "variable", so these cause reduce/reduce conflicts because
628 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
629 =exp) or just an exp. If name_not_typename was ever used in an lvalue
630 context where only a name could occur, this might be useful.
631 | NAME_OR_INT
637 /* Take care of parsing a number (anything that starts with a digit).
638 Set yylval and return the token type; update lexptr.
639 LEN is the number of characters in it. */
641 /* FIXME: Needs some error checking for the float case. */
642 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
643 That will require moving the guts into a function that we both call
644 as our YYSTYPE is different than c-exp.y's */
646 static int
647 parse_number (struct parser_state *par_state,
648 const char *p, int len, int parsed_float, YYSTYPE *putithere)
650 /* FIXME: Shouldn't these be unsigned? We don't deal with negative values
651 here, and we do kind of silly things like cast to unsigned. */
652 LONGEST n = 0;
653 LONGEST prevn = 0;
654 ULONGEST un;
656 int i = 0;
657 int c;
658 int base = input_radix;
659 int unsigned_p = 0;
661 /* Number of "L" suffixes encountered. */
662 int long_p = 0;
664 /* We have found a "L" or "U" suffix. */
665 int found_suffix = 0;
667 ULONGEST high_bit;
668 struct type *signed_type;
669 struct type *unsigned_type;
671 if (parsed_float)
673 const struct builtin_go_type *builtin_go_types
674 = builtin_go_type (par_state->gdbarch ());
676 /* Handle suffixes: 'f' for float32, 'l' for long double.
677 FIXME: This appears to be an extension -- do we want this? */
678 if (len >= 1 && tolower (p[len - 1]) == 'f')
680 putithere->typed_val_float.type
681 = builtin_go_types->builtin_float32;
682 len--;
684 else if (len >= 1 && tolower (p[len - 1]) == 'l')
686 putithere->typed_val_float.type
687 = parse_type (par_state)->builtin_long_double;
688 len--;
690 /* Default type for floating-point literals is float64. */
691 else
693 putithere->typed_val_float.type
694 = builtin_go_types->builtin_float64;
697 if (!parse_float (p, len,
698 putithere->typed_val_float.type,
699 putithere->typed_val_float.val))
700 return ERROR;
701 return FLOAT;
704 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
705 if (p[0] == '0')
706 switch (p[1])
708 case 'x':
709 case 'X':
710 if (len >= 3)
712 p += 2;
713 base = 16;
714 len -= 2;
716 break;
718 case 'b':
719 case 'B':
720 if (len >= 3)
722 p += 2;
723 base = 2;
724 len -= 2;
726 break;
728 case 't':
729 case 'T':
730 case 'd':
731 case 'D':
732 if (len >= 3)
734 p += 2;
735 base = 10;
736 len -= 2;
738 break;
740 default:
741 base = 8;
742 break;
745 while (len-- > 0)
747 c = *p++;
748 if (c >= 'A' && c <= 'Z')
749 c += 'a' - 'A';
750 if (c != 'l' && c != 'u')
751 n *= base;
752 if (c >= '0' && c <= '9')
754 if (found_suffix)
755 return ERROR;
756 n += i = c - '0';
758 else
760 if (base > 10 && c >= 'a' && c <= 'f')
762 if (found_suffix)
763 return ERROR;
764 n += i = c - 'a' + 10;
766 else if (c == 'l')
768 ++long_p;
769 found_suffix = 1;
771 else if (c == 'u')
773 unsigned_p = 1;
774 found_suffix = 1;
776 else
777 return ERROR; /* Char not a digit */
779 if (i >= base)
780 return ERROR; /* Invalid digit in this base. */
782 /* Portably test for overflow (only works for nonzero values, so make
783 a second check for zero). FIXME: Can't we just make n and prevn
784 unsigned and avoid this? */
785 if (c != 'l' && c != 'u' && (prevn >= n) && n != 0)
786 unsigned_p = 1; /* Try something unsigned. */
788 /* Portably test for unsigned overflow.
789 FIXME: This check is wrong; for example it doesn't find overflow
790 on 0x123456789 when LONGEST is 32 bits. */
791 if (c != 'l' && c != 'u' && n != 0)
793 if ((unsigned_p && (ULONGEST) prevn >= (ULONGEST) n))
794 error (_("Numeric constant too large."));
796 prevn = n;
799 /* An integer constant is an int, a long, or a long long. An L
800 suffix forces it to be long; an LL suffix forces it to be long
801 long. If not forced to a larger size, it gets the first type of
802 the above that it fits in. To figure out whether it fits, we
803 shift it right and see whether anything remains. Note that we
804 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
805 operation, because many compilers will warn about such a shift
806 (which always produces a zero result). Sometimes gdbarch_int_bit
807 or gdbarch_long_bit will be that big, sometimes not. To deal with
808 the case where it is we just always shift the value more than
809 once, with fewer bits each time. */
811 un = (ULONGEST)n >> 2;
812 if (long_p == 0
813 && (un >> (gdbarch_int_bit (par_state->gdbarch ()) - 2)) == 0)
815 high_bit
816 = ((ULONGEST)1) << (gdbarch_int_bit (par_state->gdbarch ()) - 1);
818 /* A large decimal (not hex or octal) constant (between INT_MAX
819 and UINT_MAX) is a long or unsigned long, according to ANSI,
820 never an unsigned int, but this code treats it as unsigned
821 int. This probably should be fixed. GCC gives a warning on
822 such constants. */
824 unsigned_type = parse_type (par_state)->builtin_unsigned_int;
825 signed_type = parse_type (par_state)->builtin_int;
827 else if (long_p <= 1
828 && (un >> (gdbarch_long_bit (par_state->gdbarch ()) - 2)) == 0)
830 high_bit
831 = ((ULONGEST)1) << (gdbarch_long_bit (par_state->gdbarch ()) - 1);
832 unsigned_type = parse_type (par_state)->builtin_unsigned_long;
833 signed_type = parse_type (par_state)->builtin_long;
835 else
837 int shift;
838 if (sizeof (ULONGEST) * HOST_CHAR_BIT
839 < gdbarch_long_long_bit (par_state->gdbarch ()))
840 /* A long long does not fit in a LONGEST. */
841 shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
842 else
843 shift = (gdbarch_long_long_bit (par_state->gdbarch ()) - 1);
844 high_bit = (ULONGEST) 1 << shift;
845 unsigned_type = parse_type (par_state)->builtin_unsigned_long_long;
846 signed_type = parse_type (par_state)->builtin_long_long;
849 putithere->typed_val_int.val = n;
851 /* If the high bit of the worked out type is set then this number
852 has to be unsigned. */
854 if (unsigned_p || (n & high_bit))
856 putithere->typed_val_int.type = unsigned_type;
858 else
860 putithere->typed_val_int.type = signed_type;
863 return INT;
866 /* Temporary obstack used for holding strings. */
867 static struct obstack tempbuf;
868 static int tempbuf_init;
870 /* Parse a string or character literal from TOKPTR. The string or
871 character may be wide or unicode. *OUTPTR is set to just after the
872 end of the literal in the input string. The resulting token is
873 stored in VALUE. This returns a token value, either STRING or
874 CHAR, depending on what was parsed. *HOST_CHARS is set to the
875 number of host characters in the literal. */
877 static int
878 parse_string_or_char (const char *tokptr, const char **outptr,
879 struct typed_stoken *value, int *host_chars)
881 int quote;
883 /* Build the gdb internal form of the input string in tempbuf. Note
884 that the buffer is null byte terminated *only* for the
885 convenience of debugging gdb itself and printing the buffer
886 contents when the buffer contains no embedded nulls. Gdb does
887 not depend upon the buffer being null byte terminated, it uses
888 the length string instead. This allows gdb to handle C strings
889 (as well as strings in other languages) with embedded null
890 bytes */
892 if (!tempbuf_init)
893 tempbuf_init = 1;
894 else
895 obstack_free (&tempbuf, NULL);
896 obstack_init (&tempbuf);
898 /* Skip the quote. */
899 quote = *tokptr;
900 ++tokptr;
902 *host_chars = 0;
904 while (*tokptr)
906 char c = *tokptr;
907 if (c == '\\')
909 ++tokptr;
910 *host_chars += c_parse_escape (&tokptr, &tempbuf);
912 else if (c == quote)
913 break;
914 else
916 obstack_1grow (&tempbuf, c);
917 ++tokptr;
918 /* FIXME: this does the wrong thing with multi-byte host
919 characters. We could use mbrlen here, but that would
920 make "set host-charset" a bit less useful. */
921 ++*host_chars;
925 if (*tokptr != quote)
927 if (quote == '"')
928 error (_("Unterminated string in expression."));
929 else
930 error (_("Unmatched single quote."));
932 ++tokptr;
934 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
935 value->ptr = (char *) obstack_base (&tempbuf);
936 value->length = obstack_object_size (&tempbuf);
938 *outptr = tokptr;
940 return quote == '\'' ? CHAR : STRING;
943 struct token
945 const char *oper;
946 int token;
947 enum exp_opcode opcode;
950 static const struct token tokentab3[] =
952 {">>=", ASSIGN_MODIFY, BINOP_RSH},
953 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
954 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
955 {"...", DOTDOTDOT, OP_NULL},
958 static const struct token tokentab2[] =
960 {"+=", ASSIGN_MODIFY, BINOP_ADD},
961 {"-=", ASSIGN_MODIFY, BINOP_SUB},
962 {"*=", ASSIGN_MODIFY, BINOP_MUL},
963 {"/=", ASSIGN_MODIFY, BINOP_DIV},
964 {"%=", ASSIGN_MODIFY, BINOP_REM},
965 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
966 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
967 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
968 {"++", INCREMENT, OP_NULL},
969 {"--", DECREMENT, OP_NULL},
970 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */
971 {"<-", LEFT_ARROW, OP_NULL},
972 {"&&", ANDAND, OP_NULL},
973 {"||", OROR, OP_NULL},
974 {"<<", LSH, OP_NULL},
975 {">>", RSH, OP_NULL},
976 {"==", EQUAL, OP_NULL},
977 {"!=", NOTEQUAL, OP_NULL},
978 {"<=", LEQ, OP_NULL},
979 {">=", GEQ, OP_NULL},
980 /*{"&^", ANDNOT, OP_NULL}, TODO */
983 /* Identifier-like tokens. */
984 static const struct token ident_tokens[] =
986 {"true", TRUE_KEYWORD, OP_NULL},
987 {"false", FALSE_KEYWORD, OP_NULL},
988 {"nil", NIL_KEYWORD, OP_NULL},
989 {"const", CONST_KEYWORD, OP_NULL},
990 {"struct", STRUCT_KEYWORD, OP_NULL},
991 {"type", TYPE_KEYWORD, OP_NULL},
992 {"interface", INTERFACE_KEYWORD, OP_NULL},
993 {"chan", CHAN_KEYWORD, OP_NULL},
994 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
995 {"len", LEN_KEYWORD, OP_NULL},
996 {"cap", CAP_KEYWORD, OP_NULL},
997 {"new", NEW_KEYWORD, OP_NULL},
998 {"iota", IOTA_KEYWORD, OP_NULL},
1001 /* This is set if a NAME token appeared at the very end of the input
1002 string, with no whitespace separating the name from the EOF. This
1003 is used only when parsing to do field name completion. */
1004 static int saw_name_at_eof;
1006 /* This is set if the previously-returned token was a structure
1007 operator -- either '.' or ARROW. This is used only when parsing to
1008 do field name completion. */
1009 static int last_was_structop;
1011 /* Depth of parentheses. */
1012 static int paren_depth;
1014 /* Read one token, getting characters through lexptr. */
1016 static int
1017 lex_one_token (struct parser_state *par_state)
1019 int c;
1020 int namelen;
1021 unsigned int i;
1022 const char *tokstart;
1023 int saw_structop = last_was_structop;
1025 last_was_structop = 0;
1027 retry:
1029 par_state->prev_lexptr = par_state->lexptr;
1031 tokstart = par_state->lexptr;
1032 /* See if it is a special token of length 3. */
1033 for (i = 0; i < sizeof (tokentab3) / sizeof (tokentab3[0]); i++)
1034 if (strncmp (tokstart, tokentab3[i].oper, 3) == 0)
1036 par_state->lexptr += 3;
1037 yylval.opcode = tokentab3[i].opcode;
1038 return tokentab3[i].token;
1041 /* See if it is a special token of length 2. */
1042 for (i = 0; i < sizeof (tokentab2) / sizeof (tokentab2[0]); i++)
1043 if (strncmp (tokstart, tokentab2[i].oper, 2) == 0)
1045 par_state->lexptr += 2;
1046 yylval.opcode = tokentab2[i].opcode;
1047 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1048 setting last_was_structop here. */
1049 return tokentab2[i].token;
1052 switch (c = *tokstart)
1054 case 0:
1055 if (saw_name_at_eof)
1057 saw_name_at_eof = 0;
1058 return COMPLETE;
1060 else if (saw_structop)
1061 return COMPLETE;
1062 else
1063 return 0;
1065 case ' ':
1066 case '\t':
1067 case '\n':
1068 par_state->lexptr++;
1069 goto retry;
1071 case '[':
1072 case '(':
1073 paren_depth++;
1074 par_state->lexptr++;
1075 return c;
1077 case ']':
1078 case ')':
1079 if (paren_depth == 0)
1080 return 0;
1081 paren_depth--;
1082 par_state->lexptr++;
1083 return c;
1085 case ',':
1086 if (pstate->comma_terminates
1087 && paren_depth == 0)
1088 return 0;
1089 par_state->lexptr++;
1090 return c;
1092 case '.':
1093 /* Might be a floating point number. */
1094 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1096 if (pstate->parse_completion)
1097 last_was_structop = 1;
1098 goto symbol; /* Nope, must be a symbol. */
1100 /* FALL THRU. */
1102 case '0':
1103 case '1':
1104 case '2':
1105 case '3':
1106 case '4':
1107 case '5':
1108 case '6':
1109 case '7':
1110 case '8':
1111 case '9':
1113 /* It's a number. */
1114 int got_dot = 0, got_e = 0, toktype;
1115 const char *p = tokstart;
1116 int hex = input_radix > 10;
1118 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1120 p += 2;
1121 hex = 1;
1124 for (;; ++p)
1126 /* This test includes !hex because 'e' is a valid hex digit
1127 and thus does not indicate a floating point number when
1128 the radix is hex. */
1129 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1130 got_dot = got_e = 1;
1131 /* This test does not include !hex, because a '.' always indicates
1132 a decimal floating point number regardless of the radix. */
1133 else if (!got_dot && *p == '.')
1134 got_dot = 1;
1135 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1136 && (*p == '-' || *p == '+'))
1137 /* This is the sign of the exponent, not the end of the
1138 number. */
1139 continue;
1140 /* We will take any letters or digits. parse_number will
1141 complain if past the radix, or if L or U are not final. */
1142 else if ((*p < '0' || *p > '9')
1143 && ((*p < 'a' || *p > 'z')
1144 && (*p < 'A' || *p > 'Z')))
1145 break;
1147 toktype = parse_number (par_state, tokstart, p - tokstart,
1148 got_dot|got_e, &yylval);
1149 if (toktype == ERROR)
1151 char *err_copy = (char *) alloca (p - tokstart + 1);
1153 memcpy (err_copy, tokstart, p - tokstart);
1154 err_copy[p - tokstart] = 0;
1155 error (_("Invalid number \"%s\"."), err_copy);
1157 par_state->lexptr = p;
1158 return toktype;
1161 case '@':
1163 const char *p = &tokstart[1];
1164 size_t len = strlen ("entry");
1166 while (isspace (*p))
1167 p++;
1168 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1169 && p[len] != '_')
1171 par_state->lexptr = &p[len];
1172 return ENTRY;
1175 /* FALLTHRU */
1176 case '+':
1177 case '-':
1178 case '*':
1179 case '/':
1180 case '%':
1181 case '|':
1182 case '&':
1183 case '^':
1184 case '~':
1185 case '!':
1186 case '<':
1187 case '>':
1188 case '?':
1189 case ':':
1190 case '=':
1191 case '{':
1192 case '}':
1193 symbol:
1194 par_state->lexptr++;
1195 return c;
1197 case '\'':
1198 case '"':
1199 case '`':
1201 int host_len;
1202 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1203 &yylval.tsval, &host_len);
1204 if (result == CHAR)
1206 if (host_len == 0)
1207 error (_("Empty character constant."));
1208 else if (host_len > 2 && c == '\'')
1210 ++tokstart;
1211 namelen = par_state->lexptr - tokstart - 1;
1212 goto tryname;
1214 else if (host_len > 1)
1215 error (_("Invalid character constant."));
1217 return result;
1221 if (!(c == '_' || c == '$'
1222 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1223 /* We must have come across a bad character (e.g. ';'). */
1224 error (_("Invalid character '%c' in expression."), c);
1226 /* It's a name. See how long it is. */
1227 namelen = 0;
1228 for (c = tokstart[namelen];
1229 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1230 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1232 c = tokstart[++namelen];
1235 /* The token "if" terminates the expression and is NOT removed from
1236 the input stream. It doesn't count if it appears in the
1237 expansion of a macro. */
1238 if (namelen == 2
1239 && tokstart[0] == 'i'
1240 && tokstart[1] == 'f')
1242 return 0;
1245 /* For the same reason (breakpoint conditions), "thread N"
1246 terminates the expression. "thread" could be an identifier, but
1247 an identifier is never followed by a number without intervening
1248 punctuation.
1249 Handle abbreviations of these, similarly to
1250 breakpoint.c:find_condition_and_thread.
1251 TODO: Watch for "goroutine" here? */
1252 if (namelen >= 1
1253 && strncmp (tokstart, "thread", namelen) == 0
1254 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1256 const char *p = tokstart + namelen + 1;
1258 while (*p == ' ' || *p == '\t')
1259 p++;
1260 if (*p >= '0' && *p <= '9')
1261 return 0;
1264 par_state->lexptr += namelen;
1266 tryname:
1268 yylval.sval.ptr = tokstart;
1269 yylval.sval.length = namelen;
1271 /* Catch specific keywords. */
1272 std::string copy = copy_name (yylval.sval);
1273 for (i = 0; i < sizeof (ident_tokens) / sizeof (ident_tokens[0]); i++)
1274 if (copy == ident_tokens[i].oper)
1276 /* It is ok to always set this, even though we don't always
1277 strictly need to. */
1278 yylval.opcode = ident_tokens[i].opcode;
1279 return ident_tokens[i].token;
1282 if (*tokstart == '$')
1283 return DOLLAR_VARIABLE;
1285 if (pstate->parse_completion && *par_state->lexptr == '\0')
1286 saw_name_at_eof = 1;
1287 return NAME;
1290 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1291 struct token_and_value
1293 int token;
1294 YYSTYPE value;
1297 /* A FIFO of tokens that have been read but not yet returned to the
1298 parser. */
1299 static std::vector<token_and_value> token_fifo;
1301 /* Non-zero if the lexer should return tokens from the FIFO. */
1302 static int popping;
1304 /* Temporary storage for yylex; this holds symbol names as they are
1305 built up. */
1306 static auto_obstack name_obstack;
1308 /* Build "package.name" in name_obstack.
1309 For convenience of the caller, the name is NUL-terminated,
1310 but the NUL is not included in the recorded length. */
1312 static struct stoken
1313 build_packaged_name (const char *package, int package_len,
1314 const char *name, int name_len)
1316 struct stoken result;
1318 name_obstack.clear ();
1319 obstack_grow (&name_obstack, package, package_len);
1320 obstack_grow_str (&name_obstack, ".");
1321 obstack_grow (&name_obstack, name, name_len);
1322 obstack_grow (&name_obstack, "", 1);
1323 result.ptr = (char *) obstack_base (&name_obstack);
1324 result.length = obstack_object_size (&name_obstack) - 1;
1326 return result;
1329 /* Return non-zero if NAME is a package name.
1330 BLOCK is the scope in which to interpret NAME; this can be NULL
1331 to mean the global scope. */
1333 static int
1334 package_name_p (const char *name, const struct block *block)
1336 struct symbol *sym;
1337 struct field_of_this_result is_a_field_of_this;
1339 sym = lookup_symbol (name, block, STRUCT_DOMAIN, &is_a_field_of_this).symbol;
1341 if (sym
1342 && SYMBOL_CLASS (sym) == LOC_TYPEDEF
1343 && SYMBOL_TYPE (sym)->code () == TYPE_CODE_MODULE)
1344 return 1;
1346 return 0;
1349 /* Classify a (potential) function in the "unsafe" package.
1350 We fold these into "keywords" to keep things simple, at least until
1351 something more complex is warranted. */
1353 static int
1354 classify_unsafe_function (struct stoken function_name)
1356 std::string copy = copy_name (function_name);
1358 if (copy == "Sizeof")
1360 yylval.sval = function_name;
1361 return SIZEOF_KEYWORD;
1364 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ());
1367 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1368 The contents of the token are in `yylval'.
1369 Updates yylval and returns the new token type.
1371 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1373 static int
1374 classify_packaged_name (const struct block *block)
1376 struct block_symbol sym;
1377 struct field_of_this_result is_a_field_of_this;
1379 std::string copy = copy_name (yylval.sval);
1381 sym = lookup_symbol (copy.c_str (), block, VAR_DOMAIN, &is_a_field_of_this);
1383 if (sym.symbol)
1385 yylval.ssym.sym = sym;
1386 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1389 return NAME;
1392 /* Classify a NAME token.
1393 The contents of the token are in `yylval'.
1394 Updates yylval and returns the new token type.
1395 BLOCK is the block in which lookups start; this can be NULL
1396 to mean the global scope.
1398 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1400 static int
1401 classify_name (struct parser_state *par_state, const struct block *block)
1403 struct type *type;
1404 struct block_symbol sym;
1405 struct field_of_this_result is_a_field_of_this;
1407 std::string copy = copy_name (yylval.sval);
1409 /* Try primitive types first so they win over bad/weird debug info. */
1410 type = language_lookup_primitive_type (par_state->language (),
1411 par_state->gdbarch (),
1412 copy.c_str ());
1413 if (type != NULL)
1415 /* NOTE: We take advantage of the fact that yylval coming in was a
1416 NAME, and that struct ttype is a compatible extension of struct
1417 stoken, so yylval.tsym.stoken is already filled in. */
1418 yylval.tsym.type = type;
1419 return TYPENAME;
1422 /* TODO: What about other types? */
1424 sym = lookup_symbol (copy.c_str (), block, VAR_DOMAIN, &is_a_field_of_this);
1426 if (sym.symbol)
1428 yylval.ssym.sym = sym;
1429 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1430 return NAME;
1433 /* If we didn't find a symbol, look again in the current package.
1434 This is to, e.g., make "p global_var" work without having to specify
1435 the package name. We intentionally only looks for objects in the
1436 current package. */
1439 char *current_package_name = go_block_package_name (block);
1441 if (current_package_name != NULL)
1443 struct stoken sval =
1444 build_packaged_name (current_package_name,
1445 strlen (current_package_name),
1446 copy.c_str (), copy.size ());
1448 xfree (current_package_name);
1449 sym = lookup_symbol (sval.ptr, block, VAR_DOMAIN,
1450 &is_a_field_of_this);
1451 if (sym.symbol)
1453 yylval.ssym.stoken = sval;
1454 yylval.ssym.sym = sym;
1455 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1456 return NAME;
1461 /* Input names that aren't symbols but ARE valid hex numbers, when
1462 the input radix permits them, can be names or numbers depending
1463 on the parse. Note we support radixes > 16 here. */
1464 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1465 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1467 YYSTYPE newlval; /* Its value is ignored. */
1468 int hextype = parse_number (par_state, copy.c_str (),
1469 yylval.sval.length, 0, &newlval);
1470 if (hextype == INT)
1472 yylval.ssym.sym.symbol = NULL;
1473 yylval.ssym.sym.block = NULL;
1474 yylval.ssym.is_a_field_of_this = 0;
1475 return NAME_OR_INT;
1479 yylval.ssym.sym.symbol = NULL;
1480 yylval.ssym.sym.block = NULL;
1481 yylval.ssym.is_a_field_of_this = 0;
1482 return NAME;
1485 /* This is taken from c-exp.y mostly to get something working.
1486 The basic structure has been kept because we may yet need some of it. */
1488 static int
1489 yylex (void)
1491 token_and_value current, next;
1493 if (popping && !token_fifo.empty ())
1495 token_and_value tv = token_fifo[0];
1496 token_fifo.erase (token_fifo.begin ());
1497 yylval = tv.value;
1498 /* There's no need to fall through to handle package.name
1499 as that can never happen here. In theory. */
1500 return tv.token;
1502 popping = 0;
1504 current.token = lex_one_token (pstate);
1506 /* TODO: Need a way to force specifying name1 as a package.
1507 .name1.name2 ? */
1509 if (current.token != NAME)
1510 return current.token;
1512 /* See if we have "name1 . name2". */
1514 current.value = yylval;
1515 next.token = lex_one_token (pstate);
1516 next.value = yylval;
1518 if (next.token == '.')
1520 token_and_value name2;
1522 name2.token = lex_one_token (pstate);
1523 name2.value = yylval;
1525 if (name2.token == NAME)
1527 /* Ok, we have "name1 . name2". */
1528 std::string copy = copy_name (current.value.sval);
1530 if (copy == "unsafe")
1532 popping = 1;
1533 return classify_unsafe_function (name2.value.sval);
1536 if (package_name_p (copy.c_str (), pstate->expression_context_block))
1538 popping = 1;
1539 yylval.sval = build_packaged_name (current.value.sval.ptr,
1540 current.value.sval.length,
1541 name2.value.sval.ptr,
1542 name2.value.sval.length);
1543 return classify_packaged_name (pstate->expression_context_block);
1547 token_fifo.push_back (next);
1548 token_fifo.push_back (name2);
1550 else
1551 token_fifo.push_back (next);
1553 /* If we arrive here we don't have a package-qualified name. */
1555 popping = 1;
1556 yylval = current.value;
1557 return classify_name (pstate, pstate->expression_context_block);
1560 /* See language.h. */
1563 go_language::parser (struct parser_state *par_state) const
1565 /* Setting up the parser state. */
1566 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1567 gdb_assert (par_state != NULL);
1568 pstate = par_state;
1570 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1571 parser_debug);
1573 /* Initialize some state used by the lexer. */
1574 last_was_structop = 0;
1575 saw_name_at_eof = 0;
1576 paren_depth = 0;
1578 token_fifo.clear ();
1579 popping = 0;
1580 name_obstack.clear ();
1582 int result = yyparse ();
1583 if (!result)
1584 pstate->set_operation (pstate->pop ());
1585 return result;
1588 static void
1589 yyerror (const char *msg)
1591 if (pstate->prev_lexptr)
1592 pstate->lexptr = pstate->prev_lexptr;
1594 error (_("A %s in expression, near `%s'."), msg, pstate->lexptr);