[binutils, ARM, 12/16] Scalar Low Overhead loop instructions for Armv8.1-M Mainline
[binutils-gdb.git] / gdb / go-exp.y
blob0084b2f7eb6fe6f56cc780be552e7cdcd690b7f1
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2019 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include "defs.h"
55 #include <ctype.h>
56 #include "expression.h"
57 #include "value.h"
58 #include "parser-defs.h"
59 #include "language.h"
60 #include "c-lang.h"
61 #include "go-lang.h"
62 #include "bfd.h" /* Required by objfiles.h. */
63 #include "symfile.h" /* Required by objfiles.h. */
64 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
65 #include "charset.h"
66 #include "block.h"
68 #define parse_type(ps) builtin_type (ps->gdbarch ())
70 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
71 etc). */
72 #define GDB_YY_REMAP_PREFIX go_
73 #include "yy-remap.h"
75 /* The state of the parser, used internally when we are parsing the
76 expression. */
78 static struct parser_state *pstate = NULL;
80 int yyparse (void);
82 static int yylex (void);
84 static void yyerror (const char *);
88 /* Although the yacc "value" of an expression is not used,
89 since the result is stored in the structure being created,
90 other node types do have values. */
92 %union
94 LONGEST lval;
95 struct {
96 LONGEST val;
97 struct type *type;
98 } typed_val_int;
99 struct {
100 gdb_byte val[16];
101 struct type *type;
102 } typed_val_float;
103 struct stoken sval;
104 struct symtoken ssym;
105 struct type *tval;
106 struct typed_stoken tsval;
107 struct ttype tsym;
108 int voidval;
109 enum exp_opcode opcode;
110 struct internalvar *ivar;
111 struct stoken_vector svec;
115 /* YYSTYPE gets defined by %union. */
116 static int parse_number (struct parser_state *,
117 const char *, int, int, YYSTYPE *);
120 %type <voidval> exp exp1 type_exp start variable lcurly
121 %type <lval> rcurly
122 %type <tval> type
124 %token <typed_val_int> INT
125 %token <typed_val_float> FLOAT
127 /* Both NAME and TYPENAME tokens represent symbols in the input,
128 and both convey their data as strings.
129 But a TYPENAME is a string that happens to be defined as a type
130 or builtin type name (such as int or char)
131 and a NAME is any other symbol.
132 Contexts where this distinction is not important can use the
133 nonterminal "name", which matches either NAME or TYPENAME. */
135 %token <tsval> RAW_STRING
136 %token <tsval> STRING
137 %token <tsval> CHAR
138 %token <ssym> NAME
139 %token <tsym> TYPENAME /* Not TYPE_NAME because that name is already taken. */
140 %token <voidval> COMPLETE
141 /*%type <sval> name*/
142 %type <svec> string_exp
143 %type <ssym> name_not_typename
145 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
146 but which would parse as a valid number in the current input radix.
147 E.g. "c" when input_radix==16. Depending on the parse, it will be
148 turned into a name or into a number. */
149 %token <ssym> NAME_OR_INT
151 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
152 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
153 %token SIZEOF_KEYWORD
154 %token LEN_KEYWORD CAP_KEYWORD
155 %token NEW_KEYWORD
156 %token IOTA_KEYWORD NIL_KEYWORD
157 %token CONST_KEYWORD
158 %token DOTDOTDOT
159 %token ENTRY
160 %token ERROR
162 /* Special type cases. */
163 %token BYTE_KEYWORD /* An alias of uint8. */
165 %token <sval> DOLLAR_VARIABLE
167 %token <opcode> ASSIGN_MODIFY
169 %left ','
170 %left ABOVE_COMMA
171 %right '=' ASSIGN_MODIFY
172 %right '?'
173 %left OROR
174 %left ANDAND
175 %left '|'
176 %left '^'
177 %left '&'
178 %left ANDNOT
179 %left EQUAL NOTEQUAL
180 %left '<' '>' LEQ GEQ
181 %left LSH RSH
182 %left '@'
183 %left '+' '-'
184 %left '*' '/' '%'
185 %right UNARY INCREMENT DECREMENT
186 %right LEFT_ARROW '.' '[' '('
191 start : exp1
192 | type_exp
195 type_exp: type
196 { write_exp_elt_opcode (pstate, OP_TYPE);
197 write_exp_elt_type (pstate, $1);
198 write_exp_elt_opcode (pstate, OP_TYPE); }
201 /* Expressions, including the comma operator. */
202 exp1 : exp
203 | exp1 ',' exp
204 { write_exp_elt_opcode (pstate, BINOP_COMMA); }
207 /* Expressions, not including the comma operator. */
208 exp : '*' exp %prec UNARY
209 { write_exp_elt_opcode (pstate, UNOP_IND); }
212 exp : '&' exp %prec UNARY
213 { write_exp_elt_opcode (pstate, UNOP_ADDR); }
216 exp : '-' exp %prec UNARY
217 { write_exp_elt_opcode (pstate, UNOP_NEG); }
220 exp : '+' exp %prec UNARY
221 { write_exp_elt_opcode (pstate, UNOP_PLUS); }
224 exp : '!' exp %prec UNARY
225 { write_exp_elt_opcode (pstate, UNOP_LOGICAL_NOT); }
228 exp : '^' exp %prec UNARY
229 { write_exp_elt_opcode (pstate, UNOP_COMPLEMENT); }
232 exp : exp INCREMENT %prec UNARY
233 { write_exp_elt_opcode (pstate, UNOP_POSTINCREMENT); }
236 exp : exp DECREMENT %prec UNARY
237 { write_exp_elt_opcode (pstate, UNOP_POSTDECREMENT); }
240 /* foo->bar is not in Go. May want as a gdb extension. Later. */
242 exp : exp '.' name_not_typename
243 { write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
244 write_exp_string (pstate, $3.stoken);
245 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
248 exp : exp '.' name_not_typename COMPLETE
249 { pstate->mark_struct_expression ();
250 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
251 write_exp_string (pstate, $3.stoken);
252 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
255 exp : exp '.' COMPLETE
256 { struct stoken s;
257 pstate->mark_struct_expression ();
258 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
259 s.ptr = "";
260 s.length = 0;
261 write_exp_string (pstate, s);
262 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
265 exp : exp '[' exp1 ']'
266 { write_exp_elt_opcode (pstate, BINOP_SUBSCRIPT); }
269 exp : exp '('
270 /* This is to save the value of arglist_len
271 being accumulated by an outer function call. */
272 { pstate->start_arglist (); }
273 arglist ')' %prec LEFT_ARROW
274 { write_exp_elt_opcode (pstate, OP_FUNCALL);
275 write_exp_elt_longcst (pstate,
276 pstate->end_arglist ());
277 write_exp_elt_opcode (pstate, OP_FUNCALL); }
280 lcurly : '{'
281 { pstate->start_arglist (); }
284 arglist :
287 arglist : exp
288 { pstate->arglist_len = 1; }
291 arglist : arglist ',' exp %prec ABOVE_COMMA
292 { pstate->arglist_len++; }
295 rcurly : '}'
296 { $$ = pstate->end_arglist () - 1; }
299 exp : lcurly type rcurly exp %prec UNARY
300 { write_exp_elt_opcode (pstate, UNOP_MEMVAL);
301 write_exp_elt_type (pstate, $2);
302 write_exp_elt_opcode (pstate, UNOP_MEMVAL); }
305 exp : type '(' exp ')' %prec UNARY
306 { write_exp_elt_opcode (pstate, UNOP_CAST);
307 write_exp_elt_type (pstate, $1);
308 write_exp_elt_opcode (pstate, UNOP_CAST); }
311 exp : '(' exp1 ')'
315 /* Binary operators in order of decreasing precedence. */
317 exp : exp '@' exp
318 { write_exp_elt_opcode (pstate, BINOP_REPEAT); }
321 exp : exp '*' exp
322 { write_exp_elt_opcode (pstate, BINOP_MUL); }
325 exp : exp '/' exp
326 { write_exp_elt_opcode (pstate, BINOP_DIV); }
329 exp : exp '%' exp
330 { write_exp_elt_opcode (pstate, BINOP_REM); }
333 exp : exp '+' exp
334 { write_exp_elt_opcode (pstate, BINOP_ADD); }
337 exp : exp '-' exp
338 { write_exp_elt_opcode (pstate, BINOP_SUB); }
341 exp : exp LSH exp
342 { write_exp_elt_opcode (pstate, BINOP_LSH); }
345 exp : exp RSH exp
346 { write_exp_elt_opcode (pstate, BINOP_RSH); }
349 exp : exp EQUAL exp
350 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
353 exp : exp NOTEQUAL exp
354 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
357 exp : exp LEQ exp
358 { write_exp_elt_opcode (pstate, BINOP_LEQ); }
361 exp : exp GEQ exp
362 { write_exp_elt_opcode (pstate, BINOP_GEQ); }
365 exp : exp '<' exp
366 { write_exp_elt_opcode (pstate, BINOP_LESS); }
369 exp : exp '>' exp
370 { write_exp_elt_opcode (pstate, BINOP_GTR); }
373 exp : exp '&' exp
374 { write_exp_elt_opcode (pstate, BINOP_BITWISE_AND); }
377 exp : exp '^' exp
378 { write_exp_elt_opcode (pstate, BINOP_BITWISE_XOR); }
381 exp : exp '|' exp
382 { write_exp_elt_opcode (pstate, BINOP_BITWISE_IOR); }
385 exp : exp ANDAND exp
386 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_AND); }
389 exp : exp OROR exp
390 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_OR); }
393 exp : exp '?' exp ':' exp %prec '?'
394 { write_exp_elt_opcode (pstate, TERNOP_COND); }
397 exp : exp '=' exp
398 { write_exp_elt_opcode (pstate, BINOP_ASSIGN); }
401 exp : exp ASSIGN_MODIFY exp
402 { write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY);
403 write_exp_elt_opcode (pstate, $2);
404 write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); }
407 exp : INT
408 { write_exp_elt_opcode (pstate, OP_LONG);
409 write_exp_elt_type (pstate, $1.type);
410 write_exp_elt_longcst (pstate, (LONGEST)($1.val));
411 write_exp_elt_opcode (pstate, OP_LONG); }
414 exp : CHAR
416 struct stoken_vector vec;
417 vec.len = 1;
418 vec.tokens = &$1;
419 write_exp_string_vector (pstate, $1.type, &vec);
423 exp : NAME_OR_INT
424 { YYSTYPE val;
425 parse_number (pstate, $1.stoken.ptr,
426 $1.stoken.length, 0, &val);
427 write_exp_elt_opcode (pstate, OP_LONG);
428 write_exp_elt_type (pstate, val.typed_val_int.type);
429 write_exp_elt_longcst (pstate, (LONGEST)
430 val.typed_val_int.val);
431 write_exp_elt_opcode (pstate, OP_LONG);
436 exp : FLOAT
437 { write_exp_elt_opcode (pstate, OP_FLOAT);
438 write_exp_elt_type (pstate, $1.type);
439 write_exp_elt_floatcst (pstate, $1.val);
440 write_exp_elt_opcode (pstate, OP_FLOAT); }
443 exp : variable
446 exp : DOLLAR_VARIABLE
448 write_dollar_variable (pstate, $1);
452 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
454 /* TODO(dje): Go objects in structs. */
455 write_exp_elt_opcode (pstate, OP_LONG);
456 /* TODO(dje): What's the right type here? */
457 write_exp_elt_type
458 (pstate,
459 parse_type (pstate)->builtin_unsigned_int);
460 $3 = check_typedef ($3);
461 write_exp_elt_longcst (pstate,
462 (LONGEST) TYPE_LENGTH ($3));
463 write_exp_elt_opcode (pstate, OP_LONG);
467 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
469 /* TODO(dje): Go objects in structs. */
470 write_exp_elt_opcode (pstate, UNOP_SIZEOF);
473 string_exp:
474 STRING
476 /* We copy the string here, and not in the
477 lexer, to guarantee that we do not leak a
478 string. */
479 /* Note that we NUL-terminate here, but just
480 for convenience. */
481 struct typed_stoken *vec = XNEW (struct typed_stoken);
482 $$.len = 1;
483 $$.tokens = vec;
485 vec->type = $1.type;
486 vec->length = $1.length;
487 vec->ptr = (char *) malloc ($1.length + 1);
488 memcpy (vec->ptr, $1.ptr, $1.length + 1);
491 | string_exp '+' STRING
493 /* Note that we NUL-terminate here, but just
494 for convenience. */
495 char *p;
496 ++$$.len;
497 $$.tokens = XRESIZEVEC (struct typed_stoken,
498 $$.tokens, $$.len);
500 p = (char *) malloc ($3.length + 1);
501 memcpy (p, $3.ptr, $3.length + 1);
503 $$.tokens[$$.len - 1].type = $3.type;
504 $$.tokens[$$.len - 1].length = $3.length;
505 $$.tokens[$$.len - 1].ptr = p;
509 exp : string_exp %prec ABOVE_COMMA
511 int i;
513 write_exp_string_vector (pstate, 0 /*always utf8*/,
514 &$1);
515 for (i = 0; i < $1.len; ++i)
516 free ($1.tokens[i].ptr);
517 free ($1.tokens);
521 exp : TRUE_KEYWORD
522 { write_exp_elt_opcode (pstate, OP_BOOL);
523 write_exp_elt_longcst (pstate, (LONGEST) $1);
524 write_exp_elt_opcode (pstate, OP_BOOL); }
527 exp : FALSE_KEYWORD
528 { write_exp_elt_opcode (pstate, OP_BOOL);
529 write_exp_elt_longcst (pstate, (LONGEST) $1);
530 write_exp_elt_opcode (pstate, OP_BOOL); }
533 variable: name_not_typename ENTRY
534 { struct symbol *sym = $1.sym.symbol;
536 if (sym == NULL
537 || !SYMBOL_IS_ARGUMENT (sym)
538 || !symbol_read_needs_frame (sym))
539 error (_("@entry can be used only for function "
540 "parameters, not for \"%s\""),
541 copy_name ($1.stoken));
543 write_exp_elt_opcode (pstate, OP_VAR_ENTRY_VALUE);
544 write_exp_elt_sym (pstate, sym);
545 write_exp_elt_opcode (pstate, OP_VAR_ENTRY_VALUE);
549 variable: name_not_typename
550 { struct block_symbol sym = $1.sym;
552 if (sym.symbol)
554 if (symbol_read_needs_frame (sym.symbol))
555 pstate->block_tracker->update (sym);
557 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
558 write_exp_elt_block (pstate, sym.block);
559 write_exp_elt_sym (pstate, sym.symbol);
560 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
562 else if ($1.is_a_field_of_this)
564 /* TODO(dje): Can we get here?
565 E.g., via a mix of c++ and go? */
566 gdb_assert_not_reached ("go with `this' field");
568 else
570 struct bound_minimal_symbol msymbol;
571 char *arg = copy_name ($1.stoken);
573 msymbol =
574 lookup_bound_minimal_symbol (arg);
575 if (msymbol.minsym != NULL)
576 write_exp_msymbol (pstate, msymbol);
577 else if (!have_full_symbols ()
578 && !have_partial_symbols ())
579 error (_("No symbol table is loaded. "
580 "Use the \"file\" command."));
581 else
582 error (_("No symbol \"%s\" in current context."),
583 copy_name ($1.stoken));
588 /* TODO
589 method_exp: PACKAGENAME '.' name '.' name
595 type /* Implements (approximately): [*] type-specifier */
596 : '*' type
597 { $$ = lookup_pointer_type ($2); }
598 | TYPENAME
599 { $$ = $1.type; }
601 | STRUCT_KEYWORD name
602 { $$ = lookup_struct (copy_name ($2),
603 expression_context_block); }
605 | BYTE_KEYWORD
606 { $$ = builtin_go_type (pstate->gdbarch ())
607 ->builtin_uint8; }
610 /* TODO
611 name : NAME { $$ = $1.stoken; }
612 | TYPENAME { $$ = $1.stoken; }
613 | NAME_OR_INT { $$ = $1.stoken; }
617 name_not_typename
618 : NAME
619 /* These would be useful if name_not_typename was useful, but it is just
620 a fake for "variable", so these cause reduce/reduce conflicts because
621 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
622 =exp) or just an exp. If name_not_typename was ever used in an lvalue
623 context where only a name could occur, this might be useful.
624 | NAME_OR_INT
630 /* Take care of parsing a number (anything that starts with a digit).
631 Set yylval and return the token type; update lexptr.
632 LEN is the number of characters in it. */
634 /* FIXME: Needs some error checking for the float case. */
635 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
636 That will require moving the guts into a function that we both call
637 as our YYSTYPE is different than c-exp.y's */
639 static int
640 parse_number (struct parser_state *par_state,
641 const char *p, int len, int parsed_float, YYSTYPE *putithere)
643 /* FIXME: Shouldn't these be unsigned? We don't deal with negative values
644 here, and we do kind of silly things like cast to unsigned. */
645 LONGEST n = 0;
646 LONGEST prevn = 0;
647 ULONGEST un;
649 int i = 0;
650 int c;
651 int base = input_radix;
652 int unsigned_p = 0;
654 /* Number of "L" suffixes encountered. */
655 int long_p = 0;
657 /* We have found a "L" or "U" suffix. */
658 int found_suffix = 0;
660 ULONGEST high_bit;
661 struct type *signed_type;
662 struct type *unsigned_type;
664 if (parsed_float)
666 const struct builtin_go_type *builtin_go_types
667 = builtin_go_type (par_state->gdbarch ());
669 /* Handle suffixes: 'f' for float32, 'l' for long double.
670 FIXME: This appears to be an extension -- do we want this? */
671 if (len >= 1 && tolower (p[len - 1]) == 'f')
673 putithere->typed_val_float.type
674 = builtin_go_types->builtin_float32;
675 len--;
677 else if (len >= 1 && tolower (p[len - 1]) == 'l')
679 putithere->typed_val_float.type
680 = parse_type (par_state)->builtin_long_double;
681 len--;
683 /* Default type for floating-point literals is float64. */
684 else
686 putithere->typed_val_float.type
687 = builtin_go_types->builtin_float64;
690 if (!parse_float (p, len,
691 putithere->typed_val_float.type,
692 putithere->typed_val_float.val))
693 return ERROR;
694 return FLOAT;
697 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
698 if (p[0] == '0')
699 switch (p[1])
701 case 'x':
702 case 'X':
703 if (len >= 3)
705 p += 2;
706 base = 16;
707 len -= 2;
709 break;
711 case 'b':
712 case 'B':
713 if (len >= 3)
715 p += 2;
716 base = 2;
717 len -= 2;
719 break;
721 case 't':
722 case 'T':
723 case 'd':
724 case 'D':
725 if (len >= 3)
727 p += 2;
728 base = 10;
729 len -= 2;
731 break;
733 default:
734 base = 8;
735 break;
738 while (len-- > 0)
740 c = *p++;
741 if (c >= 'A' && c <= 'Z')
742 c += 'a' - 'A';
743 if (c != 'l' && c != 'u')
744 n *= base;
745 if (c >= '0' && c <= '9')
747 if (found_suffix)
748 return ERROR;
749 n += i = c - '0';
751 else
753 if (base > 10 && c >= 'a' && c <= 'f')
755 if (found_suffix)
756 return ERROR;
757 n += i = c - 'a' + 10;
759 else if (c == 'l')
761 ++long_p;
762 found_suffix = 1;
764 else if (c == 'u')
766 unsigned_p = 1;
767 found_suffix = 1;
769 else
770 return ERROR; /* Char not a digit */
772 if (i >= base)
773 return ERROR; /* Invalid digit in this base. */
775 /* Portably test for overflow (only works for nonzero values, so make
776 a second check for zero). FIXME: Can't we just make n and prevn
777 unsigned and avoid this? */
778 if (c != 'l' && c != 'u' && (prevn >= n) && n != 0)
779 unsigned_p = 1; /* Try something unsigned. */
781 /* Portably test for unsigned overflow.
782 FIXME: This check is wrong; for example it doesn't find overflow
783 on 0x123456789 when LONGEST is 32 bits. */
784 if (c != 'l' && c != 'u' && n != 0)
786 if ((unsigned_p && (ULONGEST) prevn >= (ULONGEST) n))
787 error (_("Numeric constant too large."));
789 prevn = n;
792 /* An integer constant is an int, a long, or a long long. An L
793 suffix forces it to be long; an LL suffix forces it to be long
794 long. If not forced to a larger size, it gets the first type of
795 the above that it fits in. To figure out whether it fits, we
796 shift it right and see whether anything remains. Note that we
797 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
798 operation, because many compilers will warn about such a shift
799 (which always produces a zero result). Sometimes gdbarch_int_bit
800 or gdbarch_long_bit will be that big, sometimes not. To deal with
801 the case where it is we just always shift the value more than
802 once, with fewer bits each time. */
804 un = (ULONGEST)n >> 2;
805 if (long_p == 0
806 && (un >> (gdbarch_int_bit (par_state->gdbarch ()) - 2)) == 0)
808 high_bit
809 = ((ULONGEST)1) << (gdbarch_int_bit (par_state->gdbarch ()) - 1);
811 /* A large decimal (not hex or octal) constant (between INT_MAX
812 and UINT_MAX) is a long or unsigned long, according to ANSI,
813 never an unsigned int, but this code treats it as unsigned
814 int. This probably should be fixed. GCC gives a warning on
815 such constants. */
817 unsigned_type = parse_type (par_state)->builtin_unsigned_int;
818 signed_type = parse_type (par_state)->builtin_int;
820 else if (long_p <= 1
821 && (un >> (gdbarch_long_bit (par_state->gdbarch ()) - 2)) == 0)
823 high_bit
824 = ((ULONGEST)1) << (gdbarch_long_bit (par_state->gdbarch ()) - 1);
825 unsigned_type = parse_type (par_state)->builtin_unsigned_long;
826 signed_type = parse_type (par_state)->builtin_long;
828 else
830 int shift;
831 if (sizeof (ULONGEST) * HOST_CHAR_BIT
832 < gdbarch_long_long_bit (par_state->gdbarch ()))
833 /* A long long does not fit in a LONGEST. */
834 shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
835 else
836 shift = (gdbarch_long_long_bit (par_state->gdbarch ()) - 1);
837 high_bit = (ULONGEST) 1 << shift;
838 unsigned_type = parse_type (par_state)->builtin_unsigned_long_long;
839 signed_type = parse_type (par_state)->builtin_long_long;
842 putithere->typed_val_int.val = n;
844 /* If the high bit of the worked out type is set then this number
845 has to be unsigned. */
847 if (unsigned_p || (n & high_bit))
849 putithere->typed_val_int.type = unsigned_type;
851 else
853 putithere->typed_val_int.type = signed_type;
856 return INT;
859 /* Temporary obstack used for holding strings. */
860 static struct obstack tempbuf;
861 static int tempbuf_init;
863 /* Parse a string or character literal from TOKPTR. The string or
864 character may be wide or unicode. *OUTPTR is set to just after the
865 end of the literal in the input string. The resulting token is
866 stored in VALUE. This returns a token value, either STRING or
867 CHAR, depending on what was parsed. *HOST_CHARS is set to the
868 number of host characters in the literal. */
870 static int
871 parse_string_or_char (const char *tokptr, const char **outptr,
872 struct typed_stoken *value, int *host_chars)
874 int quote;
876 /* Build the gdb internal form of the input string in tempbuf. Note
877 that the buffer is null byte terminated *only* for the
878 convenience of debugging gdb itself and printing the buffer
879 contents when the buffer contains no embedded nulls. Gdb does
880 not depend upon the buffer being null byte terminated, it uses
881 the length string instead. This allows gdb to handle C strings
882 (as well as strings in other languages) with embedded null
883 bytes */
885 if (!tempbuf_init)
886 tempbuf_init = 1;
887 else
888 obstack_free (&tempbuf, NULL);
889 obstack_init (&tempbuf);
891 /* Skip the quote. */
892 quote = *tokptr;
893 ++tokptr;
895 *host_chars = 0;
897 while (*tokptr)
899 char c = *tokptr;
900 if (c == '\\')
902 ++tokptr;
903 *host_chars += c_parse_escape (&tokptr, &tempbuf);
905 else if (c == quote)
906 break;
907 else
909 obstack_1grow (&tempbuf, c);
910 ++tokptr;
911 /* FIXME: this does the wrong thing with multi-byte host
912 characters. We could use mbrlen here, but that would
913 make "set host-charset" a bit less useful. */
914 ++*host_chars;
918 if (*tokptr != quote)
920 if (quote == '"')
921 error (_("Unterminated string in expression."));
922 else
923 error (_("Unmatched single quote."));
925 ++tokptr;
927 value->type = C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
928 value->ptr = (char *) obstack_base (&tempbuf);
929 value->length = obstack_object_size (&tempbuf);
931 *outptr = tokptr;
933 return quote == '\'' ? CHAR : STRING;
936 struct token
938 const char *oper;
939 int token;
940 enum exp_opcode opcode;
943 static const struct token tokentab3[] =
945 {">>=", ASSIGN_MODIFY, BINOP_RSH},
946 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
947 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
948 {"...", DOTDOTDOT, OP_NULL},
951 static const struct token tokentab2[] =
953 {"+=", ASSIGN_MODIFY, BINOP_ADD},
954 {"-=", ASSIGN_MODIFY, BINOP_SUB},
955 {"*=", ASSIGN_MODIFY, BINOP_MUL},
956 {"/=", ASSIGN_MODIFY, BINOP_DIV},
957 {"%=", ASSIGN_MODIFY, BINOP_REM},
958 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
959 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
960 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
961 {"++", INCREMENT, BINOP_END},
962 {"--", DECREMENT, BINOP_END},
963 /*{"->", RIGHT_ARROW, BINOP_END}, Doesn't exist in Go. */
964 {"<-", LEFT_ARROW, BINOP_END},
965 {"&&", ANDAND, BINOP_END},
966 {"||", OROR, BINOP_END},
967 {"<<", LSH, BINOP_END},
968 {">>", RSH, BINOP_END},
969 {"==", EQUAL, BINOP_END},
970 {"!=", NOTEQUAL, BINOP_END},
971 {"<=", LEQ, BINOP_END},
972 {">=", GEQ, BINOP_END},
973 /*{"&^", ANDNOT, BINOP_END}, TODO */
976 /* Identifier-like tokens. */
977 static const struct token ident_tokens[] =
979 {"true", TRUE_KEYWORD, OP_NULL},
980 {"false", FALSE_KEYWORD, OP_NULL},
981 {"nil", NIL_KEYWORD, OP_NULL},
982 {"const", CONST_KEYWORD, OP_NULL},
983 {"struct", STRUCT_KEYWORD, OP_NULL},
984 {"type", TYPE_KEYWORD, OP_NULL},
985 {"interface", INTERFACE_KEYWORD, OP_NULL},
986 {"chan", CHAN_KEYWORD, OP_NULL},
987 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
988 {"len", LEN_KEYWORD, OP_NULL},
989 {"cap", CAP_KEYWORD, OP_NULL},
990 {"new", NEW_KEYWORD, OP_NULL},
991 {"iota", IOTA_KEYWORD, OP_NULL},
994 /* This is set if a NAME token appeared at the very end of the input
995 string, with no whitespace separating the name from the EOF. This
996 is used only when parsing to do field name completion. */
997 static int saw_name_at_eof;
999 /* This is set if the previously-returned token was a structure
1000 operator -- either '.' or ARROW. This is used only when parsing to
1001 do field name completion. */
1002 static int last_was_structop;
1004 /* Depth of parentheses. */
1005 static int paren_depth;
1007 /* Read one token, getting characters through lexptr. */
1009 static int
1010 lex_one_token (struct parser_state *par_state)
1012 int c;
1013 int namelen;
1014 unsigned int i;
1015 const char *tokstart;
1016 int saw_structop = last_was_structop;
1017 char *copy;
1019 last_was_structop = 0;
1021 retry:
1023 par_state->prev_lexptr = par_state->lexptr;
1025 tokstart = par_state->lexptr;
1026 /* See if it is a special token of length 3. */
1027 for (i = 0; i < sizeof (tokentab3) / sizeof (tokentab3[0]); i++)
1028 if (strncmp (tokstart, tokentab3[i].oper, 3) == 0)
1030 par_state->lexptr += 3;
1031 yylval.opcode = tokentab3[i].opcode;
1032 return tokentab3[i].token;
1035 /* See if it is a special token of length 2. */
1036 for (i = 0; i < sizeof (tokentab2) / sizeof (tokentab2[0]); i++)
1037 if (strncmp (tokstart, tokentab2[i].oper, 2) == 0)
1039 par_state->lexptr += 2;
1040 yylval.opcode = tokentab2[i].opcode;
1041 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1042 setting last_was_structop here. */
1043 return tokentab2[i].token;
1046 switch (c = *tokstart)
1048 case 0:
1049 if (saw_name_at_eof)
1051 saw_name_at_eof = 0;
1052 return COMPLETE;
1054 else if (saw_structop)
1055 return COMPLETE;
1056 else
1057 return 0;
1059 case ' ':
1060 case '\t':
1061 case '\n':
1062 par_state->lexptr++;
1063 goto retry;
1065 case '[':
1066 case '(':
1067 paren_depth++;
1068 par_state->lexptr++;
1069 return c;
1071 case ']':
1072 case ')':
1073 if (paren_depth == 0)
1074 return 0;
1075 paren_depth--;
1076 par_state->lexptr++;
1077 return c;
1079 case ',':
1080 if (pstate->comma_terminates
1081 && paren_depth == 0)
1082 return 0;
1083 par_state->lexptr++;
1084 return c;
1086 case '.':
1087 /* Might be a floating point number. */
1088 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1090 if (pstate->parse_completion)
1091 last_was_structop = 1;
1092 goto symbol; /* Nope, must be a symbol. */
1094 /* FALL THRU. */
1096 case '0':
1097 case '1':
1098 case '2':
1099 case '3':
1100 case '4':
1101 case '5':
1102 case '6':
1103 case '7':
1104 case '8':
1105 case '9':
1107 /* It's a number. */
1108 int got_dot = 0, got_e = 0, toktype;
1109 const char *p = tokstart;
1110 int hex = input_radix > 10;
1112 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1114 p += 2;
1115 hex = 1;
1118 for (;; ++p)
1120 /* This test includes !hex because 'e' is a valid hex digit
1121 and thus does not indicate a floating point number when
1122 the radix is hex. */
1123 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1124 got_dot = got_e = 1;
1125 /* This test does not include !hex, because a '.' always indicates
1126 a decimal floating point number regardless of the radix. */
1127 else if (!got_dot && *p == '.')
1128 got_dot = 1;
1129 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1130 && (*p == '-' || *p == '+'))
1131 /* This is the sign of the exponent, not the end of the
1132 number. */
1133 continue;
1134 /* We will take any letters or digits. parse_number will
1135 complain if past the radix, or if L or U are not final. */
1136 else if ((*p < '0' || *p > '9')
1137 && ((*p < 'a' || *p > 'z')
1138 && (*p < 'A' || *p > 'Z')))
1139 break;
1141 toktype = parse_number (par_state, tokstart, p - tokstart,
1142 got_dot|got_e, &yylval);
1143 if (toktype == ERROR)
1145 char *err_copy = (char *) alloca (p - tokstart + 1);
1147 memcpy (err_copy, tokstart, p - tokstart);
1148 err_copy[p - tokstart] = 0;
1149 error (_("Invalid number \"%s\"."), err_copy);
1151 par_state->lexptr = p;
1152 return toktype;
1155 case '@':
1157 const char *p = &tokstart[1];
1158 size_t len = strlen ("entry");
1160 while (isspace (*p))
1161 p++;
1162 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1163 && p[len] != '_')
1165 par_state->lexptr = &p[len];
1166 return ENTRY;
1169 /* FALLTHRU */
1170 case '+':
1171 case '-':
1172 case '*':
1173 case '/':
1174 case '%':
1175 case '|':
1176 case '&':
1177 case '^':
1178 case '~':
1179 case '!':
1180 case '<':
1181 case '>':
1182 case '?':
1183 case ':':
1184 case '=':
1185 case '{':
1186 case '}':
1187 symbol:
1188 par_state->lexptr++;
1189 return c;
1191 case '\'':
1192 case '"':
1193 case '`':
1195 int host_len;
1196 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1197 &yylval.tsval, &host_len);
1198 if (result == CHAR)
1200 if (host_len == 0)
1201 error (_("Empty character constant."));
1202 else if (host_len > 2 && c == '\'')
1204 ++tokstart;
1205 namelen = par_state->lexptr - tokstart - 1;
1206 goto tryname;
1208 else if (host_len > 1)
1209 error (_("Invalid character constant."));
1211 return result;
1215 if (!(c == '_' || c == '$'
1216 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1217 /* We must have come across a bad character (e.g. ';'). */
1218 error (_("Invalid character '%c' in expression."), c);
1220 /* It's a name. See how long it is. */
1221 namelen = 0;
1222 for (c = tokstart[namelen];
1223 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1224 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1226 c = tokstart[++namelen];
1229 /* The token "if" terminates the expression and is NOT removed from
1230 the input stream. It doesn't count if it appears in the
1231 expansion of a macro. */
1232 if (namelen == 2
1233 && tokstart[0] == 'i'
1234 && tokstart[1] == 'f')
1236 return 0;
1239 /* For the same reason (breakpoint conditions), "thread N"
1240 terminates the expression. "thread" could be an identifier, but
1241 an identifier is never followed by a number without intervening
1242 punctuation.
1243 Handle abbreviations of these, similarly to
1244 breakpoint.c:find_condition_and_thread.
1245 TODO: Watch for "goroutine" here? */
1246 if (namelen >= 1
1247 && strncmp (tokstart, "thread", namelen) == 0
1248 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1250 const char *p = tokstart + namelen + 1;
1252 while (*p == ' ' || *p == '\t')
1253 p++;
1254 if (*p >= '0' && *p <= '9')
1255 return 0;
1258 par_state->lexptr += namelen;
1260 tryname:
1262 yylval.sval.ptr = tokstart;
1263 yylval.sval.length = namelen;
1265 /* Catch specific keywords. */
1266 copy = copy_name (yylval.sval);
1267 for (i = 0; i < sizeof (ident_tokens) / sizeof (ident_tokens[0]); i++)
1268 if (strcmp (copy, ident_tokens[i].oper) == 0)
1270 /* It is ok to always set this, even though we don't always
1271 strictly need to. */
1272 yylval.opcode = ident_tokens[i].opcode;
1273 return ident_tokens[i].token;
1276 if (*tokstart == '$')
1277 return DOLLAR_VARIABLE;
1279 if (pstate->parse_completion && *par_state->lexptr == '\0')
1280 saw_name_at_eof = 1;
1281 return NAME;
1284 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1285 struct token_and_value
1287 int token;
1288 YYSTYPE value;
1291 /* A FIFO of tokens that have been read but not yet returned to the
1292 parser. */
1293 static std::vector<token_and_value> token_fifo;
1295 /* Non-zero if the lexer should return tokens from the FIFO. */
1296 static int popping;
1298 /* Temporary storage for yylex; this holds symbol names as they are
1299 built up. */
1300 static auto_obstack name_obstack;
1302 /* Build "package.name" in name_obstack.
1303 For convenience of the caller, the name is NUL-terminated,
1304 but the NUL is not included in the recorded length. */
1306 static struct stoken
1307 build_packaged_name (const char *package, int package_len,
1308 const char *name, int name_len)
1310 struct stoken result;
1312 name_obstack.clear ();
1313 obstack_grow (&name_obstack, package, package_len);
1314 obstack_grow_str (&name_obstack, ".");
1315 obstack_grow (&name_obstack, name, name_len);
1316 obstack_grow (&name_obstack, "", 1);
1317 result.ptr = (char *) obstack_base (&name_obstack);
1318 result.length = obstack_object_size (&name_obstack) - 1;
1320 return result;
1323 /* Return non-zero if NAME is a package name.
1324 BLOCK is the scope in which to interpret NAME; this can be NULL
1325 to mean the global scope. */
1327 static int
1328 package_name_p (const char *name, const struct block *block)
1330 struct symbol *sym;
1331 struct field_of_this_result is_a_field_of_this;
1333 sym = lookup_symbol (name, block, STRUCT_DOMAIN, &is_a_field_of_this).symbol;
1335 if (sym
1336 && SYMBOL_CLASS (sym) == LOC_TYPEDEF
1337 && TYPE_CODE (SYMBOL_TYPE (sym)) == TYPE_CODE_MODULE)
1338 return 1;
1340 return 0;
1343 /* Classify a (potential) function in the "unsafe" package.
1344 We fold these into "keywords" to keep things simple, at least until
1345 something more complex is warranted. */
1347 static int
1348 classify_unsafe_function (struct stoken function_name)
1350 char *copy = copy_name (function_name);
1352 if (strcmp (copy, "Sizeof") == 0)
1354 yylval.sval = function_name;
1355 return SIZEOF_KEYWORD;
1358 error (_("Unknown function in `unsafe' package: %s"), copy);
1361 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1362 The contents of the token are in `yylval'.
1363 Updates yylval and returns the new token type.
1365 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1367 static int
1368 classify_packaged_name (const struct block *block)
1370 char *copy;
1371 struct block_symbol sym;
1372 struct field_of_this_result is_a_field_of_this;
1374 copy = copy_name (yylval.sval);
1376 sym = lookup_symbol (copy, block, VAR_DOMAIN, &is_a_field_of_this);
1378 if (sym.symbol)
1380 yylval.ssym.sym = sym;
1381 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1384 return NAME;
1387 /* Classify a NAME token.
1388 The contents of the token are in `yylval'.
1389 Updates yylval and returns the new token type.
1390 BLOCK is the block in which lookups start; this can be NULL
1391 to mean the global scope.
1393 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1395 static int
1396 classify_name (struct parser_state *par_state, const struct block *block)
1398 struct type *type;
1399 struct block_symbol sym;
1400 char *copy;
1401 struct field_of_this_result is_a_field_of_this;
1403 copy = copy_name (yylval.sval);
1405 /* Try primitive types first so they win over bad/weird debug info. */
1406 type = language_lookup_primitive_type (par_state->language (),
1407 par_state->gdbarch (),
1408 copy);
1409 if (type != NULL)
1411 /* NOTE: We take advantage of the fact that yylval coming in was a
1412 NAME, and that struct ttype is a compatible extension of struct
1413 stoken, so yylval.tsym.stoken is already filled in. */
1414 yylval.tsym.type = type;
1415 return TYPENAME;
1418 /* TODO: What about other types? */
1420 sym = lookup_symbol (copy, block, VAR_DOMAIN, &is_a_field_of_this);
1422 if (sym.symbol)
1424 yylval.ssym.sym = sym;
1425 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1426 return NAME;
1429 /* If we didn't find a symbol, look again in the current package.
1430 This is to, e.g., make "p global_var" work without having to specify
1431 the package name. We intentionally only looks for objects in the
1432 current package. */
1435 char *current_package_name = go_block_package_name (block);
1437 if (current_package_name != NULL)
1439 struct stoken sval =
1440 build_packaged_name (current_package_name,
1441 strlen (current_package_name),
1442 copy, strlen (copy));
1444 xfree (current_package_name);
1445 sym = lookup_symbol (sval.ptr, block, VAR_DOMAIN,
1446 &is_a_field_of_this);
1447 if (sym.symbol)
1449 yylval.ssym.stoken = sval;
1450 yylval.ssym.sym = sym;
1451 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1452 return NAME;
1457 /* Input names that aren't symbols but ARE valid hex numbers, when
1458 the input radix permits them, can be names or numbers depending
1459 on the parse. Note we support radixes > 16 here. */
1460 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1461 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1463 YYSTYPE newlval; /* Its value is ignored. */
1464 int hextype = parse_number (par_state, copy, yylval.sval.length,
1465 0, &newlval);
1466 if (hextype == INT)
1468 yylval.ssym.sym.symbol = NULL;
1469 yylval.ssym.sym.block = NULL;
1470 yylval.ssym.is_a_field_of_this = 0;
1471 return NAME_OR_INT;
1475 yylval.ssym.sym.symbol = NULL;
1476 yylval.ssym.sym.block = NULL;
1477 yylval.ssym.is_a_field_of_this = 0;
1478 return NAME;
1481 /* This is taken from c-exp.y mostly to get something working.
1482 The basic structure has been kept because we may yet need some of it. */
1484 static int
1485 yylex (void)
1487 token_and_value current, next;
1489 if (popping && !token_fifo.empty ())
1491 token_and_value tv = token_fifo[0];
1492 token_fifo.erase (token_fifo.begin ());
1493 yylval = tv.value;
1494 /* There's no need to fall through to handle package.name
1495 as that can never happen here. In theory. */
1496 return tv.token;
1498 popping = 0;
1500 current.token = lex_one_token (pstate);
1502 /* TODO: Need a way to force specifying name1 as a package.
1503 .name1.name2 ? */
1505 if (current.token != NAME)
1506 return current.token;
1508 /* See if we have "name1 . name2". */
1510 current.value = yylval;
1511 next.token = lex_one_token (pstate);
1512 next.value = yylval;
1514 if (next.token == '.')
1516 token_and_value name2;
1518 name2.token = lex_one_token (pstate);
1519 name2.value = yylval;
1521 if (name2.token == NAME)
1523 /* Ok, we have "name1 . name2". */
1524 char *copy;
1526 copy = copy_name (current.value.sval);
1528 if (strcmp (copy, "unsafe") == 0)
1530 popping = 1;
1531 return classify_unsafe_function (name2.value.sval);
1534 if (package_name_p (copy, pstate->expression_context_block))
1536 popping = 1;
1537 yylval.sval = build_packaged_name (current.value.sval.ptr,
1538 current.value.sval.length,
1539 name2.value.sval.ptr,
1540 name2.value.sval.length);
1541 return classify_packaged_name (pstate->expression_context_block);
1545 token_fifo.push_back (next);
1546 token_fifo.push_back (name2);
1548 else
1549 token_fifo.push_back (next);
1551 /* If we arrive here we don't have a package-qualified name. */
1553 popping = 1;
1554 yylval = current.value;
1555 return classify_name (pstate, pstate->expression_context_block);
1559 go_parse (struct parser_state *par_state)
1561 /* Setting up the parser state. */
1562 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1563 gdb_assert (par_state != NULL);
1564 pstate = par_state;
1566 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1567 parser_debug);
1569 /* Initialize some state used by the lexer. */
1570 last_was_structop = 0;
1571 saw_name_at_eof = 0;
1572 paren_depth = 0;
1574 token_fifo.clear ();
1575 popping = 0;
1576 name_obstack.clear ();
1578 return yyparse ();
1581 static void
1582 yyerror (const char *msg)
1584 if (pstate->prev_lexptr)
1585 pstate->lexptr = pstate->prev_lexptr;
1587 error (_("A %s in expression, near `%s'."), msg, pstate->lexptr);