src/backend/parser/parser.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parser.c
   4  *              Main entry point/driver for PostgreSQL grammar
   5  *
   6  * Note that the grammar is not allowed to perform any table access
   7  * (since we need to be able to do basic parsing even while inside an
   8  * aborted transaction).  Therefore, the data structures returned by
   9  * the grammar are "raw" parsetrees that still need to be analyzed by
  10  * analyze.c and related files.
  11  *
  12  *
  13  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  14  * Portions Copyright (c) 1994, Regents of the University of California
  15  *
  16  * IDENTIFICATION
  17  *        $PostgreSQL$
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21
  22 #include "postgres.h"
  23
  24 #include "parser/gramparse.h"   /* required before parser/gram.h! */
  25 #include "parser/gram.h"
  26 #include "parser/parser.h"
  27
  28
/*
 * File-level parser state.
 *
 * parsetree is reset to NIL by raw_parser() before each parse and is what
 * raw_parser() hands back when the grammar reports success.
 *
 * The lookahead_* variables form a one-token pushback buffer used by
 * filtered_base_yylex(): when it reads one token too many while checking for
 * a two-word token, it stores that token here and returns it on the next
 * call.  raw_parser() clears have_lookahead before each parse.
 */
List       *parsetree;                  /* result of parsing is left here */

static bool have_lookahead;             /* is lookahead info valid? */
static int      lookahead_token;        /* token code of the saved lookahead token */
static YYSTYPE lookahead_yylval;        /* base_yylval saved with the lookahead token */
static YYLTYPE lookahead_yylloc;        /* base_yylloc saved with the lookahead token */
  35
  36
  37 /*
  38  * raw_parser
  39  *              Given a query in string form, do lexical and grammatical analysis.
  40  *
  41  * Returns a list of raw (un-analyzed) parse trees.
  42  */
  43 List *
  44 raw_parser(const char *str)
  45 {
  46         int                     yyresult;
  47
  48         parsetree = NIL;                        /* in case grammar forgets to set it */
  49         have_lookahead = false;
  50
  51         scanner_init(str);
  52         parser_init();
  53
  54         yyresult = base_yyparse();
  55
  56         scanner_finish();
  57
  58         if (yyresult)                           /* error */
  59                 return NIL;
  60
  61         return parsetree;
  62 }
  63
  64
  65 /*
  66  * pg_parse_string_token - get the value represented by a string literal
  67  *
  68  * Given the textual form of a SQL string literal, produce the represented
  69  * value as a palloc'd string.  It is caller's responsibility that the
  70  * passed string does represent one single string literal.
  71  *
  72  * We export this function to avoid having plpgsql depend on internal details
  73  * of the core grammar (such as the token code assigned to SCONST).  Note
  74  * that since the scanner isn't presently re-entrant, this cannot be used
  75  * during use of the main parser/scanner.
  76  */
  77 char *
  78 pg_parse_string_token(const char *token)
  79 {
  80         int                     ctoken;
  81
  82         scanner_init(token);
  83
  84         ctoken = base_yylex();
  85
  86         if (ctoken != SCONST)           /* caller error */
  87                 elog(ERROR, "expected string constant, got token code %d", ctoken);
  88
  89         scanner_finish();
  90
  91         return base_yylval.str;
  92 }
  93
  94
  95 /*
  96  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
  97  *
  98  * The filter is needed because in some cases the standard SQL grammar
  99  * requires more than one token lookahead.      We reduce these cases to one-token
 100  * lookahead by combining tokens here, in order to keep the grammar LALR(1).
 101  *
 102  * Using a filter is simpler than trying to recognize multiword tokens
 103  * directly in scan.l, because we'd have to allow for comments between the
 104  * words.  Furthermore it's not clear how to do it without re-introducing
 105  * scanner backtrack, which would cost more performance than this filter
 106  * layer does.
 107  */
 108 int
 109 filtered_base_yylex(void)
 110 {
 111         int                     cur_token;
 112         int                     next_token;
 113         YYSTYPE         cur_yylval;
 114         YYLTYPE         cur_yylloc;
 115
 116         /* Get next token --- we might already have it */
 117         if (have_lookahead)
 118         {
 119                 cur_token = lookahead_token;
 120                 base_yylval = lookahead_yylval;
 121                 base_yylloc = lookahead_yylloc;
 122                 have_lookahead = false;
 123         }
 124         else
 125                 cur_token = base_yylex();
 126
 127         /* Do we need to look ahead for a possible multiword token? */
 128         switch (cur_token)
 129         {
 130                 case NULLS_P:
 131
 132                         /*
 133                          * NULLS FIRST and NULLS LAST must be reduced to one token
 134                          */
 135                         cur_yylval = base_yylval;
 136                         cur_yylloc = base_yylloc;
 137                         next_token = base_yylex();
 138                         switch (next_token)
 139                         {
 140                                 case FIRST_P:
 141                                         cur_token = NULLS_FIRST;
 142                                         break;
 143                                 case LAST_P:
 144                                         cur_token = NULLS_LAST;
 145                                         break;
 146                                 default:
 147                                         /* save the lookahead token for next time */
 148                                         lookahead_token = next_token;
 149                                         lookahead_yylval = base_yylval;
 150                                         lookahead_yylloc = base_yylloc;
 151                                         have_lookahead = true;
 152                                         /* and back up the output info to cur_token */
 153                                         base_yylval = cur_yylval;
 154                                         base_yylloc = cur_yylloc;
 155                                         break;
 156                         }
 157                         break;
 158
 159                 case WITH:
 160
 161                         /*
 162                          * WITH TIME must be reduced to one token
 163                          */
 164                         cur_yylval = base_yylval;
 165                         cur_yylloc = base_yylloc;
 166                         next_token = base_yylex();
 167                         switch (next_token)
 168                         {
 169                                 case TIME:
 170                                         cur_token = WITH_TIME;
 171                                         break;
 172                                 default:
 173                                         /* save the lookahead token for next time */
 174                                         lookahead_token = next_token;
 175                                         lookahead_yylval = base_yylval;
 176                                         lookahead_yylloc = base_yylloc;
 177                                         have_lookahead = true;
 178                                         /* and back up the output info to cur_token */
 179                                         base_yylval = cur_yylval;
 180                                         base_yylloc = cur_yylloc;
 181                                         break;
 182                         }
 183                         break;
 184
 185                 default:
 186                         break;
 187         }
 188
 189         return cur_token;
 190 }