2 /*-------------------------------------------------------------------------
5 * lexical scanner for psql
7 * This code is mainly needed to determine where the end of a SQL statement
8 * is: we are looking for semicolons that are not within quotes, comments,
9 * or parentheses. The most reliable way to handle this is to borrow the
10 * backend's flex lexer rules, lock, stock, and barrel. The rules below
11 * are (except for a few) the same as the backend's, but their actions are
12 * just ECHO whereas the backend's actions generally do other things.
14 * XXX The rules in this file must be kept in sync with the backend lexer!!!
16 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
18 * The most difficult aspect of this code is that we need to work in multibyte
19 * encodings that are not ASCII-safe. A "safe" encoding is one in which each
20 * byte of a multibyte character has the high bit set (it's >= 0x80). Since
21 * all our lexing rules treat all high-bit-set characters alike, we don't
22 * really need to care whether such a byte is part of a sequence or not.
23 * In an "unsafe" encoding, we still expect the first byte of a multibyte
24 * sequence to be >= 0x80, but later bytes might not be. If we scan such
25 * a sequence as-is, the lexing rules could easily be fooled into matching
26 * such bytes to ordinary ASCII characters. Our solution for this is to
27 * substitute 0xFF for each non-first byte within the data presented to flex.
28 * The flex rules will then pass the FF's through unmolested. The emit()
29 * subroutine is responsible for looking back to the original string and
30 * replacing FF's with the corresponding original bytes.
32 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
33 * Portions Copyright (c) 1994, Regents of the University of California
38 *-------------------------------------------------------------------------
40 #include "postgres_fe.h"
48 #include "variables.h"
52 * We use a stack of flex buffers to handle substitution of psql variables.
53 * Each stacked buffer contains the as-yet-unread text from one psql variable.
54 * When we pop the stack all the way, we resume reading from the outer buffer
55 * identified by scanbufhandle.
57 typedef struct StackElem
59 YY_BUFFER_STATE buf; /* flex input control structure */
60 char *bufstring; /* data actually being scanned by flex */
61 char *origstring; /* copy of original data, if needed */
62 struct StackElem *next;
66 * All working state of the lexer must be stored in PsqlScanStateData
67 * between calls. This allows us to have multiple open lexer operations,
68 * which is needed for nested include files. The lexer itself is not
69 * recursive, but it must be re-entrant.
71 typedef struct PsqlScanStateData
73 StackElem *buffer_stack; /* stack of variable expansion buffers */
75 * These variables always refer to the outer buffer, never to any
76 * stacked variable-expansion buffer.
78 YY_BUFFER_STATE scanbufhandle;
79 char *scanbuf; /* start of outer-level input buffer */
80 const char *scanline; /* current input line at outer level */
82 /* safe_encoding, curline, refline are used by emit() to replace FFs */
83 int encoding; /* encoding being used now */
84 bool safe_encoding; /* is current encoding "safe"? */
85 const char *curline; /* actual flex input string for cur buf */
86 const char *refline; /* original data for cur buffer */
89 * All this state lives across successive input lines, until explicitly
90 * reset by psql_scan_reset.
92 int start_state; /* saved YY_START */
93 int paren_depth; /* depth of nesting in parentheses */
94 int xcdepth; /* depth of nesting in slash-star comments */
95 char *dolqstart; /* current $foo$ quote start string */
98 static PsqlScanState cur_state; /* current state while active */
100 static PQExpBuffer output_buf; /* current output buffer */
102 /* these variables do not need to be saved across calls */
103 static enum slash_option_type option_type;
104 static char *option_quote;
107 /* Return values from yylex() */
108 #define LEXRES_EOL 0 /* end of input */
109 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
110 #define LEXRES_BACKSLASH 2 /* backslash command start */
111 #define LEXRES_OK 3 /* OK completion of backslash argument */
116 static void push_new_buffer(const char *newstr);
117 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
119 static void emit(const char *txt, int len);
121 #define ECHO emit(yytext, yyleng)
126 %option never-interactive
133 * All of the following definitions and rules should exactly match
134 * src/backend/parser/scan.l so far as the flex patterns are concerned.
135 * The rule bodies are just ECHO as opposed to what the backend does,
136 * however. (But be sure to duplicate code that affects the lexing process,
137 * such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas
138 * scan.l has a separate one for each exclusive state.
142 * OK, here is a short description of lex/flex rules behavior.
143 * The longest pattern which matches an input string is always chosen.
144 * For equal-length patterns, the first occurring in the rules list is chosen.
145 * INITIAL is the starting state, to which all non-conditional rules apply.
146 * Exclusive states change parsing rules while the state is active. When in
147 * an exclusive state, only those rules defined for that state apply.
149 * We use exclusive states for quoted strings, extended comments,
150 * and to eliminate parsing troubles for numeric strings.
152 * <xb> bit string literal
153 * <xc> extended C-style comments
154 * <xd> delimited identifiers (double-quoted identifiers)
155 * <xh> hexadecimal numeric string
156 * <xq> standard quoted strings
157 * <xe> extended quoted strings (support backslash escape sequences)
158 * <xdolq> $foo$ quoted strings
159 * <xui> quoted identifier with Unicode escapes
160 * <xus> quoted string with Unicode escapes
172 /* Additional exclusive states for psql only: lex backslash commands */
183 * In order to make the world safe for Windows and Mac clients as well as
184 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
185 * sequence will be seen as two successive newlines, but that doesn't cause
186 * any problems. Comments that start with -- and extend to the next
187 * newline are treated as equivalent to a single whitespace character.
189 * NOTE a fine point: if there is no newline following --, we will absorb
190 * everything to the end of the input as a comment. This is correct. Older
191 * versions of Postgres failed to recognize -- as a comment if the input
192 * did not end with a newline.
194 * XXX perhaps \f (formfeed) should be treated as a newline as well?
202 comment ("--"{non_newline}*)
204 whitespace ({space}+|{comment})
207 * SQL requires at least one newline in the whitespace separating
208 * string literals that are to be concatenated. Silly, but who are we
209 * to argue? Note that {whitespace_with_newline} should not have * after
210 * it, whereas {whitespace} should generally have a * after it...
213 special_whitespace ({space}+|{comment}{newline})
214 horiz_whitespace ({horiz_space}|{comment})
215 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
218 * To ensure that {quotecontinue} can be scanned without having to back up
219 * if the full pattern isn't matched, we include trailing whitespace in
220 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
221 * except for {quote} followed by whitespace and just one "-" (not two,
222 * which would start a {comment}). To cover that we have {quotefail}.
223 * The actions for {quotestop} and {quotefail} must throw back characters
224 * beyond the quote proper.
227 quotestop {quote}{whitespace}*
228 quotecontinue {quote}{whitespace_with_newline}{quote}
229 quotefail {quote}{whitespace}*"-"
232 * It is tempting to scan the string for only those characters
233 * which are allowed. However, this leads to silently swallowed
234 * characters if illegal characters are included in the string.
235 * For example, if xbinside is [01] then B'ABCD' is interpreted
236 * as a zero-length string, and the ABCD' is lost!
237 * Better to pass the string forward and let the input routines
238 * validate the contents.
243 /* Hexadecimal number */
247 /* National character */
250 /* Quoted string that allows backslash escapes */
254 xeoctesc [\\][0-7]{1,3}
255 xehexesc [\\]x[0-9A-Fa-f]{1,2}
258 * xqdouble implements embedded quote, ''''
261 xqdouble {quote}{quote}
264 /* $foo$ style quotes ("dollar quoting")
265 * The quoted string starts with $foo$ where "foo" is an optional string
266 * in the form of an identifier, except that it may not contain "$",
267 * and extends to the first occurrence of an identical string.
268 * There is *no* processing of the quoted text.
270 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
271 * fails to match its trailing "$".
273 dolq_start [A-Za-z\200-\377_]
274 dolq_cont [A-Za-z\200-\377_0-9]
275 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
276 dolqfailed \${dolq_start}{dolq_cont}*
280 * Allows embedded spaces and other special characters into identifiers.
285 xddouble {dquote}{dquote}
288 /* Unicode escapes */
289 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
290 /* error rule to avoid backup */
291 uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
293 /* Quoted identifier with Unicode escapes */
294 xuistart [uU]&{dquote}
295 xuistop1 {dquote}{whitespace}*{uescapefail}?
296 xuistop2 {dquote}{whitespace}*{uescape}
298 /* Quoted string with Unicode escapes */
299 xusstart [uU]&{quote}
300 xusstop1 {quote}{whitespace}*{uescapefail}?
301 xusstop2 {quote}{whitespace}*{uescape}
303 /* error rule to avoid backup */
309 * The "extended comment" syntax closely resembles allowable operator syntax.
310 * The tricky part here is to get lex to recognize a string starting with
311 * slash-star as a comment, when interpreting it as an operator would produce
312 * a longer match --- remember lex will prefer a longer match! Also, if we
313 * have something like plus-slash-star, lex will think this is a 3-character
314 * operator whereas we want to see it as a + operator and a comment start.
315 * The solution is two-fold:
316 * 1. append {op_chars}* to xcstart so that it matches as much text as
317 * {operator} would. Then the tie-breaker (first matching rule of same
318 * length) ensures xcstart wins. We put back the extra stuff with yyless()
319 * in case it contains a star-slash that should terminate the comment.
320 * 2. In the operator rule, check for slash-star within the operator, and
321 * if found throw it back with yyless(). This handles the plus-slash-star problem.
323 * Dash-dash comments have similar interactions with the operator rule.
325 xcstart \/\*{op_chars}*
330 ident_start [A-Za-z\200-\377_]
331 ident_cont [A-Za-z\200-\377_0-9\$]
333 identifier {ident_start}{ident_cont}*
338 * "self" is the set of chars that should be returned as single-character
339 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
340 * which can be one or more characters long (but if a single-char token
341 * appears in the "self" set, it is not to be returned as an Op). Note
342 * that the sets overlap, but each has some chars that are not in the other.
344 * If you change either set, adjust the character lists appearing in the
345 * rule for "operator"!
347 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
348 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
351 /* we no longer allow unary minus in numbers.
352 * instead we pass it separately to parser. there it gets
353 * coerced via doNegate() -- Leon aug 20 1999
355 * {realfail1} and {realfail2} are added to prevent the need for scanner
356 * backup when the {real} rule fails to match completely.
360 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
361 real ({integer}|{decimal})[Ee][-+]?{digit}+
362 realfail1 ({integer}|{decimal})[Ee]
363 realfail2 ({integer}|{decimal})[Ee][-+]
370 * Dollar quoted strings are totally opaque, and no escaping is done on them.
371 * Other quoted strings must allow some special characters such as single-quote and newline.
373 * Embedded single-quotes are implemented both in the SQL standard
374 * style of two adjacent single quotes "''" and in the Postgres/Java style
375 * of escaped-quote "\'".
376 * Other embedded escaped characters are matched explicitly and the leading
377 * backslash is dropped from the string.
378 * Note that xcstart must appear before operator, as explained above!
379 * Also whitespace (comment) must appear before operator.
386 * Note that the whitespace rule includes both true
387 * whitespace and single-line ("--" style) comments.
388 * We suppress whitespace at the start of the query
389 * buffer. We also suppress all single-line comments,
390 * which is pretty dubious but is the historical behavior.
393 if (!(output_buf->len == 0 || yytext[0] == '-'))
398 cur_state->xcdepth = 0;
400 /* Put back any characters past slash-star; see above */
406 cur_state->xcdepth++;
407 /* Put back any characters past slash-star; see above */
413 if (cur_state->xcdepth <= 0)
418 cur_state->xcdepth--;
448 <xh>{quotecontinue} |
449 <xb>{quotecontinue} {
454 /* Hexadecimal bit type.
455 * At some point we should simply pass the string
456 * forward to the parser and label it there.
457 * In the meantime, place a leading "x" on the string
458 * to mark it for the input routine as a hex string.
471 yyless(1); /* eat only 'n' this time */
476 if (standard_strings())
505 <xq,xe,xus>{xqdouble} {
523 <xq,xe,xus>{quotecontinue} {
527 /* This is only needed for \ just before EOF */
532 cur_state->dolqstart = pg_strdup(yytext);
537 /* throw back all but the initial "$" */
542 if (strcmp(yytext, cur_state->dolqstart) == 0)
544 free(cur_state->dolqstart);
545 cur_state->dolqstart = NULL;
551 * When we fail to match $...$ to dolqstart, transfer
552 * the $... part to the output, but put back the final
553 * $ for rescanning. Consider $delim$...$junk$delim$
559 <xdolq>{dolqinside} {
562 <xdolq>{dolqfailed} {
566 /* This is only needed for $ inside the quoted text */
599 /* throw back all but the initial u/U */
609 * These rules are specific to psql --- they implement parenthesis
610 * counting and detection of command-ending semicolon. These must
611 * appear before the {self} rule so that they take precedence over it.
615 cur_state->paren_depth++;
620 if (cur_state->paren_depth > 0)
621 cur_state->paren_depth--;
627 if (cur_state->paren_depth == 0)
629 /* Terminate lexing temporarily */
635 * psql-specific rules to handle backslash commands and variable
636 * substitution. We want these before {self}, also.
640 /* Force a semicolon or colon into the query buffer */
645 /* Terminate lexing temporarily */
646 return LEXRES_BACKSLASH;
650 /* Possible psql variable substitution */
653 value = GetVariable(pset.vars, yytext + 1);
657 /* It is a variable, perform substitution */
658 push_new_buffer(value);
659 /* yy_scan_string already made buffer active */
664 * if the variable doesn't exist we'll copy the string as is
672 * Back to backend-compatible rules.
681 * Check for embedded slash-star or dash-dash; those
682 * are comment starts, so operator must stop there.
683 * Note that slash-star or dash-dash at the first
684 * character will match a prior rule, not this one.
687 char *slashstar = strstr(yytext, "/*");
688 char *dashdash = strstr(yytext, "--");
690 if (slashstar && dashdash)
692 /* if both appear, take the first one */
693 if (slashstar > dashdash)
694 slashstar = dashdash;
697 slashstar = dashdash;
699 nchars = slashstar - yytext;
702 * For SQL compatibility, '+' and '-' cannot be the
703 * last char of a multi-char operator unless the operator
704 * contains chars that are not in SQL operators.
705 * The idea is to lex '=-' as two operators, but not
706 * to forbid operator names like '?-' that could not be
707 * sequences of SQL operators.
710 (yytext[nchars-1] == '+' ||
711 yytext[nchars-1] == '-'))
715 for (ic = nchars-2; ic >= 0; ic--)
717 if (strchr("~!@#^&|`?%", yytext[ic]))
721 break; /* found a char that makes it OK */
722 nchars--; /* else remove the +/-, and check again */
727 /* Strip the unwanted chars from the token */
748 * throw back the [Ee], and treat as {decimal}. Note
749 * that it is possible the input is actually {integer},
750 * but since this case will almost certainly lead to a
751 * syntax error anyway, we don't bother to distinguish.
757 /* throw back the [Ee][+-], and proceed as above */
773 * Everything from here down is psql-specific.
777 StackElem *stackelem = cur_state->buffer_stack;
779 if (stackelem == NULL)
780 return LEXRES_EOL; /* end of input reached */
783 * We were expanding a variable, so pop the inclusion
784 * stack and keep lexing
786 cur_state->buffer_stack = stackelem->next;
787 yy_delete_buffer(stackelem->buf);
788 free(stackelem->bufstring);
789 if (stackelem->origstring)
790 free(stackelem->origstring);
793 stackelem = cur_state->buffer_stack;
794 if (stackelem != NULL)
796 yy_switch_to_buffer(stackelem->buf);
797 cur_state->curline = stackelem->bufstring;
798 cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
802 yy_switch_to_buffer(cur_state->scanbufhandle);
803 cur_state->curline = cur_state->scanbuf;
804 cur_state->refline = cur_state->scanline;
809 * Exclusive lexer states to handle backslash command lexing
813 /* command name ends at whitespace or backslash; eat all else */
825 /* eat any whitespace, then decide what to do at first nonblank */
831 * backslash is end of command or next command, do not eat
833 * XXX this means we can't conveniently accept options
834 * that start with a backslash; therefore, option
835 * processing that encourages use of backslashes is rather broken.
843 *option_quote = '\'';
848 if (option_type == OT_VERBATIM)
850 /* in verbatim mode, backquote is not special */
852 BEGIN(xslashdefaultarg);
857 BEGIN(xslashbackquote);
862 /* Possible psql variable substitution */
863 if (option_type == OT_VERBATIM)
869 value = GetVariable(pset.vars, yytext + 1);
872 * The variable value is just emitted without any
873 * further examination. This is consistent with the
874 * pre-8.0 code behavior, if not with the way that
875 * variables are handled outside backslash commands.
878 appendPQExpBufferStr(output_buf, value);
888 if (option_type == OT_FILEPIPE)
890 /* treat like whole-string case */
891 BEGIN(xslashwholeline);
895 /* treat like default case */
896 BEGIN(xslashdefaultarg);
903 BEGIN(xslashquotedarg);
908 BEGIN(xslashdefaultarg);
915 * single-quoted text: copy literally except for '' and backslash
919 {quote} { return LEXRES_OK; }
921 {xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
923 "\\n" { appendPQExpBufferChar(output_buf, '\n'); }
924 "\\t" { appendPQExpBufferChar(output_buf, '\t'); }
925 "\\b" { appendPQExpBufferChar(output_buf, '\b'); }
926 "\\r" { appendPQExpBufferChar(output_buf, '\r'); }
927 "\\f" { appendPQExpBufferChar(output_buf, '\f'); }
931 appendPQExpBufferChar(output_buf,
932 (char) strtol(yytext + 1, NULL, 8));
937 appendPQExpBufferChar(output_buf,
938 (char) strtol(yytext + 2, NULL, 16));
941 "\\". { emit(yytext + 1, 1); }
949 * backticked text: copy everything until next backquote or end of line.
950 * Invocation of the command will happen in psql_scan_slash_option.
953 "`" { return LEXRES_OK; }
961 * Copy everything until unquoted whitespace or end of line. Quotes
962 * do not get stripped yet.
972 * unquoted backslash is end of command or next command, do not eat
975 * (this was not the behavior pre-8.0, but it seems consistent)
985 BEGIN(xslashquotedarg);
993 /* double-quoted text within a default-type argument: copy */
997 BEGIN(xslashdefaultarg);
1000 {other}|\n { ECHO; }
1005 /* copy everything until end of input line */
1006 /* but suppress leading whitespace */
1009 if (output_buf->len > 0)
1018 /* at end of command, eat a double backslash, but not anything else */
1020 "\\\\" { return LEXRES_OK; }
1032 * Create a lexer working state struct.
1035 psql_scan_create(void)
1037 PsqlScanState state;
1039 state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));
1041 psql_scan_reset(state);
1047 * Destroy a lexer working state struct, releasing all resources.
1050 psql_scan_destroy(PsqlScanState state)
1052 psql_scan_finish(state);
1054 psql_scan_reset(state);
1060 * Set up to perform lexing of the given input line.
1062 * The text at *line, extending for line_len bytes, will be scanned by
1063 * subsequent calls to the psql_scan routines. psql_scan_finish should
1064 * be called when scanning is complete. Note that the lexer retains
1065 * a pointer to the storage at *line --- this string must not be altered
1066 * or freed until after psql_scan_finish is called.
1069 psql_scan_setup(PsqlScanState state,
1070 const char *line, int line_len)
1072 /* Mustn't be scanning already */
1073 psql_assert(state->scanbufhandle == NULL);
1074 psql_assert(state->buffer_stack == NULL);
1076 /* Do we need to hack the character set encoding? */
1077 state->encoding = pset.encoding;
1078 state->safe_encoding = pg_valid_server_encoding_id(state->encoding);
1080 /* needed for prepare_buffer */
1083 /* Set up flex input buffer with appropriate translation and padding */
1084 state->scanbufhandle = prepare_buffer(line, line_len,
1086 state->scanline = line;
1088 /* Set lookaside data in case we have to map unsafe encoding */
1089 state->curline = state->scanbuf;
1090 state->refline = state->scanline;
1094 * Do lexical analysis of SQL command text.
1096 * The text previously passed to psql_scan_setup is scanned, and appended
1097 * (possibly with transformation) to query_buf.
1099 * The return value indicates the condition that stopped scanning:
1101 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1102 * transferred to query_buf.) The command accumulated in query_buf should
1103 * be executed, then clear query_buf and call again to scan the remainder of the line.
1106 * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
1107 * Any previous data on the line has been transferred to query_buf.
1108 * The caller will typically next call psql_scan_slash_command(),
1109 * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
1111 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1112 * incomplete SQL command. *prompt is set to the appropriate prompt type.
1114 * PSCAN_EOL: the end of the line was reached, and there is no lexical
1115 * reason to consider the command incomplete. The caller may or may not
1116 * choose to send it. *prompt is set to the appropriate prompt type if
1117 * the caller chooses to collect more input.
1119 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1120 * be called next, then the cycle may be repeated with a fresh input line.
1122 * In all cases, *prompt is set to an appropriate prompt type code for the
1123 * next line-input operation.
1126 psql_scan(PsqlScanState state,
1127 PQExpBuffer query_buf,
1128 promptStatus_t *prompt)
1130 PsqlScanResult result;
1133 /* Must be scanning already */
1134 psql_assert(state->scanbufhandle);
1136 /* Set up static variables that will be used by yylex */
1138 output_buf = query_buf;
1140 if (state->buffer_stack != NULL)
1141 yy_switch_to_buffer(state->buffer_stack->buf);
1143 yy_switch_to_buffer(state->scanbufhandle);
1145 BEGIN(state->start_state);
1148 lexresult = yylex();
1150 /* Update static vars back to the state struct */
1151 state->start_state = YY_START;
1154 * Check termination state and return appropriate result info.
1158 case LEXRES_EOL: /* end of input */
1159 switch (state->start_state)
1162 if (state->paren_depth > 0)
1164 result = PSCAN_INCOMPLETE;
1165 *prompt = PROMPT_PAREN;
1167 else if (query_buf->len > 0)
1170 *prompt = PROMPT_CONTINUE;
1174 /* never bother to send an empty buffer */
1175 result = PSCAN_INCOMPLETE;
1176 *prompt = PROMPT_READY;
1180 result = PSCAN_INCOMPLETE;
1181 *prompt = PROMPT_SINGLEQUOTE;
1184 result = PSCAN_INCOMPLETE;
1185 *prompt = PROMPT_COMMENT;
1188 result = PSCAN_INCOMPLETE;
1189 *prompt = PROMPT_DOUBLEQUOTE;
1192 result = PSCAN_INCOMPLETE;
1193 *prompt = PROMPT_SINGLEQUOTE;
1196 result = PSCAN_INCOMPLETE;
1197 *prompt = PROMPT_SINGLEQUOTE;
1200 result = PSCAN_INCOMPLETE;
1201 *prompt = PROMPT_SINGLEQUOTE;
1204 result = PSCAN_INCOMPLETE;
1205 *prompt = PROMPT_DOLLARQUOTE;
1208 /* can't get here */
1209 fprintf(stderr, "invalid YY_START\n");
1213 case LEXRES_SEMI: /* semicolon */
1214 result = PSCAN_SEMICOLON;
1215 *prompt = PROMPT_READY;
1217 case LEXRES_BACKSLASH: /* backslash */
1218 result = PSCAN_BACKSLASH;
1219 *prompt = PROMPT_READY;
1222 /* can't get here */
1223 fprintf(stderr, "invalid yylex result\n");
1231 * Clean up after scanning a string. This flushes any unread input and
1232 * releases resources (but not the PsqlScanState itself). Note however
1233 * that this does not reset the lexer scan state; that can be done by
1234 * psql_scan_reset(), which is an orthogonal operation.
1236 * It is legal to call this when not scanning anything (makes it easier
1237 * to deal with error recovery).
1240 psql_scan_finish(PsqlScanState state)
1242 /* Drop any incomplete variable expansions. */
1243 while (state->buffer_stack != NULL)
1245 StackElem *stackelem = state->buffer_stack;
1247 state->buffer_stack = stackelem->next;
1248 yy_delete_buffer(stackelem->buf);
1249 free(stackelem->bufstring);
1250 if (stackelem->origstring)
1251 free(stackelem->origstring);
1255 /* Done with the outer scan buffer, too */
1256 if (state->scanbufhandle)
1257 yy_delete_buffer(state->scanbufhandle);
1258 state->scanbufhandle = NULL;
1260 free(state->scanbuf);
1261 state->scanbuf = NULL;
1265 * Reset lexer scanning state to start conditions. This is appropriate
1266 * for executing \r psql commands (or any other time that we discard the
1267 * prior contents of query_buf). It is not, however, necessary to do this
1268 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1269 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1270 * conditions are returned.
1272 * Note that this is unrelated to flushing unread input; that task is
1273 * done by psql_scan_finish().
1276 psql_scan_reset(PsqlScanState state)
1278 state->start_state = INITIAL;
1279 state->paren_depth = 0;
1280 state->xcdepth = 0; /* not really necessary */
1281 if (state->dolqstart)
1282 free(state->dolqstart);
1283 state->dolqstart = NULL;
1287 * Return true if lexer is currently in an "inside quotes" state.
1289 * This is pretty grotty but is needed to preserve the old behavior
1290 * that mainloop.c drops blank lines not inside quotes without even echoing them.
1294 psql_scan_in_quote(PsqlScanState state)
1296 return state->start_state != INITIAL;
1300 * Scan the command name of a psql backslash command. This should be called
1301 * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
1302 * has been consumed through the leading backslash.
1304 * The return value is a malloc'd copy of the command name, as parsed off from the input.
1308 psql_scan_slash_command(PsqlScanState state)
1310 PQExpBufferData mybuf;
1313 /* Must be scanning already */
1314 psql_assert(state->scanbufhandle);
1316 /* Build a local buffer that we'll return the data of */
1317 initPQExpBuffer(&mybuf);
1319 /* Set up static variables that will be used by yylex */
1321 output_buf = &mybuf;
1323 if (state->buffer_stack != NULL)
1324 yy_switch_to_buffer(state->buffer_stack->buf);
1326 yy_switch_to_buffer(state->scanbufhandle);
1331 lexresult = yylex();
1333 /* There are no possible errors in this lex state... */
1339 * Parse off the next argument for a backslash command, and return it as a
1340 * malloc'd string. If there are no more arguments, returns NULL.
1342 * type tells what processing, if any, to perform on the option string;
1343 * for example, if it's a SQL identifier, we want to downcase any unquoted letters.
1346 * if quote is not NULL, *quote is set to 0 if no quoting was found, else the quote symbol.
1349 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
1350 * be taken as part of the option string will be stripped.
1352 * NOTE: the only possible syntax errors for backslash options are unmatched
1353 * quotes, which are detected when we run out of input. Therefore, on a
1354 * syntax error we just throw away the string and return NULL; there is no
1355 * need to worry about flushing remaining input.
1358 psql_scan_slash_option(PsqlScanState state,
1359 enum slash_option_type type,
1363 PQExpBufferData mybuf;
1368 /* Must be scanning already */
1369 psql_assert(state->scanbufhandle);
1372 quote = &local_quote;
1375 /* Build a local buffer that we'll return the data of */
1376 initPQExpBuffer(&mybuf);
1378 /* Set up static variables that will be used by yylex */
1380 output_buf = &mybuf;
1382 option_quote = quote;
1384 if (state->buffer_stack != NULL)
1385 yy_switch_to_buffer(state->buffer_stack->buf);
1387 yy_switch_to_buffer(state->scanbufhandle);
1389 if (type == OT_WHOLE_LINE)
1390 BEGIN(xslashwholeline);
1395 lexresult = yylex();
1398 * Check the lex result: we should have gotten back either LEXRES_OK
1399 * or LEXRES_EOL (the latter indicating end of string). If we were inside
1400 * a quoted string, as indicated by YY_START, EOL is an error.
1402 psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
1407 /* empty arg, or possibly a psql variable substitution */
1410 if (lexresult != LEXRES_OK)
1411 badarg = true; /* hit EOL not ending quote */
1413 case xslashbackquote:
1414 if (lexresult != LEXRES_OK)
1415 badarg = true; /* hit EOL not ending quote */
1418 /* Perform evaluation of backticked command */
1419 char *cmd = mybuf.data;
1422 PQExpBufferData output;
1426 fd = popen(cmd, PG_BINARY_R);
1429 psql_error("%s: %s\n", cmd, strerror(errno));
1433 initPQExpBuffer(&output);
1439 result = fread(buf, 1, sizeof(buf), fd);
1442 psql_error("%s: %s\n", cmd, strerror(errno));
1446 appendBinaryPQExpBuffer(&output, buf, result);
1447 } while (!feof(fd));
1450 if (fd && pclose(fd) == -1)
1452 psql_error("%s: %s\n", cmd, strerror(errno));
1456 if (PQExpBufferBroken(&output))
1458 psql_error("%s: out of memory\n", cmd);
1462 /* Now done with cmd, transfer result to mybuf */
1463 resetPQExpBuffer(&mybuf);
1467 /* strip any trailing newline */
1468 if (output.len > 0 &&
1469 output.data[output.len - 1] == '\n')
1471 appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
1474 termPQExpBuffer(&output);
1477 case xslashdefaultarg:
1478 /* Strip any trailing semi-colons if requested */
1481 while (mybuf.len > 0 &&
1482 mybuf.data[mybuf.len - 1] == ';')
1484 mybuf.data[--mybuf.len] = '\0';
1489 * If SQL identifier processing was requested, then we strip out
1490 * excess double quotes and downcase unquoted letters.
1491 * Doubled double-quotes become output double-quotes, per spec.
1493 * Note that a string like FOO"BAR"BAZ will be converted to
1494 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
1495 * which would have us parse it as several identifiers. But
1496 * for psql's purposes, we want a string like "foo"."bar" to
1497 * be treated as one option, so there's little choice.
1499 if (type == OT_SQLID || type == OT_SQLIDHACK)
1501 bool inquotes = false;
1502 char *cp = mybuf.data;
1508 if (inquotes && cp[1] == '"')
1510 /* Keep the first quote, remove the second */
1513 inquotes = !inquotes;
1514 /* Collapse out quote at *cp */
1515 memmove(cp, cp + 1, strlen(cp));
1517 /* do not advance cp */
1521 if (!inquotes && type == OT_SQLID)
1522 *cp = pg_tolower((unsigned char) *cp);
1523 cp += PQmblen(cp, pset.encoding);
1528 case xslashquotedarg:
1529 /* must have hit EOL inside double quotes */
1532 case xslashwholeline:
1536 /* can't get here */
1537 fprintf(stderr, "invalid YY_START\n");
1543 psql_error("unterminated quoted string\n");
1544 termPQExpBuffer(&mybuf);
1549 * An unquoted empty argument isn't possible unless we are at end of
1550 * command. Return NULL instead.
1552 if (mybuf.len == 0 && *quote == 0)
1554 termPQExpBuffer(&mybuf);
1558 /* Else return the completed string. */
1563 * Eat up any unused \\ to complete a backslash command.
1566 psql_scan_slash_command_end(PsqlScanState state)
1570 /* Must be scanning already */
1571 psql_assert(state->scanbufhandle);
1573 /* Set up static variables that will be used by yylex */
1577 if (state->buffer_stack != NULL)
1578 yy_switch_to_buffer(state->buffer_stack->buf);
1580 yy_switch_to_buffer(state->scanbufhandle);
1585 lexresult = yylex();
1587 /* There are no possible errors in this lex state... */
1592 * Push the given string onto the stack of stuff to scan.
1594 * cur_state must point to the active PsqlScanState.
1596 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1599 push_new_buffer(const char *newstr)
1601 StackElem *stackelem;
1603 stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1604 stackelem->buf = prepare_buffer(newstr, strlen(newstr),
1605 &stackelem->bufstring);
1606 cur_state->curline = stackelem->bufstring;
1607 if (cur_state->safe_encoding)
1609 stackelem->origstring = NULL;
1610 cur_state->refline = stackelem->bufstring;
1614 stackelem->origstring = pg_strdup(newstr);
1615 cur_state->refline = stackelem->origstring;
1617 stackelem->next = cur_state->buffer_stack;
1618 cur_state->buffer_stack = stackelem;
1622 * Set up a flex input buffer to scan the given data. We always make a
1623 * copy of the data. If working in an unsafe encoding, the copy has
1624 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1626 * cur_state must point to the active PsqlScanState.
1628 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1630 static YY_BUFFER_STATE
1631 prepare_buffer(const char *txt, int len, char **txtcopy)
1635 /* Flex wants two \0 characters after the actual data */
1636 newtxt = pg_malloc(len + 2);
1638 newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1640 if (cur_state->safe_encoding)
1641 memcpy(newtxt, txt, len);
1644 /* Gotta do it the hard way */
1649 int thislen = PQmblen(txt + i, cur_state->encoding);
1651 /* first byte should always be okay... */
1654 while (--thislen > 0)
1655 newtxt[i++] = (char) 0xFF;
1659 return yy_scan_buffer(newtxt, len + 2);
1663 * emit() --- body for ECHO macro
1665 * NB: this must be used for ALL and ONLY the text copied from the flex
1666 * input data. If you pass it something that is not part of the yytext
1667 * string, you are making a mistake. Internally generated text can be
1668 * appended directly to output_buf.
1671 emit(const char *txt, int len)
1673 if (cur_state->safe_encoding)
1674 appendBinaryPQExpBuffer(output_buf, txt, len);
1677 /* Gotta do it the hard way */
1678 const char *reference = cur_state->refline;
1681 reference += (txt - cur_state->curline);
1683 for (i = 0; i < len; i++)
1687 if (ch == (char) 0xFF)
1689 appendPQExpBufferChar(output_buf, ch);