2 /*-------------------------------------------------------------------------
5 * lexical scanner for SQL commands
7 * This lexer used to be part of psql, and that heritage is reflected in
8 * the file name as well as function and typedef names, though it can now
9 * be used by other frontend programs as well. It's also possible to extend
10 * this lexer with a compatible add-on lexer to handle program-specific
13 * This code is mainly concerned with determining where the end of a SQL
14 * statement is: we are looking for semicolons that are not within quotes,
15 * comments, or parentheses. The most reliable way to handle this is to
16 * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 * below are (except for a few) the same as the backend's, but their actions
18 * are just ECHO whereas the backend's actions generally do other things.
20 * XXX The rules in this file must be kept in sync with the backend lexer!!!
22 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
24 * See psqlscan_int.h for additional commentary.
27 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
28 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/fe_utils/psqlscan.l
33 *-------------------------------------------------------------------------
35 #include "postgres_fe.h"
37 #include "common/logging.h"
38 #include "fe_utils/psqlscan.h"
47 #include "fe_utils/psqlscan_int.h"
50 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 * doesn't presently make use of that argument, so just declare it as int.
56 /* Return values from yylex() */
57 #define LEXRES_EOL 0 /* end of input */
58 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
59 #define LEXRES_BACKSLASH 2 /* backslash command start */
62 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
65 * Work around a bug in flex 2.5.35: it emits a couple of functions that
66 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
67 * this would cause warnings. Providing our own declarations should be
68 * harmless even when the bug gets fixed.
70 extern int psql_yyget_column(yyscan_t yyscanner);
71 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
78 %option never-interactive
84 %option prefix="psql_yy"
87 * Set the type of yyextra; we use it as a pointer back to the containing
90 %option extra-type="PsqlScanState"
93 * All of the following definitions and rules should exactly match
94 * src/backend/parser/scan.l so far as the flex patterns are concerned.
95 * The rule bodies are just ECHO as opposed to what the backend does,
96 * however. (But be sure to duplicate code that affects the lexing process,
97 * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
98 * whereas scan.l has a separate one for each exclusive state.
102 * OK, here is a short description of lex/flex rules behavior.
103 * The longest pattern which matches an input string is always chosen.
104 * For equal-length patterns, the first occurring in the rules list is chosen.
105 * INITIAL is the starting state, to which all non-conditional rules apply.
106 * Exclusive states change parsing rules while the state is active. When in
107 * an exclusive state, only those rules defined for that state apply.
109 * We use exclusive states for quoted strings, extended comments,
110 * and to eliminate parsing troubles for numeric strings.
112 * <xb> bit string literal
113 * <xc> extended C-style comments
114 * <xd> delimited identifiers (double-quoted identifiers)
115 * <xh> hexadecimal byte string
116 * <xq> standard quoted strings
117 * <xqs> quote stop (detect continued strings)
118 * <xe> extended quoted strings (support backslash escape sequences)
119 * <xdolq> $foo$ quoted strings
120 * <xui> quoted identifier with Unicode escapes
121 * <xus> quoted string with Unicode escapes
123 * Note: we intentionally don't mimic the backend's <xeu> state; we have
124 * no need to distinguish it from <xe> state, and no good way to get out
125 * of it in error cases. The backend just throws yyerror() in those
126 * cases, but that's not an option here.
141 * In order to make the world safe for Windows and Mac clients as well as
142 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
143 * sequence will be seen as two successive newlines, but that doesn't cause
144 * any problems. Comments that start with -- and extend to the next
145 * newline are treated as equivalent to a single whitespace character.
147 * NOTE a fine point: if there is no newline following --, we will absorb
148 * everything to the end of the input as a comment. This is correct. Older
149 * versions of Postgres failed to recognize -- as a comment if the input
150 * did not end with a newline.
152 * non_newline_space tracks all space characters except newlines.
154 * XXX if you change the set of whitespace characters, fix scanner_isspace()
159 non_newline_space [ \t\f\v]
163 comment ("--"{non_newline}*)
165 whitespace ({space}+|{comment})
168 * SQL requires at least one newline in the whitespace separating
169 * string literals that are to be concatenated. Silly, but who are we
170 * to argue? Note that {whitespace_with_newline} should not have * after
171 * it, whereas {whitespace} should generally have a * after it...
174 special_whitespace ({space}+|{comment}{newline})
175 non_newline_whitespace ({non_newline_space}|{comment})
176 whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
179 /* If we see {quote} then {quotecontinue}, the quoted string continues */
180 quotecontinue {whitespace_with_newline}{quote}
183 * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
184 * {quotecontinue}. It might seem that this could just be {whitespace}*,
185 * but if there's a dash after {whitespace_with_newline}, it must be consumed
186 * to see if there's another dash --- which would start a {comment} and thus
187 * allow continuation of the {quotecontinue} token.
189 quotecontinuefail {whitespace}*"-"?
192 * It is tempting to scan the string for only those characters
193 * which are allowed. However, this leads to silently swallowed
194 * characters if illegal characters are included in the string.
195 * For example, if xbinside is [01] then B'ABCD' is interpreted
196 * as a zero-length string, and the ABCD' is lost!
197 * Better to pass the string forward and let the input routines
198 * validate the contents.
203 /* Hexadecimal byte string */
207 /* National character */
210 /* Quoted string that allows backslash escapes */
214 xeoctesc [\\][0-7]{1,3}
215 xehexesc [\\]x[0-9A-Fa-f]{1,2}
216 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
217 xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
220 * xqdouble implements embedded quote, ''''
223 xqdouble {quote}{quote}
226 /* $foo$ style quotes ("dollar quoting")
227 * The quoted string starts with $foo$ where "foo" is an optional string
228 * in the form of an identifier, except that it may not contain "$",
229 * and extends to the first occurrence of an identical string.
230 * There is *no* processing of the quoted text.
232 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
233 * fails to match its trailing "$".
235 dolq_start [A-Za-z\200-\377_]
236 dolq_cont [A-Za-z\200-\377_0-9]
237 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
238 dolqfailed \${dolq_start}{dolq_cont}*
242 * Allows embedded spaces and other special characters in identifiers.
247 xddouble {dquote}{dquote}
250 /* Quoted identifier with Unicode escapes */
251 xuistart [uU]&{dquote}
253 /* Quoted string with Unicode escapes */
254 xusstart [uU]&{quote}
256 /* error rule to avoid backup */
262 * The "extended comment" syntax closely resembles allowable operator syntax.
263 * The tricky part here is to get lex to recognize a string starting with
264 * slash-star as a comment, when interpreting it as an operator would produce
265 * a longer match --- remember lex will prefer a longer match! Also, if we
266 * have something like plus-slash-star, lex will think this is a 3-character
267 * operator whereas we want to see it as a + operator and a comment start.
268 * The solution is two-fold:
269 * 1. append {op_chars}* to xcstart so that it matches as much text as
270 * {operator} would. Then the tie-breaker (first matching rule of same
271 * length) ensures xcstart wins. We put back the extra stuff with yyless()
272 * in case it contains a star-slash that should terminate the comment.
273 * 2. In the operator rule, check for slash-star within the operator, and
274 * if found throw it back with yyless(). This handles the plus-slash-star
276 * Dash-dash comments have similar interactions with the operator rule.
278 xcstart \/\*{op_chars}*
282 ident_start [A-Za-z\200-\377_]
283 ident_cont [A-Za-z\200-\377_0-9\$]
285 identifier {ident_start}{ident_cont}*
287 /* Assorted special-case operators and operator-like tokens */
293 * These operator-like tokens (unlike the above ones) also match the {operator}
294 * rule, which means that they might be overridden by a longer match if they
295 * are followed by a comment start or a + or - character. Accordingly, if you
296 * add to this list, you must also add corresponding code to the {operator}
297 * block to return the correct token in such cases. (This is not needed in
298 * psqlscan.l since the token value is ignored there.)
307 * "self" is the set of chars that should be returned as single-character
308 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
309 * which can be one or more characters long (but if a single-char token
310 * appears in the "self" set, it is not to be returned as an Op). Note
311 * that the sets overlap, but each has some chars that are not in the other.
313 * If you change either set, adjust the character lists appearing in the
314 * rule for "operator"!
316 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
317 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
323 * Unary minus is not part of a number here. Instead we pass it separately to
324 * the parser, and there it gets coerced via doNegate().
326 * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
328 * {realfail} is added to prevent the need for scanner
329 * backup when the {real} rule fails to match completely.
336 decinteger {decdigit}(_?{decdigit})*
337 hexinteger 0[xX](_?{hexdigit})+
338 octinteger 0[oO](_?{octdigit})+
339 bininteger 0[bB](_?{bindigit})+
345 numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
346 numericfail {decinteger}\.\.
348 real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
349 realfail ({decinteger}|{numeric})[Ee][-+]
351 /* Positional parameters don't accept underscores. */
355 * An identifier immediately following an integer literal is disallowed because
356 * in some cases it's ambiguous what is meant: for example, 0x1234 could be
357 * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
358 * detect such problems by seeing if integer_junk matches a longer substring
359 * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
360 * bininteger). One "junk" pattern is sufficient because
361 * {decinteger}{identifier} will match all the same strings we'd match with
362 * {hexinteger}{identifier} etc.
364 * Note that the rule for integer_junk must appear after the ones for
365 * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
366 * and integer_junk, and we need hexinteger to be chosen in that case.
368 * Also disallow strings matched by numeric_junk, real_junk and param_junk
371 integer_junk {decinteger}{identifier}
372 numeric_junk {numeric}{identifier}
373 real_junk {real}{identifier}
374 param_junk \${decdigit}+{identifier}
376 /* psql-specific: characters allowed in variable names */
377 variable_char [A-Za-z\200-\377_0-9]
382 * Dollar quoted strings are totally opaque, and no escaping is done on them.
383 * Other quoted strings must allow some special characters such as single-quote
385 * Embedded single-quotes are implemented both in the SQL standard
386 * style of two adjacent single quotes "''" and in the Postgres/Java style
387 * of escaped-quote "\'".
388 * Other embedded escaped characters are matched explicitly and the leading
389 * backslash is dropped from the string.
390 * Note that xcstart must appear before operator, as explained above!
391 * Also whitespace (comment) must appear before operator.
397 /* Declare some local variables inside yylex(), for convenience */
398 PsqlScanState cur_state = yyextra;
399 PQExpBuffer output_buf = cur_state->output_buf;
402 * Force flex into the state indicated by start_state. This has a
403 * couple of purposes: it lets some of the functions below set a new
404 * starting state without ugly direct access to flex variables, and it
405 * allows us to transition from one flex lexer to another so that we
406 * can lex different parts of the source string using separate lexers.
408 BEGIN(cur_state->start_state);
413 * Note that the whitespace rule includes both true
414 * whitespace and single-line ("--" style) comments.
415 * We suppress whitespace until we have collected some
416 * non-whitespace data. (This interacts with some
417 * decisions in MainLoop(); see there for details.)
419 if (output_buf->len > 0)
424 cur_state->xcdepth = 0;
426 /* Put back any characters past slash-star; see above */
433 cur_state->xcdepth++;
434 /* Put back any characters past slash-star; see above */
440 if (cur_state->xcdepth <= 0)
443 cur_state->xcdepth--;
470 /* Hexadecimal bit type.
471 * At some point we should simply pass the string
472 * forward to the parser and label it there.
473 * In the meantime, place a leading "x" on the string
474 * to mark it for the input routine as a hex string.
481 yyless(1); /* eat only 'n' this time */
486 if (cur_state->std_strings)
501 <xb,xh,xq,xe,xus>{quote} {
503 * When we are scanning a quoted string and see an end
504 * quote, we must look ahead for a possible continuation.
505 * If we don't see one, we know the end quote was in fact
506 * the end of the string. To reduce the lexer table size,
507 * we use a single "xqs" state to do the lookahead for all
510 cur_state->state_before_str_stop = YYSTATE;
514 <xqs>{quotecontinue} {
516 * Found a quote continuation, so return to the in-quote
517 * state and continue scanning the literal. Nothing is
518 * added to the literal's contents.
520 BEGIN(cur_state->state_before_str_stop);
523 <xqs>{quotecontinuefail} |
526 * Failed to see a quote continuation. Throw back
527 * everything after the end quote, and handle the string
528 * according to the state we were in previously.
532 /* There's nothing to echo ... */
535 <xq,xe,xus>{xqdouble} {
547 <xe>{xeunicodefail} {
560 /* This is only needed for \ just before EOF */
565 cur_state->dolqstart = pg_strdup(yytext);
570 /* throw back all but the initial "$" */
575 if (strcmp(yytext, cur_state->dolqstart) == 0)
577 free(cur_state->dolqstart);
578 cur_state->dolqstart = NULL;
584 * When we fail to match $...$ to dolqstart, transfer
585 * the $... part to the output, but put back the final
586 * $ for rescanning. Consider $delim$...$junk$delim$
592 <xdolq>{dolqinside} {
595 <xdolq>{dolqfailed} {
599 /* This is only needed for $ inside the quoted text */
627 /* throw back all but the initial u/U */
665 * These rules are specific to psql --- they implement parenthesis
666 * counting and detection of command-ending semicolon. These must
667 * appear before the {self} rule so that they take precedence over it.
671 cur_state->paren_depth++;
676 if (cur_state->paren_depth > 0)
677 cur_state->paren_depth--;
683 if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
685 /* Terminate lexing temporarily */
686 cur_state->start_state = YY_START;
687 cur_state->identifier_count = 0;
693 * psql-specific rules to handle backslash commands and variable
694 * substitution. We want these before {self}, also.
698 /* Force a semi-colon or colon into the query buffer */
699 psqlscan_emit(cur_state, yytext + 1, 1);
700 if (yytext[1] == ';')
701 cur_state->identifier_count = 0;
705 /* Terminate lexing temporarily */
706 cur_state->start_state = YY_START;
707 return LEXRES_BACKSLASH;
711 /* Possible psql variable substitution */
715 varname = psqlscan_extract_substring(cur_state,
718 if (cur_state->callbacks->get_variable)
719 value = cur_state->callbacks->get_variable(varname,
721 cur_state->cb_passthrough);
727 /* It is a variable, check for recursion */
728 if (psqlscan_var_is_current_source(cur_state, varname))
730 /* Recursive expansion --- don't go there */
731 pg_log_warning("skipping recursive expansion of variable \"%s\"",
733 /* Instead copy the string as is */
738 /* OK, perform substitution */
739 psqlscan_push_new_buffer(cur_state, value, varname);
740 /* yy_scan_string already made buffer active */
747 * if the variable doesn't exist we'll copy the string as is
756 :'{variable_char}+' {
757 psqlscan_escape_variable(cur_state, yytext, yyleng,
761 :\"{variable_char}+\" {
762 psqlscan_escape_variable(cur_state, yytext, yyleng,
766 :\{\?{variable_char}+\} {
767 psqlscan_test_variable(cur_state, yytext, yyleng);
771 * These rules just avoid the need for scanner backup if one of the
772 * three rules above fails to match completely.
776 /* Throw back everything but the colon */
781 :\"{variable_char}* {
782 /* Throw back everything but the colon */
787 :\{\?{variable_char}* {
788 /* Throw back everything but the colon */
793 /* Throw back everything but the colon */
799 * Back to backend-compatible rules.
808 * Check for embedded slash-star or dash-dash; those
809 * are comment starts, so operator must stop there.
810 * Note that slash-star or dash-dash at the first
811 * character will match a prior rule, not this one.
814 char *slashstar = strstr(yytext, "/*");
815 char *dashdash = strstr(yytext, "--");
817 if (slashstar && dashdash)
819 /* if both appear, take the first one */
820 if (slashstar > dashdash)
821 slashstar = dashdash;
824 slashstar = dashdash;
826 nchars = slashstar - yytext;
829 * For SQL compatibility, '+' and '-' cannot be the
830 * last char of a multi-char operator unless the operator
831 * contains chars that are not in SQL operators.
832 * The idea is to lex '=-' as two operators, but not
833 * to forbid operator names like '?-' that could not be
834 * sequences of SQL operators.
837 (yytext[nchars - 1] == '+' ||
838 yytext[nchars - 1] == '-'))
842 for (ic = nchars - 2; ic >= 0; ic--)
845 if (c == '~' || c == '!' || c == '@' ||
846 c == '#' || c == '^' || c == '&' ||
847 c == '|' || c == '`' || c == '?' ||
854 * didn't find a qualifying character, so remove
859 } while (nchars > 1 &&
860 (yytext[nchars - 1] == '+' ||
861 yytext[nchars - 1] == '-'));
867 /* Strip the unwanted chars from the token */
905 /* throw back the .., and treat as integer */
928 * We need to track if we are inside a BEGIN .. END block
929 * in a function definition, so that semicolons contained
930 * therein don't terminate the whole statement. Short of
931 * writing a full parser here, the following heuristic
932 * should work. First, we track whether the beginning of
933 * the statement matches CREATE [OR REPLACE]
934 * {FUNCTION|PROCEDURE}
937 if (cur_state->identifier_count == 0)
938 memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
940 if (pg_strcasecmp(yytext, "create") == 0 ||
941 pg_strcasecmp(yytext, "function") == 0 ||
942 pg_strcasecmp(yytext, "procedure") == 0 ||
943 pg_strcasecmp(yytext, "or") == 0 ||
944 pg_strcasecmp(yytext, "replace") == 0)
946 if (cur_state->identifier_count < sizeof(cur_state->identifiers))
947 cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
950 cur_state->identifier_count++;
952 if (cur_state->identifiers[0] == 'c' &&
953 (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
954 (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
955 (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
956 cur_state->paren_depth == 0)
958 if (pg_strcasecmp(yytext, "begin") == 0)
959 cur_state->begin_depth++;
960 else if (pg_strcasecmp(yytext, "case") == 0)
963 * CASE also ends with END. We only need to track
964 * this if we are already inside a BEGIN.
966 if (cur_state->begin_depth >= 1)
967 cur_state->begin_depth++;
969 else if (pg_strcasecmp(yytext, "end") == 0)
971 if (cur_state->begin_depth > 0)
972 cur_state->begin_depth--;
984 if (cur_state->buffer_stack == NULL)
986 cur_state->start_state = YY_START;
987 return LEXRES_EOL; /* end of input reached */
991 * We were expanding a variable, so pop the inclusion
992 * stack and keep lexing
994 psqlscan_pop_buffer_stack(cur_state);
995 psqlscan_select_top_buffer(cur_state);
1000 /* LCOV_EXCL_STOP */
1003 * Create a lexer working state struct.
1005 * callbacks is a struct of function pointers that encapsulate some
1006 * behavior we need from the surrounding program. This struct must
1007 * remain valid for the lifespan of the PsqlScanState.
1010 psql_scan_create(const PsqlScanCallbacks *callbacks)
1012 PsqlScanState state;
1014 state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
1016 state->callbacks = callbacks;
1018 yylex_init(&state->scanner);
1020 yyset_extra(state, state->scanner);
1022 psql_scan_reset(state);
1028 * Destroy a lexer working state struct, releasing all resources.
1031 psql_scan_destroy(PsqlScanState state)
1033 psql_scan_finish(state);
1035 psql_scan_reset(state);
1037 yylex_destroy(state->scanner);
1043 * Set the callback passthrough pointer for the lexer.
1045 * This could have been integrated into psql_scan_create, but keeping it
1046 * separate allows the application to change the pointer later, which might
1050 psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
1052 state->cb_passthrough = passthrough;
1056 * Set up to perform lexing of the given input line.
1058 * The text at *line, extending for line_len bytes, will be scanned by
1059 * subsequent calls to the psql_scan routines. psql_scan_finish should
1060 * be called when scanning is complete. Note that the lexer retains
1061 * a pointer to the storage at *line --- this string must not be altered
1062 * or freed until after psql_scan_finish is called.
1064 * encoding is the libpq identifier for the character encoding in use,
1065 * and std_strings says whether standard_conforming_strings is on.
1068 psql_scan_setup(PsqlScanState state,
1069 const char *line, int line_len,
1070 int encoding, bool std_strings)
1072 /* Mustn't be scanning already */
1073 Assert(state->scanbufhandle == NULL);
1074 Assert(state->buffer_stack == NULL);
1076 /* Do we need to hack the character set encoding? */
1077 state->encoding = encoding;
1078 state->safe_encoding = pg_valid_server_encoding_id(encoding);
1080 /* Save standard-strings flag as well */
1081 state->std_strings = std_strings;
1083 /* Set up flex input buffer with appropriate translation and padding */
1084 state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1086 state->scanline = line;
1088 /* Set lookaside data in case we have to map unsafe encoding */
1089 state->curline = state->scanbuf;
1090 state->refline = state->scanline;
1094 * Do lexical analysis of SQL command text.
1096 * The text previously passed to psql_scan_setup is scanned, and appended
1097 * (possibly with transformation) to query_buf.
1099 * The return value indicates the condition that stopped scanning:
1101 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1102 * transferred to query_buf.) The command accumulated in query_buf should
1103 * be executed, then clear query_buf and call again to scan the remainder
1106 * PSCAN_BACKSLASH: found a backslash that starts a special command.
1107 * Any previous data on the line has been transferred to query_buf.
1108 * The caller will typically next apply a separate flex lexer to scan
1109 * the special command.
1111 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1112 * incomplete SQL command. *prompt is set to the appropriate prompt type.
1114 * PSCAN_EOL: the end of the line was reached, and there is no lexical
1115 * reason to consider the command incomplete. The caller may or may not
1116 * choose to send it. *prompt is set to the appropriate prompt type if
1117 * the caller chooses to collect more input.
1119 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1120 * be called next, then the cycle may be repeated with a fresh input line.
1122 * In all cases, *prompt is set to an appropriate prompt type code for the
1123 * next line-input operation.
1126 psql_scan(PsqlScanState state,
1127 PQExpBuffer query_buf,
1128 promptStatus_t *prompt)
1130 PsqlScanResult result;
1133 /* Must be scanning already */
1134 Assert(state->scanbufhandle != NULL);
1136 /* Set current output target */
1137 state->output_buf = query_buf;
1139 /* Set input source */
1140 if (state->buffer_stack != NULL)
1141 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1143 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1146 lexresult = yylex(NULL, state->scanner);
1149 * Check termination state and return appropriate result info.
1153 case LEXRES_EOL: /* end of input */
1154 switch (state->start_state)
1157 case xqs: /* we treat this like INITIAL */
1158 if (state->paren_depth > 0)
1160 result = PSCAN_INCOMPLETE;
1161 *prompt = PROMPT_PAREN;
1163 else if (state->begin_depth > 0)
1165 result = PSCAN_INCOMPLETE;
1166 *prompt = PROMPT_CONTINUE;
1168 else if (query_buf->len > 0)
1171 *prompt = PROMPT_CONTINUE;
1175 /* never bother to send an empty buffer */
1176 result = PSCAN_INCOMPLETE;
1177 *prompt = PROMPT_READY;
1181 result = PSCAN_INCOMPLETE;
1182 *prompt = PROMPT_SINGLEQUOTE;
1185 result = PSCAN_INCOMPLETE;
1186 *prompt = PROMPT_COMMENT;
1189 result = PSCAN_INCOMPLETE;
1190 *prompt = PROMPT_DOUBLEQUOTE;
1193 result = PSCAN_INCOMPLETE;
1194 *prompt = PROMPT_SINGLEQUOTE;
1197 result = PSCAN_INCOMPLETE;
1198 *prompt = PROMPT_SINGLEQUOTE;
1201 result = PSCAN_INCOMPLETE;
1202 *prompt = PROMPT_SINGLEQUOTE;
1205 result = PSCAN_INCOMPLETE;
1206 *prompt = PROMPT_DOLLARQUOTE;
1209 result = PSCAN_INCOMPLETE;
1210 *prompt = PROMPT_DOUBLEQUOTE;
1213 result = PSCAN_INCOMPLETE;
1214 *prompt = PROMPT_SINGLEQUOTE;
1217 /* can't get here */
1218 fprintf(stderr, "invalid YY_START\n");
1222 case LEXRES_SEMI: /* semicolon */
1223 result = PSCAN_SEMICOLON;
1224 *prompt = PROMPT_READY;
1226 case LEXRES_BACKSLASH: /* backslash */
1227 result = PSCAN_BACKSLASH;
1228 *prompt = PROMPT_READY;
1231 /* can't get here */
1232 fprintf(stderr, "invalid yylex result\n");
1240 * Clean up after scanning a string. This flushes any unread input and
1241 * releases resources (but not the PsqlScanState itself). Note however
1242 * that this does not reset the lexer scan state; that can be done by
1243 * psql_scan_reset(), which is an orthogonal operation.
1245 * It is legal to call this when not scanning anything (makes it easier
1246 * to deal with error recovery).
1249 psql_scan_finish(PsqlScanState state)
1251 /* Drop any incomplete variable expansions. */
1252 while (state->buffer_stack != NULL)
1253 psqlscan_pop_buffer_stack(state);
1255 /* Done with the outer scan buffer, too */
1256 if (state->scanbufhandle)
1257 yy_delete_buffer(state->scanbufhandle, state->scanner);
1258 state->scanbufhandle = NULL;
1260 free(state->scanbuf);
1261 state->scanbuf = NULL;
1265 * Reset lexer scanning state to start conditions. This is appropriate
1266 * for executing \r psql commands (or any other time that we discard the
1267 * prior contents of query_buf). It is not, however, necessary to do this
1268 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1269 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1270 * conditions are returned.
1272 * Note that this is unrelated to flushing unread input; that task is
1273 * done by psql_scan_finish().
1276 psql_scan_reset(PsqlScanState state)
1278 state->start_state = INITIAL;
1279 state->paren_depth = 0;
1280 state->xcdepth = 0; /* not really necessary */
1281 if (state->dolqstart)
1282 free(state->dolqstart);
1283 state->dolqstart = NULL;
1284 state->identifier_count = 0;
1285 state->begin_depth = 0;
1289 * Reselect this lexer (psqlscan.l) after using another one.
1291 * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1292 * state, because we'd never switch to another lexer in a different state.
1293 * However, we don't want to reset e.g. paren_depth, so this can't be
1294 * the same as psql_scan_reset().
1296 * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1297 * must be a superset of this.
1299 * Note: it seems likely that other lexers could just assign INITIAL for
1300 * themselves, since that probably has the value zero in every flex-generated
1301 * lexer. But let's not assume that.
1304 psql_scan_reselect_sql_lexer(PsqlScanState state)
1306 state->start_state = INITIAL;
1310 * Return true if lexer is currently in an "inside quotes" state.
1312 * This is pretty grotty but is needed to preserve the old behavior
1313 * that mainloop.c drops blank lines not inside quotes without even
1317 psql_scan_in_quote(PsqlScanState state)
1319 return state->start_state != INITIAL &&
1320 state->start_state != xqs;
1324 * Push the given string onto the stack of stuff to scan.
1326 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1329 psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1330 const char *varname)
1332 StackElem *stackelem;
1334 stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1337 * In current usage, the passed varname points at the current flex input
1338 * buffer; we must copy it before calling psqlscan_prepare_buffer()
1339 * because that will change the buffer state.
1341 stackelem->varname = varname ? pg_strdup(varname) : NULL;
1343 stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1344 &stackelem->bufstring);
1345 state->curline = stackelem->bufstring;
1346 if (state->safe_encoding)
1348 stackelem->origstring = NULL;
1349 state->refline = stackelem->bufstring;
1353 stackelem->origstring = pg_strdup(newstr);
1354 state->refline = stackelem->origstring;
1356 stackelem->next = state->buffer_stack;
1357 state->buffer_stack = stackelem;
1361 * Pop the topmost buffer stack item (there must be one!)
1363 * NB: after this, the flex input state is unspecified; caller must
1364 * switch to an appropriate buffer to continue lexing.
1365 * See psqlscan_select_top_buffer().
1368 psqlscan_pop_buffer_stack(PsqlScanState state)
1370 StackElem *stackelem = state->buffer_stack;
1372 state->buffer_stack = stackelem->next;
1373 yy_delete_buffer(stackelem->buf, state->scanner);
1374 free(stackelem->bufstring);
1375 if (stackelem->origstring)
1376 free(stackelem->origstring);
1377 if (stackelem->varname)
1378 free(stackelem->varname);
1383 * Select the topmost surviving buffer as the active input.
1386 psqlscan_select_top_buffer(PsqlScanState state)
1388 StackElem *stackelem = state->buffer_stack;
1390 if (stackelem != NULL)
1392 yy_switch_to_buffer(stackelem->buf, state->scanner);
1393 state->curline = stackelem->bufstring;
1394 state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1398 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1399 state->curline = state->scanbuf;
1400 state->refline = state->scanline;
1405 * Check if specified variable name is the source for any string
1406 * currently being scanned
1409 psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1411 StackElem *stackelem;
1413 for (stackelem = state->buffer_stack;
1415 stackelem = stackelem->next)
1417 if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1424 * Set up a flex input buffer to scan the given data. We always make a
1425 * copy of the data. If working in an unsafe encoding, the copy has
1426 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1428 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1431 psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1436 /* Flex wants two \0 characters after the actual data */
1437 newtxt = pg_malloc(len + 2);
1439 newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1441 if (state->safe_encoding)
1442 memcpy(newtxt, txt, len);
1445 /* Gotta do it the hard way */
1450 int thislen = PQmblen(txt + i, state->encoding);
1452 /* first byte should always be okay... */
1455 while (--thislen > 0 && i < len)
1456 newtxt[i++] = (char) 0xFF;
1460 return yy_scan_buffer(newtxt, len + 2, state->scanner);
1464 * psqlscan_emit() --- body for ECHO macro
1466 * NB: this must be used for ALL and ONLY the text copied from the flex
1467 * input data. If you pass it something that is not part of the yytext
1468 * string, you are making a mistake. Internally generated text can be
1469 * appended directly to state->output_buf.
1472 psqlscan_emit(PsqlScanState state, const char *txt, int len)
1474 PQExpBuffer output_buf = state->output_buf;
1476 if (state->safe_encoding)
1477 appendBinaryPQExpBuffer(output_buf, txt, len);
1480 /* Gotta do it the hard way */
1481 const char *reference = state->refline;
1484 reference += (txt - state->curline);
1486 for (i = 0; i < len; i++)
1490 if (ch == (char) 0xFF)
1492 appendPQExpBufferChar(output_buf, ch);
1498 * psqlscan_extract_substring --- fetch value of (part of) the current token
1500 * This is like psqlscan_emit(), except that the data is returned as a
1501 * malloc'd string rather than being pushed directly to state->output_buf.
1504 psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1506 char *result = (char *) pg_malloc(len + 1);
1508 if (state->safe_encoding)
1509 memcpy(result, txt, len);
1512 /* Gotta do it the hard way */
1513 const char *reference = state->refline;
1516 reference += (txt - state->curline);
1518 for (i = 0; i < len; i++)
1522 if (ch == (char) 0xFF)
1532 * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1534 * If the variable name is found, escape its value using the appropriate
1535 * quoting method and emit the value to output_buf. (Since the result is
1536 * surely quoted, there is never any reason to rescan it.) If we don't
1537 * find the variable or escaping fails, emit the token as-is.
1540 psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1541 PsqlScanQuoteType quote)
1546 /* Variable lookup. */
1547 varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1548 if (state->callbacks->get_variable)
1549 value = state->callbacks->get_variable(varname, quote,
1550 state->cb_passthrough);
1557 /* Emit the suitably-escaped value */
1558 appendPQExpBufferStr(state->output_buf, value);
1563 /* Emit original token as-is */
1564 psqlscan_emit(state, txt, len);
1569 psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1574 varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1575 if (state->callbacks->get_variable)
1576 value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1577 state->cb_passthrough);
1584 psqlscan_emit(state, "TRUE", 4);
1589 psqlscan_emit(state, "FALSE", 5);