Fix obsolete comment regarding FSM truncation.
[PostgreSQL.git] / src / bin / psql / psqlscan.l
blobf18bd81e3a399f8a7da86e1b2bad064cc2d56552
1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * psqlscan.l
5  *        lexical scanner for psql
6  *
7  * This code is mainly needed to determine where the end of a SQL statement
8  * is: we are looking for semicolons that are not within quotes, comments,
9  * or parentheses.  The most reliable way to handle this is to borrow the
10  * backend's flex lexer rules, lock, stock, and barrel.  The rules below
11  * are (except for a few) the same as the backend's, but their actions are
12  * just ECHO whereas the backend's actions generally do other things.
13  *
14  * XXX The rules in this file must be kept in sync with the backend lexer!!!
15  *
16  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
17  *
18  * The most difficult aspect of this code is that we need to work in multibyte
19  * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
20  * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
21  * all our lexing rules treat all high-bit-set characters alike, we don't
22  * really need to care whether such a byte is part of a sequence or not.
23  * In an "unsafe" encoding, we still expect the first byte of a multibyte
24  * sequence to be >= 0x80, but later bytes might not be.  If we scan such
25  * a sequence as-is, the lexing rules could easily be fooled into matching
26  * such bytes to ordinary ASCII characters.  Our solution for this is to
27  * substitute 0xFF for each non-first byte within the data presented to flex.
28  * The flex rules will then pass the FF's through unmolested.  The emit()
29  * subroutine is responsible for looking back to the original string and
30  * replacing FF's with the corresponding original bytes.
31  *
32  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
33  * Portions Copyright (c) 1994, Regents of the University of California
34  *
35  * IDENTIFICATION
36  *        $PostgreSQL$
37  *
38  *-------------------------------------------------------------------------
39  */
40 #include "postgres_fe.h"
42 #include "psqlscan.h"
44 #include <ctype.h>
46 #include "common.h"
47 #include "settings.h"
48 #include "variables.h"
52  * We use a stack of flex buffers to handle substitution of psql variables.
53  * Each stacked buffer contains the as-yet-unread text from one psql variable.
54  * When we pop the stack all the way, we resume reading from the outer buffer
55  * identified by scanbufhandle.
56  */
57 typedef struct StackElem
59         YY_BUFFER_STATE buf;            /* flex input control structure */
60         char       *bufstring;          /* data actually being scanned by flex */
           /*
            * origstring matters only when bufstring is not a faithful copy of the
            * input, i.e. when non-first bytes of multibyte characters were
            * replaced by 0xFF (see the file header).  NOTE(review): presumably
            * NULL otherwise --- confirm in prepare_buffer().
            */
61         char       *origstring;         /* copy of original data, if needed */
62         struct StackElem *next;         /* link to the next entry on the stack */
63 } StackElem;
66  * All working state of the lexer must be stored in PsqlScanStateData
67  * between calls.  This allows us to have multiple open lexer operations,
68  * which is needed for nested include files.  The lexer itself is not
69  * recursive, but it must be re-entrant.
70  */
71 typedef struct PsqlScanStateData
73         StackElem  *buffer_stack;       /* stack of variable expansion buffers */
74         /*
75          * These variables always refer to the outer buffer, never to any
76          * stacked variable-expansion buffer.
77          */
78         YY_BUFFER_STATE scanbufhandle;  /* flex handle for the outer buffer */
79         char       *scanbuf;            /* start of outer-level input buffer */
80         const char *scanline;           /* current input line at outer level */
82         /* safe_encoding, curline, refline are used by emit() to replace FFs */
83         int                     encoding;               /* encoding being used now */
84         bool            safe_encoding;  /* is current encoding "safe"? */
85         const char *curline;            /* actual flex input string for cur buf */
86         const char *refline;            /* original data for cur buffer */
88         /*
89          * All this state lives across successive input lines, until explicitly
90          * reset by psql_scan_reset.
91          */
92         int                     start_state;    /* saved YY_START */
93         int                     paren_depth;    /* depth of nesting in parentheses */
94         int                     xcdepth;                /* depth of nesting in slash-star comments */
           /*
            * dolqstart is a pg_strdup'd copy made when a dollar quote opens; the
            * rule that sees the matching delimiter frees it and resets it to NULL.
            */
95         char       *dolqstart;          /* current $foo$ quote start string */
96 } PsqlScanStateData;
/*
 * File-level working variables.  The rule actions below reach the scan
 * state and the output buffer through these statics.
 */
98 static PsqlScanState cur_state; /* current state while active */
100 static PQExpBuffer output_buf;  /* current output buffer */
102 /* these variables do not need to be saved across calls */
103 static enum slash_option_type option_type;
104 static char *option_quote;
107 /* Return values from yylex() */
108 #define LEXRES_EOL                      0       /* end of input */
109 #define LEXRES_SEMI                     1       /* command-terminating semicolon found */
110 #define LEXRES_BACKSLASH        2       /* backslash command start */
111 #define LEXRES_OK                       3       /* OK completion of backslash argument */
/* The flex-generated scanner; its result is one of the LEXRES_* codes above */
114 int     yylex(void);
/* Helpers defined later in this file */
116 static void push_new_buffer(const char *newstr);
117 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
118                                                                           char **txtcopy);
119 static void emit(const char *txt, int len);
/*
 * Redefine ECHO so that a rule action saying ECHO passes the current match
 * (yytext, yyleng) to emit().
 */
121 #define ECHO emit(yytext, yyleng)
125 %option 8bit
126 %option never-interactive
127 %option nodefault
128 %option noinput
129 %option nounput
130 %option noyywrap
133  * All of the following definitions and rules should exactly match
134  * src/backend/parser/scan.l so far as the flex patterns are concerned.
135  * The rule bodies are just ECHO as opposed to what the backend does,
136  * however.  (But be sure to duplicate code that affects the lexing process,
137  * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
138  * scan.l has a separate one for each exclusive state.
139  */
142  * OK, here is a short description of lex/flex rules behavior.
143  * The longest pattern which matches an input string is always chosen.
144  * For equal-length patterns, the first occurring in the rules list is chosen.
145  * INITIAL is the starting state, to which all non-conditional rules apply.
146  * Exclusive states change parsing rules while the state is active.  When in
147  * an exclusive state, only those rules defined for that state apply.
149  * We use exclusive states for quoted strings, extended comments,
150  * and to eliminate parsing troubles for numeric strings.
151  * Exclusive states:
152  *  <xb> bit string literal
153  *  <xc> extended C-style comments
154  *  <xd> delimited identifiers (double-quoted identifiers)
155  *  <xh> hexadecimal numeric string
156  *  <xq> standard quoted strings
157  *  <xe> extended quoted strings (support backslash escape sequences)
158  *  <xdolq> $foo$ quoted strings
159  *  <xui> quoted identifier with Unicode escapes
160  *  <xus> quoted string with Unicode escapes
161  */
163 %x xb
164 %x xc
165 %x xd
166 %x xh
167 %x xe
168 %x xq
169 %x xdolq
170 %x xui
171 %x xus
172 /* Additional exclusive states for psql only: lex backslash commands */
173 %x xslashcmd
174 %x xslasharg
175 %x xslashquote
176 %x xslashbackquote
177 %x xslashdefaultarg
178 %x xslashquotedarg
179 %x xslashwholeline
180 %x xslashend
183  * In order to make the world safe for Windows and Mac clients as well as
184  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
185  * sequence will be seen as two successive newlines, but that doesn't cause
186  * any problems.  Comments that start with -- and extend to the next
187  * newline are treated as equivalent to a single whitespace character.
189  * NOTE a fine point: if there is no newline following --, we will absorb
190  * everything to the end of the input as a comment.  This is correct.  Older
191  * versions of Postgres failed to recognize -- as a comment if the input
192  * did not end with a newline.
194  * XXX perhaps \f (formfeed) should be treated as a newline as well?
195  */
197 space                   [ \t\n\r\f]
198 horiz_space             [ \t\f]
199 newline                 [\n\r]
200 non_newline             [^\n\r]
202 comment                 ("--"{non_newline}*)
204 whitespace              ({space}+|{comment})
207  * SQL requires at least one newline in the whitespace separating
208  * string literals that are to be concatenated.  Silly, but who are we
209  * to argue?  Note that {whitespace_with_newline} should not have * after
210  * it, whereas {whitespace} should generally have a * after it...
211  */
213 special_whitespace              ({space}+|{comment}{newline})
214 horiz_whitespace                ({horiz_space}|{comment})
215 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
218  * To ensure that {quotecontinue} can be scanned without having to back up
219  * if the full pattern isn't matched, we include trailing whitespace in
220  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
221  * except for {quote} followed by whitespace and just one "-" (not two,
222  * which would start a {comment}).  To cover that we have {quotefail}.
223  * The actions for {quotestop} and {quotefail} must throw back characters
224  * beyond the quote proper.
225  */
226 quote                   '
227 quotestop               {quote}{whitespace}*
228 quotecontinue   {quote}{whitespace_with_newline}{quote}
229 quotefail               {quote}{whitespace}*"-"
231 /* Bit string
232  * It is tempting to scan the string for only those characters
233  * which are allowed. However, this leads to silently swallowed
234  * characters if illegal characters are included in the string.
235  * For example, if xbinside is [01] then B'ABCD' is interpreted
236  * as a zero-length string, and the ABCD' is lost!
237  * Better to pass the string forward and let the input routines
238  * validate the contents.
239  */
240 xbstart                 [bB]{quote}
241 xbinside                [^']*
243 /* Hexadecimal number */
244 xhstart                 [xX]{quote}
245 xhinside                [^']*
247 /* National character */
248 xnstart                 [nN]{quote}
250 /* Quoted string that allows backslash escapes */
251 xestart                 [eE]{quote}
252 xeinside                [^\\']+
253 xeescape                [\\][^0-7]
254 xeoctesc                [\\][0-7]{1,3}
255 xehexesc                [\\]x[0-9A-Fa-f]{1,2}
257 /* Extended quote
258  * xqdouble implements embedded quote, ''''
259  */
260 xqstart                 {quote}
261 xqdouble                {quote}{quote}
262 xqinside                [^']+
264 /* $foo$ style quotes ("dollar quoting")
265  * The quoted string starts with $foo$ where "foo" is an optional string
266  * in the form of an identifier, except that it may not contain "$", 
267  * and extends to the first occurrence of an identical string.  
268  * There is *no* processing of the quoted text.
270  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
271  * fails to match its trailing "$".
272  */
273 dolq_start              [A-Za-z\200-\377_]
274 dolq_cont               [A-Za-z\200-\377_0-9]
275 dolqdelim               \$({dolq_start}{dolq_cont}*)?\$
276 dolqfailed              \${dolq_start}{dolq_cont}*
277 dolqinside              [^$]+
279 /* Double quote
280  * Allows embedded spaces and other special characters into identifiers.
281  */
282 dquote                  \"
283 xdstart                 {dquote}
284 xdstop                  {dquote}
285 xddouble                {dquote}{dquote}
286 xdinside                [^"]+
288 /* Unicode escapes */
289 uescape                 [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
290 /* error rule to avoid backup */
291 uescapefail             ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
293 /* Quoted identifier with Unicode escapes */
294 xuistart                [uU]&{dquote}
295 xuistop1                {dquote}{whitespace}*{uescapefail}?
296 xuistop2                {dquote}{whitespace}*{uescape}
298 /* Quoted string with Unicode escapes */
299 xusstart                [uU]&{quote}
300 xusstop1                {quote}{whitespace}*{uescapefail}?
301 xusstop2                {quote}{whitespace}*{uescape}
303 /* error rule to avoid backup */
304 xufailed                [uU]&
307 /* C-style comments
309  * The "extended comment" syntax closely resembles allowable operator syntax.
310  * The tricky part here is to get lex to recognize a string starting with
311  * slash-star as a comment, when interpreting it as an operator would produce
312  * a longer match --- remember lex will prefer a longer match!  Also, if we
313  * have something like plus-slash-star, lex will think this is a 3-character
314  * operator whereas we want to see it as a + operator and a comment start.
315  * The solution is two-fold:
316  * 1. append {op_chars}* to xcstart so that it matches as much text as
317  *    {operator} would. Then the tie-breaker (first matching rule of same
318  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
319  *    in case it contains a star-slash that should terminate the comment.
320  * 2. In the operator rule, check for slash-star within the operator, and
321  *    if found throw it back with yyless().  This handles the plus-slash-star
322  *    problem.
323  * Dash-dash comments have similar interactions with the operator rule.
324  */
325 xcstart                 \/\*{op_chars}*
326 xcstop                  \*+\/
327 xcinside                [^*/]+
329 digit                   [0-9]
330 ident_start             [A-Za-z\200-\377_]
331 ident_cont              [A-Za-z\200-\377_0-9\$]
333 identifier              {ident_start}{ident_cont}*
335 typecast                "::"
338  * "self" is the set of chars that should be returned as single-character
339  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
340  * which can be one or more characters long (but if a single-char token
341  * appears in the "self" set, it is not to be returned as an Op).  Note
342  * that the sets overlap, but each has some chars that are not in the other.
344  * If you change either set, adjust the character lists appearing in the
345  * rule for "operator"!
346  */
347 self                    [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
348 op_chars                [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
349 operator                {op_chars}+
351 /* We no longer allow unary minus in numbers.
352  * Instead we pass it separately to the parser, where it gets
353  * coerced via doNegate() -- Leon aug 20 1999
355  * {realfail1} and {realfail2} are added to prevent the need for scanner
356  * backup when the {real} rule fails to match completely.
357  */
359 integer                 {digit}+
360 decimal                 (({digit}*\.{digit}+)|({digit}+\.{digit}*))
361 real                    ({integer}|{decimal})[Ee][-+]?{digit}+
362 realfail1               ({integer}|{decimal})[Ee]
363 realfail2               ({integer}|{decimal})[Ee][-+]
365 param                   \${integer}
367 other                   .
370  * Dollar quoted strings are totally opaque, and no escaping is done on them.
371  * Other quoted strings must allow some special characters such as single-quote
372  *  and newline.
373  * Embedded single-quotes are implemented both in the SQL standard
374  *  style of two adjacent single quotes "''" and in the Postgres/Java style
375  *  of escaped-quote "\'".
376  * Other embedded escaped characters are matched explicitly (the backend
377  *  drops the leading backslash from the string; here the text is just echoed).
378  * Note that xcstart must appear before operator, as explained above!
379  *  Also whitespace (comment) must appear before operator.
380  */
384 {whitespace}    {
385                                         /*
386                                          * Note that the whitespace rule includes both true
387                                          * whitespace and single-line ("--" style) comments.
388                                          * We suppress whitespace at the start of the query
389                                          * buffer.  We also suppress all single-line comments,
390                                          * which is pretty dubious but is the historical
391                                          * behavior.
392                                          */
                                            /*
                                             * A match beginning with '-' can only be a "--" comment,
                                             * because none of the {space} characters is '-'.
                                             */
393                                         if (!(output_buf->len == 0 || yytext[0] == '-'))
394                                                 ECHO;
395                                 }
397 {xcstart}               {
                                            /* Outermost comment start: no nested comments are open yet */
398                                         cur_state->xcdepth = 0;
399                                         BEGIN(xc);
400                                         /* Put back any characters past slash-star; see above */
401                                         yyless(2);
402                                         ECHO;
403                                 }
405 <xc>{xcstart}   {
                                            /* Nested comment start: one more level must be closed */
406                                         cur_state->xcdepth++;
407                                         /* Put back any characters past slash-star; see above */
408                                         yyless(2);
409                                         ECHO;
410                                 }
412 <xc>{xcstop}    {
                                            /*
                                             * At depth zero this star-slash ends the outermost
                                             * comment; otherwise it only closes one nested level.
                                             */
413                                         if (cur_state->xcdepth <= 0)
414                                         {
415                                                 BEGIN(INITIAL);
416                                         }
417                                         else
418                                                 cur_state->xcdepth--;
419                                         ECHO;
420                                 }
422 <xc>{xcinside}  {
                                            /* Comment text containing neither '*' nor '/' */
423                                         ECHO;
424                                 }
426 <xc>{op_chars}  {
                                            /* Stray operator character, e.g. a '/' not followed by '*' */
427                                         ECHO;
428                                 }
430 <xc>\*+                 {
                                            /* Run of stars that is not followed by '/' */
431                                         ECHO;
432                                 }
434 {xbstart}               {
                                            /* Opening b' or B' of a bit string: scan the body in <xb> */
435                                         BEGIN(xb);
436                                         ECHO;
437                                 }
438 <xb>{quotestop} |
439 <xb>{quotefail} {
                                            /*
                                             * Closing quote.  Keep only the quote itself and throw
                                             * back whatever the pattern matched beyond it.
                                             */
440                                         yyless(1);
441                                         BEGIN(INITIAL);
442                                         ECHO;
443                                 }
444 <xh>{xhinside}  |
445 <xb>{xbinside}  {
                                            /* String body: everything up to the next quote */
446                                         ECHO;
447                                 }
448 <xh>{quotecontinue}     |
449 <xb>{quotecontinue}     {
                                            /*
                                             * Quote, whitespace containing a newline, quote: the
                                             * literal continues, so stay in the current state.
                                             */
450                                         ECHO;
451                                 }
453 {xhstart}               {
454                                         /* Hexadecimal bit type.
455                                          * In the backend, this rule places a leading "x" on the
456                                          * string to mark it for the input routine as a hex string
457                                          * (pending passing the string forward to the parser and
458                                          * labeling it there).  Here we just enter <xh> and echo.
459                                          */
460                                         BEGIN(xh);
461                                         ECHO;
462                                 }
463 <xh>{quotestop} |
464 <xh>{quotefail} {
                                            /* Closing quote: keep only the quote, rescan the rest */
465                                         yyless(1);
466                                         BEGIN(INITIAL);
467                                         ECHO;
468                                 }
470 {xnstart}               {
                                            /*
                                             * The quote is put back, so the next match sees it as
                                             * the start of an ordinary quoted string.
                                             */
471                                         yyless(1);                              /* eat only 'n' this time */
472                                         ECHO;
473                                 }
475 {xqstart}               {
                                            /*
                                             * Plain quote: scan with backslash-escape rules (<xe>)
                                             * unless standard_strings() says otherwise.
                                             */
476                                         if (standard_strings())
477                                                 BEGIN(xq);
478                                         else
479                                                 BEGIN(xe);
480                                         ECHO;
481                                 }
482 {xestart}               {
                                            /* e' or E' always selects backslash-escape scanning */
483                                         BEGIN(xe);
484                                         ECHO;
485                                 }
486 {xusstart}              {
                                            /* u&' or U&' starts a string with Unicode escapes */
487                                         BEGIN(xus);
488                                         ECHO;
489                                 }
490 <xq,xe>{quotestop}      |
491 <xq,xe>{quotefail} {
                                            /* Closing quote: keep only the quote, rescan the rest */
492                                         yyless(1);
493                                         BEGIN(INITIAL);
494                                         ECHO;
495                                 }
496 <xus>{xusstop1} {
                                            /*
                                             * Closing quote without a complete UESCAPE clause:
                                             * keep only the quote and rescan what followed.
                                             */
497                                         yyless(1);
498                                         BEGIN(INITIAL);
499                                         ECHO;
500                                 }
501 <xus>{xusstop2} {
                                            /* Closing quote plus a complete UESCAPE clause: take it all */
502                                         BEGIN(INITIAL);
503                                         ECHO;
504                                 }
505 <xq,xe,xus>{xqdouble} {
                                            /* Two adjacent quotes are an embedded quote, not the end */
506                                         ECHO;
507                                 }
508 <xq,xus>{xqinside}  {
                                            /* String body containing no quote */
509                                         ECHO;
510                                 }
511 <xe>{xeinside}  {
                                            /* String body containing no quote or backslash */
512                                         ECHO;
513                                 }
514 <xe>{xeescape}  {
                                            /* Backslash followed by any character other than 0-7 */
515                                         ECHO;
516                                 }
517 <xe>{xeoctesc}  {
                                            /* Backslash followed by one to three octal digits */
518                                         ECHO;
519                                 }
520 <xe>{xehexesc}  {
                                            /* Backslash, 'x', then one or two hex digits */
521                                         ECHO;
522                                 }
523 <xq,xe,xus>{quotecontinue} {
                                            /* Literal continues after whitespace containing a newline */
524                                         ECHO;
525                                 }
526 <xe>.                   {
527                                         /* This is only needed for \ just before EOF */
528                                         ECHO;
529                                 }
531 {dolqdelim}             {
                                            /*
                                             * Remember the opening delimiter so that the matching
                                             * closing delimiter can be recognized later.
                                             */
532                                         cur_state->dolqstart = pg_strdup(yytext);
533                                         BEGIN(xdolq);
534                                         ECHO;
535                                 }
536 {dolqfailed}    {
537                                         /* throw back all but the initial "$" */
538                                         yyless(1);
539                                         ECHO;
540                                 }
541 <xdolq>{dolqdelim} {
                                            /* Only a delimiter identical to the opener ends the string */
542                                         if (strcmp(yytext, cur_state->dolqstart) == 0)
543                                         {
544                                                 free(cur_state->dolqstart);
545                                                 cur_state->dolqstart = NULL;
546                                                 BEGIN(INITIAL);
547                                         }
548                                         else
549                                         {
550                                                 /*
551                                                  * When we fail to match $...$ to dolqstart, transfer
552                                                  * the $... part to the output, but put back the final
553                                                  * $ for rescanning.  Consider $delim$...$junk$delim$
554                                                  */
555                                                 yyless(yyleng-1);
556                                         }
557                                         ECHO;
558                                 }
559 <xdolq>{dolqinside} {
                                            /* Quoted text containing no "$" */
560                                         ECHO;
561                                 }
562 <xdolq>{dolqfailed} {
                                            /* "$" plus identifier characters but no closing "$": plain text */
563                                         ECHO;
564                                 }
565 <xdolq>.                {
566                                         /* This is only needed for $ inside the quoted text */
567                                         ECHO;
568                                 }
570 {xdstart}               {
                                            /* Opening double quote of a delimited identifier */
571                                         BEGIN(xd);
572                                         ECHO;
573                                 }
574 {xuistart}              {
                                            /* u&" or U&" starts an identifier with Unicode escapes */
575                                         BEGIN(xui);
576                                         ECHO;
577                                 }
578 <xd>{xdstop}    {
                                            /* Closing double quote */
579                                         BEGIN(INITIAL);
580                                         ECHO;
581                                 }
582 <xui>{xuistop1} {
                                            /*
                                             * Closing double quote without a complete UESCAPE
                                             * clause: keep only the quote and rescan what followed.
                                             */
583                                         yyless(1);
584                                         BEGIN(INITIAL);
585                                         ECHO;
586                                 }
587 <xui>{xuistop2} {
                                            /* Closing double quote plus a complete UESCAPE clause */
588                                         BEGIN(INITIAL);
589                                         ECHO;
590                                 }
591 <xd,xui>{xddouble}      {
                                            /* Two adjacent double quotes: embedded quote, not the end */
592                                         ECHO;
593                                 }
594 <xd,xui>{xdinside}      {
                                            /* Identifier text containing no double quote */
595                                         ECHO;
596                                 }
598 {xufailed}      {
599                                         /* throw back all but the initial u/U */
600                                         yyless(1);
601                                         ECHO;
602                                 }
604 {typecast}              {
                                            /*
                                             * Both colons are consumed together, so the second one
                                             * cannot begin a psql variable reference.
                                             */
605                                         ECHO;
606                                 }
608         /*
609          * These rules are specific to psql --- they implement parenthesis
610          * counting and detection of command-ending semicolon.  These must
611          * appear before the {self} rule so that they take precedence over it.
612          */
614 "("                             {
                                            /* Track nesting; a ';' inside parentheses does not end the command */
615                                         cur_state->paren_depth++;
616                                         ECHO;
617                                 }
619 ")"                             {
                                            /* An unmatched ')' leaves the depth at zero rather than going negative */
620                                         if (cur_state->paren_depth > 0)
621                                                 cur_state->paren_depth--;
622                                         ECHO;
623                                 }
625 ";"                             {
626                                         ECHO;
627                                         if (cur_state->paren_depth == 0)
628                                         {
629                                                 /* Terminate lexing temporarily */
630                                                 return LEXRES_SEMI;
631                                         }
632                                 }
634         /*
635          * psql-specific rules to handle backslash commands and variable
636          * substitution.  We want these before {self}, also.
637          */
639 "\\"[;:]                {
640                                         /* Force a semicolon or colon into the query buffer */
                                            /* yytext + 1 skips the backslash, so only the ';' or ':' is emitted */
641                                         emit(yytext + 1, 1);
642                                 }
644 "\\"                    {
645                                         /* Terminate lexing temporarily */
646                                         return LEXRES_BACKSLASH;
647                                 }
649 :[A-Za-z0-9_]+  {
650                                         /* Possible psql variable substitution */
651                                         const char *value;
                                            /* yytext + 1 skips the leading colon to get the variable name */
653                                         value = GetVariable(pset.vars, yytext + 1);
655                                         if (value)
656                                         {
657                                                 /* It is a variable, perform substitution */
658                                                 push_new_buffer(value);
659                                                 /* yy_scan_string already made buffer active */
660                                         }
661                                         else
662                                         {
663                                                 /*
664                                                  * if the variable doesn't exist we'll copy the
665                                                  * string as is
666                                                  */
667                                                 ECHO;
668                                         }
669                                 }
671         /*
672          * Back to backend-compatible rules.
673          */
675 {self}                  {
                                            /*
                                             * Single-character token.  "(", ")" and ";" are taken
                                             * by the psql-specific rules that precede this one.
                                             */
676                                         ECHO;
677                                 }
679 {operator}              {
680                                         /*
681                                          * Check for embedded slash-star or dash-dash; those
682                                          * are comment starts, so operator must stop there.
683                                          * Note that slash-star or dash-dash at the first
684                                          * character will match a prior rule, not this one.
685                                          */
                                            /* nchars is the number of leading characters kept as the operator */
686                                         int             nchars = yyleng;
687                                         char   *slashstar = strstr(yytext, "/*");
688                                         char   *dashdash = strstr(yytext, "--");
                                            /* From here on, slashstar points at the earliest comment start, if any */
690                                         if (slashstar && dashdash)
691                                         {
692                                                 /* if both appear, take the first one */
693                                                 if (slashstar > dashdash)
694                                                         slashstar = dashdash;
695                                         }
696                                         else if (!slashstar)
697                                                 slashstar = dashdash;
698                                         if (slashstar)
699                                                 nchars = slashstar - yytext;
701                                         /*
702                                          * For SQL compatibility, '+' and '-' cannot be the
703                                          * last char of a multi-char operator unless the operator
704                                          * contains chars that are not in SQL operators.
705                                          * The idea is to lex '=-' as two operators, but not
706                                          * to forbid operator names like '?-' that could not be
707                                          * sequences of SQL operators.
708                                          */
709                                         while (nchars > 1 &&
710                                                    (yytext[nchars-1] == '+' ||
711                                                         yytext[nchars-1] == '-'))
712                                         {
713                                                 int             ic;
                                                    /* Search the earlier characters for one that is not in SQL operators */
715                                                 for (ic = nchars-2; ic >= 0; ic--)
716                                                 {
717                                                         if (strchr("~!@#^&|`?%", yytext[ic]))
718                                                                 break;
719                                                 }
720                                                 if (ic >= 0)
721                                                         break; /* found a char that makes it OK */
722                                                 nchars--; /* else remove the +/-, and check again */
723                                         }
725                                         if (nchars < yyleng)
726                                         {
727                                                 /* Strip the unwanted chars from the token */
728                                                 yyless(nchars);
729                                         }
730                                         ECHO;
731                                 }
{param}			{ ECHO; }

{integer}		{ ECHO; }

{decimal}		{ ECHO; }

{real}			{ ECHO; }

{realfail1}		{
					/*
					 * Give the trailing [Ee] back to the input and emit what is
					 * left as though it had matched {decimal}.  The text might
					 * really be an {integer}, but input of this shape is almost
					 * certainly headed for a syntax error, so the distinction is
					 * not worth making.
					 */
					yyless(yyleng - 1);
					ECHO;
				}

{realfail2}		{
					/* likewise, except that both chars of [Ee][+-] go back */
					yyless(yyleng - 2);
					ECHO;
				}

{identifier}	{ ECHO; }

{other}			{ ECHO; }

772         /*
773          * Everything from here down is psql-specific.
774          */
<<EOF>>			{
					StackElem  *finished = cur_state->buffer_stack;
					StackElem  *resume;

					/* Nothing stacked: the real input has been used up */
					if (finished == NULL)
						return LEXRES_EOL;

					/*
					 * We ran off the end of a variable expansion.  Discard
					 * its stack entry and carry on lexing whatever buffer
					 * was active before it was pushed.
					 */
					resume = finished->next;
					cur_state->buffer_stack = resume;
					yy_delete_buffer(finished->buf);
					free(finished->bufstring);
					free(finished->origstring);		/* free(NULL) is a no-op */
					free(finished);

					if (resume == NULL)
					{
						/* stack is empty again: back to the main scan buffer */
						yy_switch_to_buffer(cur_state->scanbufhandle);
						cur_state->curline = cur_state->scanbuf;
						cur_state->refline = cur_state->scanline;
					}
					else
					{
						/* resume the next-outer stacked buffer */
						yy_switch_to_buffer(resume->buf);
						cur_state->curline = resume->bufstring;
						cur_state->refline = resume->origstring ? resume->origstring : resume->bufstring;
					}
				}

808         /*
809          * Exclusive lexer states to handle backslash command lexing
810          */
<xslashcmd>{
	/*
	 * Backslash command name: accumulate characters until whitespace or
	 * another backslash shows up.
	 */

{space}|"\\"	{
					/* terminator is not part of the name; leave it unread */
					yyless(0);
					return LEXRES_OK;
				}

{other}			{
					ECHO;
				}

}

<xslasharg>{
	/*
	 * Start of a backslash-command argument: skip leading whitespace, then
	 * choose how to proceed from the first nonblank character.
	 */

{space}+		{ /* discard */ }

"\\"			{
					/*
					 * A backslash ends this command or begins the next one,
					 * so it is left in the input.
					 *
					 * XXX as a consequence, options cannot conveniently
					 * start with a backslash; option processing that
					 * encourages use of backslashes is therefore rather
					 * broken.
					 */
					yyless(0);
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote: note it, then collect the body */
					*option_quote = '\'';
					BEGIN(xslashquote);
				}

"`"				{
					if (option_type != OT_VERBATIM)
					{
						/* opening backquote: note it, then collect the body */
						*option_quote = '`';
						BEGIN(xslashbackquote);
					}
					else
					{
						/* verbatim mode gives backquote no special meaning */
						ECHO;
						BEGIN(xslashdefaultarg);
					}
				}

:[A-Za-z0-9_]*	{
					/* This may be a psql variable reference */
					if (option_type != OT_VERBATIM)
					{
						const char *varvalue = GetVariable(pset.vars, yytext + 1);

						/*
						 * The value is appended exactly as stored, with no
						 * further inspection.  That matches what pre-8.0
						 * code did, though not how variables are treated
						 * outside backslash commands.
						 */
						if (varvalue != NULL)
							appendPQExpBufferStr(output_buf, varvalue);
					}
					else
					{
						ECHO;
					}

					*option_quote = ':';

					return LEXRES_OK;
				}

"|"				{
					ECHO;
					/*
					 * For OT_FILEPIPE the rest is handled like a whole-line
					 * argument; otherwise it is an ordinary default argument.
					 */
					BEGIN(option_type == OT_FILEPIPE ? xslashwholeline : xslashdefaultarg);
				}

{dquote}		{
					/* opening double quote is kept in the output */
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{
					/* anything else starts a plain (default) argument */
					ECHO;
					BEGIN(xslashdefaultarg);
				}

}

<xslashquote>{
	/*
	 * Inside a single-quoted argument: text is copied literally, except
	 * that '' and backslash sequences are translated.
	 */

{quote}			{
					/* closing quote ends the argument */
					return LEXRES_OK;
				}

{xqdouble}		{
					/* doubled quote stands for one literal quote */
					appendPQExpBufferChar(output_buf, '\'');
				}

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal escape: digits begin right after the backslash */
					long		code = strtol(yytext + 1, NULL, 8);

					appendPQExpBufferChar(output_buf, (char) code);
				}

{xehexesc}		{
					/* hex escape: skip the two-character prefix */
					long		code = strtol(yytext + 2, NULL, 16);

					appendPQExpBufferChar(output_buf, (char) code);
				}

"\\".			{
					/* any other escaped character: emit it without the backslash */
					emit(yytext + 1, 1);
				}

{other}|\n		{
					ECHO;
				}

}

<xslashbackquote>{
	/*
	 * Inside backquotes: collect text up to the closing backquote or the
	 * end of the line.  The command itself is run later, by
	 * psql_scan_slash_option.
	 */

"`"				{
					return LEXRES_OK;
				}

{other}|\n		{
					ECHO;
				}

}

<xslashdefaultarg>{
	/*
	 * Plain argument: collect text up to unquoted whitespace or the end of
	 * the line.  Quote characters are kept in the output at this stage.
	 */

{space}			{
					/* whitespace ends the argument and stays unread */
					yyless(0);
					return LEXRES_OK;
				}

"\\"			{
					/*
					 * An unquoted backslash ends this command or starts the
					 * next one, so it is not consumed.
					 *
					 * (Pre-8.0 code behaved differently, but this seems
					 * consistent.)
					 */
					yyless(0);
					return LEXRES_OK;
				}

{dquote}		{
					/* entering a double-quoted section of the argument */
					*option_quote = '"';
					ECHO;
					BEGIN(xslashquotedarg);
				}

{other}			{
					ECHO;
				}

}

<xslashquotedarg>{
	/*
	 * Double-quoted section inside a default-type argument: everything,
	 * including the closing quote, is copied through.
	 */

{dquote}		{
					/* closing quote: emit it and resume plain-argument rules */
					ECHO;
					BEGIN(xslashdefaultarg);
				}

{other}|\n		{
					ECHO;
				}

}

<xslashwholeline>{
	/*
	 * Whole-line argument: copy everything up to the end of the input line,
	 * except for whitespace that precedes any other output.
	 */

{space}+		{
					/* only echo whitespace once something has been collected */
					if (output_buf->len > 0)
					{
						ECHO;
					}
				}

{other}			{
					ECHO;
				}

}

<xslashend>{
	/*
	 * End of a backslash command: a double backslash is consumed; any other
	 * character is left in the input.
	 */

"\\\\"			{
					return LEXRES_OK;
				}

{other}|\n		{
					/* not ours to eat */
					yyless(0);
					return LEXRES_OK;
				}

}

1032  * Create a lexer working state struct.
1033  */
1034 PsqlScanState
1035 psql_scan_create(void)
1037         PsqlScanState state;
1039         state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));
1041         psql_scan_reset(state);
1043         return state;
1047  * Destroy a lexer working state struct, releasing all resources.
1048  */
1049 void
1050 psql_scan_destroy(PsqlScanState state)
1052         psql_scan_finish(state);
1054         psql_scan_reset(state);
1056         free(state);
1060  * Set up to perform lexing of the given input line.
1062  * The text at *line, extending for line_len bytes, will be scanned by
1063  * subsequent calls to the psql_scan routines.  psql_scan_finish should
1064  * be called when scanning is complete.  Note that the lexer retains
1065  * a pointer to the storage at *line --- this string must not be altered
1066  * or freed until after psql_scan_finish is called.
1067  */
1068 void
1069 psql_scan_setup(PsqlScanState state,
1070                                 const char *line, int line_len)
1072         /* Mustn't be scanning already */
1073         psql_assert(state->scanbufhandle == NULL);
1074         psql_assert(state->buffer_stack == NULL);
1076         /* Do we need to hack the character set encoding? */
1077         state->encoding = pset.encoding;
1078         state->safe_encoding = pg_valid_server_encoding_id(state->encoding);
1080         /* needed for prepare_buffer */
1081         cur_state = state;
1083         /* Set up flex input buffer with appropriate translation and padding */
1084         state->scanbufhandle = prepare_buffer(line, line_len,
1085                                                                                   &state->scanbuf);
1086         state->scanline = line;
1088         /* Set lookaside data in case we have to map unsafe encoding */
1089         state->curline = state->scanbuf;
1090         state->refline = state->scanline;
1094  * Do lexical analysis of SQL command text.
1096  * The text previously passed to psql_scan_setup is scanned, and appended
1097  * (possibly with transformation) to query_buf.
1099  * The return value indicates the condition that stopped scanning:
1101  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
1102  * transferred to query_buf.)  The command accumulated in query_buf should
1103  * be executed, then clear query_buf and call again to scan the remainder
1104  * of the line.
1106  * PSCAN_BACKSLASH: found a backslash that starts a psql special command.
1107  * Any previous data on the line has been transferred to query_buf.
1108  * The caller will typically next call psql_scan_slash_command(),
1109  * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end().
1111  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1112  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
1114  * PSCAN_EOL: the end of the line was reached, and there is no lexical
1115  * reason to consider the command incomplete.  The caller may or may not
1116  * choose to send it.  *prompt is set to the appropriate prompt type if
1117  * the caller chooses to collect more input.
1119  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1120  * be called next, then the cycle may be repeated with a fresh input line.
1122  * In all cases, *prompt is set to an appropriate prompt type code for the
1123  * next line-input operation.
1124  */
1125 PsqlScanResult
1126 psql_scan(PsqlScanState state,
1127                   PQExpBuffer query_buf,
1128                   promptStatus_t *prompt)
1130         PsqlScanResult result;
1131         int                     lexresult;
1133         /* Must be scanning already */
1134         psql_assert(state->scanbufhandle);
1136         /* Set up static variables that will be used by yylex */
1137         cur_state = state;
1138         output_buf = query_buf;
1140         if (state->buffer_stack != NULL)
1141                 yy_switch_to_buffer(state->buffer_stack->buf);
1142         else
1143                 yy_switch_to_buffer(state->scanbufhandle);
1145         BEGIN(state->start_state);
1147         /* And lex. */
1148         lexresult = yylex();
1150         /* Update static vars back to the state struct */
1151         state->start_state = YY_START;
1153         /*
1154          * Check termination state and return appropriate result info.
1155          */
1156         switch (lexresult)
1157         {
1158                 case LEXRES_EOL:                /* end of input */
1159                         switch (state->start_state)
1160                         {
1161                                 case INITIAL:
1162                                         if (state->paren_depth > 0)
1163                                         {
1164                                                 result = PSCAN_INCOMPLETE;
1165                                                 *prompt = PROMPT_PAREN;
1166                                         }
1167                                         else if (query_buf->len > 0)
1168                                         {
1169                                                 result = PSCAN_EOL;
1170                                                 *prompt = PROMPT_CONTINUE;
1171                                         }
1172                                         else
1173                                         {
1174                                                 /* never bother to send an empty buffer */
1175                                                 result = PSCAN_INCOMPLETE;
1176                                                 *prompt = PROMPT_READY;
1177                                         }
1178                                         break;
1179                                 case xb:
1180                                         result = PSCAN_INCOMPLETE;
1181                                         *prompt = PROMPT_SINGLEQUOTE;
1182                                         break;
1183                                 case xc:
1184                                         result = PSCAN_INCOMPLETE;
1185                                         *prompt = PROMPT_COMMENT;
1186                                         break;
1187                                 case xd:
1188                                         result = PSCAN_INCOMPLETE;
1189                                         *prompt = PROMPT_DOUBLEQUOTE;
1190                                         break;
1191                                 case xh:
1192                                         result = PSCAN_INCOMPLETE;
1193                                         *prompt = PROMPT_SINGLEQUOTE;
1194                                         break;
1195                                 case xq:
1196                                         result = PSCAN_INCOMPLETE;
1197                                         *prompt = PROMPT_SINGLEQUOTE;
1198                                         break;
1199                                 case xe:
1200                                         result = PSCAN_INCOMPLETE;
1201                                         *prompt = PROMPT_SINGLEQUOTE;
1202                                         break;
1203                                 case xdolq:
1204                                         result = PSCAN_INCOMPLETE;
1205                                         *prompt = PROMPT_DOLLARQUOTE;
1206                                         break;
1207                                 default:
1208                                         /* can't get here */
1209                                         fprintf(stderr, "invalid YY_START\n");
1210                                         exit(1);
1211                         }
1212                         break;
1213                 case LEXRES_SEMI:               /* semicolon */
1214                         result = PSCAN_SEMICOLON;
1215                         *prompt = PROMPT_READY;
1216                         break;
1217                 case LEXRES_BACKSLASH:  /* backslash */
1218                         result = PSCAN_BACKSLASH;
1219                         *prompt = PROMPT_READY;
1220                         break;
1221                 default:
1222                         /* can't get here */
1223                         fprintf(stderr, "invalid yylex result\n");
1224                         exit(1);
1225         }
1227         return result;
1231  * Clean up after scanning a string.  This flushes any unread input and
1232  * releases resources (but not the PsqlScanState itself).  Note however
1233  * that this does not reset the lexer scan state; that can be done by
1234  * psql_scan_reset(), which is an orthogonal operation.
1236  * It is legal to call this when not scanning anything (makes it easier
1237  * to deal with error recovery).
1238  */
1239 void
1240 psql_scan_finish(PsqlScanState state)
1242         /* Drop any incomplete variable expansions. */
1243         while (state->buffer_stack != NULL)
1244         {
1245                 StackElem  *stackelem = state->buffer_stack;
1247                 state->buffer_stack = stackelem->next;
1248                 yy_delete_buffer(stackelem->buf);
1249                 free(stackelem->bufstring);
1250                 if (stackelem->origstring)
1251                         free(stackelem->origstring);
1252                 free(stackelem);
1253         }
1255         /* Done with the outer scan buffer, too */
1256         if (state->scanbufhandle)
1257                 yy_delete_buffer(state->scanbufhandle);
1258         state->scanbufhandle = NULL;
1259         if (state->scanbuf)
1260                 free(state->scanbuf);
1261         state->scanbuf = NULL;
1265  * Reset lexer scanning state to start conditions.  This is appropriate
1266  * for executing \r psql commands (or any other time that we discard the
1267  * prior contents of query_buf).  It is not, however, necessary to do this
1268  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1269  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1270  * conditions are returned.
1272  * Note that this is unrelated to flushing unread input; that task is
1273  * done by psql_scan_finish().
1274  */
1275 void
1276 psql_scan_reset(PsqlScanState state)
1278         state->start_state = INITIAL;
1279         state->paren_depth = 0;
1280         state->xcdepth = 0;                     /* not really necessary */
1281         if (state->dolqstart)
1282                 free(state->dolqstart);
1283         state->dolqstart = NULL;
1287  * Return true if lexer is currently in an "inside quotes" state.
1289  * This is pretty grotty but is needed to preserve the old behavior
1290  * that mainloop.c drops blank lines not inside quotes without even
1291  * echoing them.
1292  */
1293 bool
1294 psql_scan_in_quote(PsqlScanState state)
1296         return state->start_state != INITIAL;
1300  * Scan the command name of a psql backslash command.  This should be called
1301  * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
1302  * has been consumed through the leading backslash.
1304  * The return value is a malloc'd copy of the command name, as parsed off
1305  * from the input.
1306  */
1307 char *
1308 psql_scan_slash_command(PsqlScanState state)
1310         PQExpBufferData mybuf;
1311         int                     lexresult;
1313         /* Must be scanning already */
1314         psql_assert(state->scanbufhandle);
1316         /* Build a local buffer that we'll return the data of */
1317         initPQExpBuffer(&mybuf);
1319         /* Set up static variables that will be used by yylex */
1320         cur_state = state;
1321         output_buf = &mybuf;
1323         if (state->buffer_stack != NULL)
1324                 yy_switch_to_buffer(state->buffer_stack->buf);
1325         else
1326                 yy_switch_to_buffer(state->scanbufhandle);
1328         BEGIN(xslashcmd);
1330         /* And lex. */
1331         lexresult = yylex();
1333         /* There are no possible errors in this lex state... */
1335         return mybuf.data;
1339  * Parse off the next argument for a backslash command, and return it as a
1340  * malloc'd string.  If there are no more arguments, returns NULL.
1342  * type tells what processing, if any, to perform on the option string;
1343  * for example, if it's a SQL identifier, we want to downcase any unquoted
1344  * letters.
1346  * if quote is not NULL, *quote is set to 0 if no quoting was found, else
1347  * the quote symbol.
1349  * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
1350  * be taken as part of the option string will be stripped.
1352  * NOTE: the only possible syntax errors for backslash options are unmatched
1353  * quotes, which are detected when we run out of input.  Therefore, on a
1354  * syntax error we just throw away the string and return NULL; there is no
1355  * need to worry about flushing remaining input.
1356  */
1357 char *
1358 psql_scan_slash_option(PsqlScanState state,
1359                                            enum slash_option_type type,
1360                                            char *quote,
1361                                            bool semicolon)
1363         PQExpBufferData mybuf;
1364         int                     lexresult;
1365         char            local_quote;
1366         bool            badarg;
1368         /* Must be scanning already */
1369         psql_assert(state->scanbufhandle);
1371         if (quote == NULL)
1372                 quote = &local_quote;
1373         *quote = 0;
1375         /* Build a local buffer that we'll return the data of */
1376         initPQExpBuffer(&mybuf);
1378         /* Set up static variables that will be used by yylex */
1379         cur_state = state;
1380         output_buf = &mybuf;
1381         option_type = type;
1382         option_quote = quote;
1384         if (state->buffer_stack != NULL)
1385                 yy_switch_to_buffer(state->buffer_stack->buf);
1386         else
1387                 yy_switch_to_buffer(state->scanbufhandle);
1389         if (type == OT_WHOLE_LINE)
1390                 BEGIN(xslashwholeline);
1391         else
1392                 BEGIN(xslasharg);
1394         /* And lex. */
1395         lexresult = yylex();
1397         /*
1398          * Check the lex result: we should have gotten back either LEXRES_OK
1399          * or LEXRES_EOL (the latter indicating end of string).  If we were inside
1400          * a quoted string, as indicated by YY_START, EOL is an error.
1401          */
1402         psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
1403         badarg = false;
1404         switch (YY_START)
1405         {
1406                 case xslasharg:
1407                         /* empty arg, or possibly a psql variable substitution */
1408                         break;
1409                 case xslashquote:
1410                         if (lexresult != LEXRES_OK)
1411                                 badarg = true;          /* hit EOL not ending quote */
1412                         break;
1413                 case xslashbackquote:
1414                         if (lexresult != LEXRES_OK)
1415                                 badarg = true;          /* hit EOL not ending quote */
1416                         else
1417                         {
1418                                 /* Perform evaluation of backticked command */
1419                                 char       *cmd = mybuf.data;
1420                                 FILE       *fd;
1421                                 bool            error = false;
1422                                 PQExpBufferData output;
1423                                 char            buf[512];
1424                                 size_t          result;
1426                                 fd = popen(cmd, PG_BINARY_R);
1427                                 if (!fd)
1428                                 {
1429                                         psql_error("%s: %s\n", cmd, strerror(errno));
1430                                         error = true;
1431                                 }
1433                                 initPQExpBuffer(&output);
1435                                 if (!error)
1436                                 {
1437                                         do
1438                                         {
1439                                                 result = fread(buf, 1, sizeof(buf), fd);
1440                                                 if (ferror(fd))
1441                                                 {
1442                                                         psql_error("%s: %s\n", cmd, strerror(errno));
1443                                                         error = true;
1444                                                         break;
1445                                                 }
1446                                                 appendBinaryPQExpBuffer(&output, buf, result);
1447                                         } while (!feof(fd));
1448                                 }
1450                                 if (fd && pclose(fd) == -1)
1451                                 {
1452                                         psql_error("%s: %s\n", cmd, strerror(errno));
1453                                         error = true;
1454                                 }
1456                                 if (PQExpBufferBroken(&output))
1457                                 {
1458                                         psql_error("%s: out of memory\n", cmd);
1459                                         error = true;
1460                                 }
1462                                 /* Now done with cmd, transfer result to mybuf */
1463                                 resetPQExpBuffer(&mybuf);
1465                                 if (!error)
1466                                 {
1467                                         /* strip any trailing newline */
1468                                         if (output.len > 0 &&
1469                                                 output.data[output.len - 1] == '\n')
1470                                                 output.len--;
1471                                         appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
1472                                 }
1474                                 termPQExpBuffer(&output);
1475                         }
1476                         break;
1477                 case xslashdefaultarg:
1478                         /* Strip any trailing semi-colons if requested */
1479                         if (semicolon)
1480                         {
1481                                 while (mybuf.len > 0 &&
1482                                            mybuf.data[mybuf.len - 1] == ';')
1483                                 {
1484                                         mybuf.data[--mybuf.len] = '\0';
1485                                 }
1486                         }
1488                         /*
1489                          * If SQL identifier processing was requested, then we strip out
1490                          * excess double quotes and downcase unquoted letters.
1491                          * Doubled double-quotes become output double-quotes, per spec.
1492                          *
1493                          * Note that a string like FOO"BAR"BAZ will be converted to
1494                          * fooBARbaz; this is somewhat inconsistent with the SQL spec,
1495                          * which would have us parse it as several identifiers.  But
1496                          * for psql's purposes, we want a string like "foo"."bar" to
1497                          * be treated as one option, so there's little choice.
1498                          */
1499                         if (type == OT_SQLID || type == OT_SQLIDHACK)
1500                         {
1501                                 bool            inquotes = false;
1502                                 char       *cp = mybuf.data;
1504                                 while (*cp)
1505                                 {
1506                                         if (*cp == '"')
1507                                         {
1508                                                 if (inquotes && cp[1] == '"')
1509                                                 {
1510                                                         /* Keep the first quote, remove the second */
1511                                                         cp++;
1512                                                 }
1513                                                 inquotes = !inquotes;
1514                                                 /* Collapse out quote at *cp */
1515                                                 memmove(cp, cp + 1, strlen(cp));
1516                                                 mybuf.len--;
1517                                                 /* do not advance cp */
1518                                         }
1519                                         else
1520                                         {
1521                                                 if (!inquotes && type == OT_SQLID)
1522                                                         *cp = pg_tolower((unsigned char) *cp);
1523                                                 cp += PQmblen(cp, pset.encoding);
1524                                         }
1525                                 }
1526                         }
1527                         break;
1528                 case xslashquotedarg:
1529                         /* must have hit EOL inside double quotes */
1530                         badarg = true;
1531                         break;
1532                 case xslashwholeline:
1533                         /* always okay */
1534                         break;
1535                 default:
1536                         /* can't get here */
1537                         fprintf(stderr, "invalid YY_START\n");
1538                         exit(1);
1539         }
1541         if (badarg)
1542         {
1543                 psql_error("unterminated quoted string\n");
1544                 termPQExpBuffer(&mybuf);
1545                 return NULL;
1546         }
1548         /*
1549          * An unquoted empty argument isn't possible unless we are at end of
1550          * command.  Return NULL instead.
1551          */
1552         if (mybuf.len == 0 && *quote == 0)
1553         {
1554                 termPQExpBuffer(&mybuf);
1555                 return NULL;
1556         }
1558         /* Else return the completed string. */
1559         return mybuf.data;
1563  * Eat up any unused \\ to complete a backslash command.
1564  */
1565 void
1566 psql_scan_slash_command_end(PsqlScanState state)
1568         int                     lexresult;
1570         /* Must be scanning already */
1571         psql_assert(state->scanbufhandle);
1573         /* Set up static variables that will be used by yylex */
1574         cur_state = state;
1575         output_buf = NULL;
1577         if (state->buffer_stack != NULL)
1578                 yy_switch_to_buffer(state->buffer_stack->buf);
1579         else
1580                 yy_switch_to_buffer(state->scanbufhandle);
1582         BEGIN(xslashend);
1584         /* And lex. */
1585         lexresult = yylex();
1587         /* There are no possible errors in this lex state... */
1592  * Push the given string onto the stack of stuff to scan.
1594  * cur_state must point to the active PsqlScanState.
1596  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1597  */
1598 static void
1599 push_new_buffer(const char *newstr)
1601         StackElem  *stackelem;
1603         stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1604         stackelem->buf = prepare_buffer(newstr, strlen(newstr),
1605                                                                         &stackelem->bufstring);
1606         cur_state->curline = stackelem->bufstring;
1607         if (cur_state->safe_encoding)
1608         {
1609                 stackelem->origstring = NULL;
1610                 cur_state->refline = stackelem->bufstring;
1611         }
1612         else
1613         {
1614                 stackelem->origstring = pg_strdup(newstr);
1615                 cur_state->refline = stackelem->origstring;
1616         }
1617         stackelem->next = cur_state->buffer_stack;
1618         cur_state->buffer_stack = stackelem;
1622  * Set up a flex input buffer to scan the given data.  We always make a
1623  * copy of the data.  If working in an unsafe encoding, the copy has
1624  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1626  * cur_state must point to the active PsqlScanState.
1628  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1629  */
1630 static YY_BUFFER_STATE
1631 prepare_buffer(const char *txt, int len, char **txtcopy)
1633         char       *newtxt;
1635         /* Flex wants two \0 characters after the actual data */
1636         newtxt = pg_malloc(len + 2);
1637         *txtcopy = newtxt;
1638         newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1640         if (cur_state->safe_encoding)
1641                 memcpy(newtxt, txt, len);
1642         else
1643         {
1644                 /* Gotta do it the hard way */
1645                 int             i = 0;
1647                 while (i < len)
1648                 {
1649                         int             thislen = PQmblen(txt + i, cur_state->encoding);
1651                         /* first byte should always be okay... */
1652                         newtxt[i] = txt[i];
1653                         i++;
1654                         while (--thislen > 0)
1655                                 newtxt[i++] = (char) 0xFF;
1656                 }
1657         }
1659         return yy_scan_buffer(newtxt, len + 2);
1663  * emit() --- body for ECHO macro
1665  * NB: this must be used for ALL and ONLY the text copied from the flex
1666  * input data.  If you pass it something that is not part of the yytext
1667  * string, you are making a mistake.  Internally generated text can be
1668  * appended directly to output_buf.
1669  */
1670 static void
1671 emit(const char *txt, int len)
1673         if (cur_state->safe_encoding)
1674                 appendBinaryPQExpBuffer(output_buf, txt, len);
1675         else
1676         {
1677                 /* Gotta do it the hard way */
1678                 const char *reference = cur_state->refline;
1679                 int             i;
1681                 reference += (txt - cur_state->curline);
1683                 for (i = 0; i < len; i++)
1684                 {
1685                         char    ch = txt[i];
1687                         if (ch == (char) 0xFF)
1688                                 ch = reference[i];
1689                         appendPQExpBufferChar(output_buf, ch);
1690                 }
1691         }