2 * psql - the PostgreSQL interactive terminal
4 * Copyright (c) 2000-2025, PostgreSQL Global Development Group
6 * src/bin/psql/stringutils.c
8 #include "postgres_fe.h"
13 #include "stringutils.h"
17 * Replacement for strtok() (a.k.a. poor man's flex)
19 * Splits a string into tokens, returning one token per call, then NULL
20 * when no more tokens exist in the given string.
22 * The calling convention is similar to that of strtok, but with more
25 * s - string to parse, if NULL continue parsing the last string
26 * whitespace - set of whitespace characters that separate tokens
27 * delim - set of non-whitespace separator characters (or NULL)
28 * quote - set of characters that can quote a token (NULL if none)
29 * escape - character that can quote quotes (0 if none)
30 * e_strings - if true, treat E'...' syntax as a valid token
31 * del_quotes - if true, strip quotes from the returned token, else return
32 * it exactly as found in the string
33 * encoding - the active character-set encoding
35 * Characters in 'delim', if any, will be returned as single-character
36 * tokens unless part of a quoted token.
38 * Double occurrences of the quoting character are always taken to represent
39 * a single quote character in the data. If escape isn't 0, then escape
40 * followed by anything (except \0) is a data character too.
42 * The combination of e_strings and del_quotes both true is not currently
43 * handled. This could be fixed but it's not needed anywhere at the moment.
45 * Note that the string s is _not_ overwritten in this implementation.
47 * NB: it's okay to vary delim, quote, and escape from one call to the
48 * next on a single source string, but changing whitespace is a bad idea
49 * since you might lose data.
52 strtokx(const char *s
,
53 const char *whitespace
,
61 static char *storage
= NULL
; /* store the local copy of the user's
63 static char *string
= NULL
; /* pointer into storage where to continue on
66 /* variously abused variables: */
76 * We may need extra space to insert delimiter nulls for adjacent
77 * tokens. 2X the space is a gross overestimate, but it's unlikely
78 * that this code will be used on huge strings anyway.
80 storage
= pg_malloc(2 * strlen(s
) + 1);
88 /* skip leading whitespace */
89 offset
= strspn(string
, whitespace
);
90 start
= &string
[offset
];
92 /* end of string reached? */
95 /* technically we don't need to free here, but we're nice */
102 /* test if delimiter character */
103 if (delim
&& strchr(delim
, *start
))
106 * If not at end of string, we need to insert a null to terminate the
107 * returned token. We can just overwrite the next character if it
108 * happens to be in the whitespace set ... otherwise move over the
109 * rest of the string to make room. (This is why we allocated extra
115 if (!strchr(whitespace
, *p
))
116 memmove(p
+ 1, p
, strlen(p
) + 1);
122 /* at end of string, so no extra work */
129 /* check for E string */
132 (*p
== 'E' || *p
== 'e') &&
136 escape
= '\\'; /* if std strings before, not any more */
140 /* test if quoting character */
141 if (quote
&& strchr(quote
, *p
))
143 /* okay, we have a quoted token, now scan for the closer */
144 char thisquote
= *p
++;
146 for (; *p
; p
+= PQmblenBounded(p
, encoding
))
148 if (*p
== escape
&& p
[1] != '\0')
149 p
++; /* process escaped anything */
150 else if (*p
== thisquote
&& p
[1] == thisquote
)
151 p
++; /* process doubled quote */
152 else if (*p
== thisquote
)
154 p
++; /* skip trailing quote */
160 * If not at end of string, we need to insert a null to terminate the
161 * returned token. See notes above.
165 if (!strchr(whitespace
, *p
))
166 memmove(p
+ 1, p
, strlen(p
) + 1);
172 /* at end of string, so no extra work */
176 /* Clean up the token if caller wants that */
178 strip_quotes(start
, thisquote
, escape
, encoding
);
184 * Otherwise no quoting character. Scan till next whitespace, delimiter
185 * or quote. NB: at this point, *start is known not to be '\0',
186 * whitespace, delim, or quote, so we will consume at least one character.
188 offset
= strcspn(start
, whitespace
);
192 unsigned int offset2
= strcspn(start
, delim
);
194 if (offset
> offset2
)
200 unsigned int offset2
= strcspn(start
, quote
);
202 if (offset
> offset2
)
209 * If not at end of string, we need to insert a null to terminate the
210 * returned token. See notes above.
214 if (!strchr(whitespace
, *p
))
215 memmove(p
+ 1, p
, strlen(p
) + 1);
221 /* at end of string, so no extra work */
232 * Remove quotes from the string at *source. Leading and trailing occurrences
233 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
234 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
235 * significance of next character.
237 * Note that the source string is overwritten in-place.
240 strip_quotes(char *source
, char quote
, char escape
, int encoding
)
245 Assert(source
!= NULL
);
246 Assert(quote
!= '\0');
250 if (*src
&& *src
== quote
)
251 src
++; /* skip leading quote */
258 if (c
== quote
&& src
[1] == '\0')
259 break; /* skip trailing quote */
260 else if (c
== quote
&& src
[1] == quote
)
261 src
++; /* process doubled quote */
262 else if (c
== escape
&& src
[1] != '\0')
263 src
++; /* process escaped character */
265 i
= PQmblenBounded(src
, encoding
);
277 * Opposite of strip_quotes(). If "source" denotes itself literally without
278 * quoting or escaping, returns NULL. Otherwise, returns a malloc'd copy with
279 * quoting and escaping applied:
281 * source - string to parse
282 * entails_quote - any of these present? need outer quotes
283 * quote - doubled within string, affixed to both ends
284 * escape - doubled within string
285 * force_quote - if true, quote the output even if it doesn't "need" it
286 * encoding - the active character-set encoding
288 * Do not use this as a substitute for PQescapeStringConn(). Use it for
289 * strings to be parsed by strtokx() or psql_scan_slash_option().
292 quote_if_needed(const char *source
, const char *entails_quote
,
293 char quote
, char escape
, bool force_quote
,
299 bool need_quotes
= force_quote
;
301 Assert(source
!= NULL
);
302 Assert(quote
!= '\0');
305 dst
= ret
= pg_malloc(2 * strlen(src
) + 3); /* excess */
319 else if (c
== escape
)
324 else if (strchr(entails_quote
, c
))
327 i
= PQmblenBounded(src
, encoding
);