2 * psql - the PostgreSQL interactive terminal
4 * Copyright (c) 2000-2008, PostgreSQL Global Development Group
8 #include "postgres_fe.h"
13 #include "stringutils.h"
16 static void strip_quotes(char *source
, char quote
, char escape
, int encoding
);
20 * Replacement for strtok() (a.k.a. poor man's flex)
22 * Splits a string into tokens, returning one token per call, then NULL
23 * when no more tokens exist in the given string.
25 * The calling convention is similar to that of strtok, but with more
28 * s - string to parse, if NULL continue parsing the last string
29 * whitespace - set of whitespace characters that separate tokens
30 * delim - set of non-whitespace separator characters (or NULL)
31 * quote - set of characters that can quote a token (NULL if none)
32 * escape - character that can quote quotes (0 if none)
33 * e_strings - if TRUE, treat E'...' syntax as a valid token
34 * del_quotes - if TRUE, strip quotes from the returned token, else return
35 * it exactly as found in the string
36 * encoding - the active character-set encoding
38 * Characters in 'delim', if any, will be returned as single-character
39 * tokens unless part of a quoted token.
41 * Double occurrences of the quoting character are always taken to represent
42 * a single quote character in the data. If escape isn't 0, then escape
43 * followed by anything (except \0) is a data character too.
45 * The combination of e_strings and del_quotes both TRUE is not currently
46 * handled. This could be fixed but it's not needed anywhere at the moment.
48 * Note that the string s is _not_ overwritten in this implementation.
50 * NB: it's okay to vary delim, quote, and escape from one call to the
51 * next on a single source string, but changing whitespace is a bad idea
52 * since you might lose data.
55 strtokx(const char *s
,
56 const char *whitespace
,
64 static char *storage
= NULL
;/* store the local copy of the user's string
66 static char *string
= NULL
; /* pointer into storage where to continue on
69 /* variously abused variables: */
79 * We may need extra space to insert delimiter nulls for adjacent
80 * tokens. 2X the space is a gross overestimate, but it's unlikely
81 * that this code will be used on huge strings anyway.
83 storage
= pg_malloc(2 * strlen(s
) + 1);
91 /* skip leading whitespace */
92 offset
= strspn(string
, whitespace
);
93 start
= &string
[offset
];
95 /* end of string reached? */
98 /* technically we don't need to free here, but we're nice */
105 /* test if delimiter character */
106 if (delim
&& strchr(delim
, *start
))
109 * If not at end of string, we need to insert a null to terminate the
110 * returned token. We can just overwrite the next character if it
111 * happens to be in the whitespace set ... otherwise move over the
112 * rest of the string to make room. (This is why we allocated extra
118 if (!strchr(whitespace
, *p
))
119 memmove(p
+ 1, p
, strlen(p
) + 1);
125 /* at end of string, so no extra work */
132 /* check for E string */
135 (*p
== 'E' || *p
== 'e') &&
139 escape
= '\\'; /* if std strings before, not any more */
143 /* test if quoting character */
144 if (quote
&& strchr(quote
, *p
))
146 /* okay, we have a quoted token, now scan for the closer */
147 char thisquote
= *p
++;
149 for (; *p
; p
+= PQmblen(p
, encoding
))
151 if (*p
== escape
&& p
[1] != '\0')
152 p
++; /* process escaped anything */
153 else if (*p
== thisquote
&& p
[1] == thisquote
)
154 p
++; /* process doubled quote */
155 else if (*p
== thisquote
)
157 p
++; /* skip trailing quote */
163 * If not at end of string, we need to insert a null to terminate the
164 * returned token. See notes above.
168 if (!strchr(whitespace
, *p
))
169 memmove(p
+ 1, p
, strlen(p
) + 1);
175 /* at end of string, so no extra work */
179 /* Clean up the token if caller wants that */
181 strip_quotes(start
, thisquote
, escape
, encoding
);
187 * Otherwise no quoting character. Scan till next whitespace, delimiter
188 * or quote. NB: at this point, *start is known not to be '\0',
189 * whitespace, delim, or quote, so we will consume at least one character.
191 offset
= strcspn(start
, whitespace
);
195 unsigned int offset2
= strcspn(start
, delim
);
197 if (offset
> offset2
)
203 unsigned int offset2
= strcspn(start
, quote
);
205 if (offset
> offset2
)
212 * If not at end of string, we need to insert a null to terminate the
213 * returned token. See notes above.
217 if (!strchr(whitespace
, *p
))
218 memmove(p
+ 1, p
, strlen(p
) + 1);
224 /* at end of string, so no extra work */
235 * Remove quotes from the string at *source. Leading and trailing occurrences
236 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
237 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
238 * significance of next character.
240 * Note that the source string is overwritten in-place.
243 strip_quotes(char *source
, char quote
, char escape
, int encoding
)
253 if (*src
&& *src
== quote
)
254 src
++; /* skip leading quote */
261 if (c
== quote
&& src
[1] == '\0')
262 break; /* skip trailing quote */
263 else if (c
== quote
&& src
[1] == quote
)
264 src
++; /* process doubled quote */
265 else if (c
== escape
&& src
[1] != '\0')
266 src
++; /* process escaped character */
268 i
= PQmblen(src
, encoding
);