Fix obsolete comment regarding FSM truncation.
[PostgreSQL.git] / src / bin / psql / stringutils.c
blob35d7cacdfb4d4226214ec497020758e7ae9884f9
1 /*
2 * psql - the PostgreSQL interactive terminal
4 * Copyright (c) 2000-2008, PostgreSQL Global Development Group
6 * $PostgreSQL$
7 */
8 #include "postgres_fe.h"
10 #include <ctype.h>
12 #include "common.h"
13 #include "stringutils.h"
16 static void strip_quotes(char *source, char quote, char escape, int encoding);
/*
 * strtokx_terminate
 *
 * Null-terminate the token that ends just before 'p' and return the position
 * at which scanning should resume on the next strtokx() call.
 *
 * If 'p' is already at the end of the string there is nothing to write and
 * 'p' itself is returned.  Otherwise the character at 'p' is overwritten
 * when it belongs to 'whitespace' (it would be skipped on the next call
 * anyway); when it does not, the rest of the string, including its
 * terminator, is first shifted up by one byte to make room.  The caller must
 * have reserved spare room in the buffer for that shift.
 */
static char *
strtokx_terminate(char *p, const char *whitespace)
{
	if (*p == '\0')
		return p;				/* at end of string, so no extra work */

	if (strchr(whitespace, *p) == NULL)
		memmove(p + 1, p, strlen(p) + 1);
	*p = '\0';
	return p + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if TRUE, treat E'...' syntax as a valid token
 * del_quotes - if TRUE, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both TRUE is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation: the
 * function works on a private copy held in static storage.  The returned
 * token points into that copy, which is freed when a new string is supplied
 * or when a call finds no further token, so callers must not free the
 * result.  Because the state is static, only one string can be tokenized at
 * a time.
 *
 * A multibyte character that is cut short by the end of the string is
 * treated as extending only up to the terminator, so scanning never moves
 * past the end of the buffer.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* private copy of the caller's string */
	static char *string = NULL; /* position in storage at which to continue
								 * on the next call */

	size_t		offset;
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* a delimiter character is returned as a one-character token */
	if (delim && strchr(delim, *start))
	{
		string = strtokx_terminate(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		while (*p)
		{
			int			len;

			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}

			/*
			 * Step over one character.  *p is known to be nonzero here, so
			 * at least one byte is consumed; stop early at the terminator
			 * in case the reported character length overshoots the string.
			 */
			len = PQmblen(p, encoding);
			do
			{
				p++;
			} while (--len > 0 && *p != '\0');
		}

		string = strtokx_terminate(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		offset2 = strcspn(start, delim);

		if (offset > offset2)
			offset = offset2;
	}

	if (quote)
	{
		size_t		offset2 = strcspn(start, quote);

		if (offset > offset2)
			offset = offset2;
	}

	p = start + offset;

	string = strtokx_terminate(p, whitespace);

	return start;
}
/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * source -		null-terminated string to clean up; must not be NULL
 * quote -		the quoting character; must not be 0
 * escape -		the escape character, or 0 if there is none
 * encoding -	character-set encoding, used to find the byte length of each
 *				character so that multibyte characters are copied whole
 *
 * Note that the source string is overwritten in-place.  The result is never
 * longer than the input.
 *
 * A multibyte character that is cut short by the end of the string is copied
 * only up to the terminator, so neither reading nor writing goes past the
 * end of the string.
 */
static void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *src;
	char	   *dst;

	psql_assert(source);
	psql_assert(quote);

	src = dst = source;

	if (*src && *src == quote)
		src++;					/* skip leading quote */

	while (*src)
	{
		char		c = *src;
		int			len;

		if (c == quote && src[1] == '\0')
			break;				/* skip trailing quote */
		else if (c == quote && src[1] == quote)
			src++;				/* process doubled quote */
		else if (c == escape && src[1] != '\0')
			src++;				/* process escaped character */

		/*
		 * Copy one character.  *src is known to be nonzero here, so at least
		 * one byte is always copied; stop early at the terminator in case
		 * the reported character length overshoots the string.
		 */
		len = PQmblen(src, encoding);
		do
		{
			*dst++ = *src++;
		} while (--len > 0 && *src != '\0');
	}

	*dst = '\0';
}