turns printfs back on
[freebsd-src/fkvm-freebsd.git] / contrib / ntp / libopts / tokenize.c
blob0e576ce26d0ac9487955d2333b5137d1c1ca593b
1 /*
2 * This file defines the string_tokenize interface
3 * Time-stamp: "2006-06-24 15:27:49 bkorb"
5 * string_tokenize copyright 2005 Bruce Korb
7 * string_tokenize is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * string_tokenize is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with string_tokenize; if not, write to:
19 * The Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
23 #include <ctype.h>
24 #include <errno.h>
25 #include <stdlib.h>
27 #define cc_t const unsigned char
28 #define ch_t unsigned char
30 /* = = = START-STATIC-FORWARD = = = */
31 /* static forward declarations maintained by :mkfwd */
32 static void
33 copy_cooked( ch_t** ppDest, char const ** ppSrc );
35 static void
36 copy_raw( ch_t** ppDest, char const ** ppSrc );
37 /* = = = END-STATIC-FORWARD = = = */
39 static void
40 copy_cooked( ch_t** ppDest, char const ** ppSrc )
42 ch_t* pDest = (ch_t*)*ppDest;
43 const ch_t* pSrc = (const ch_t*)(*ppSrc + 1);
45 for (;;) {
46 ch_t ch = *(pSrc++);
47 switch (ch) {
48 case NUL: *ppSrc = NULL; return;
49 case '"': goto done;
50 case '\\':
51 pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F );
52 if (ch == 0x7F)
53 break;
54 /* FALLTHROUGH */
56 default:
57 *(pDest++) = ch;
61 done:
62 *ppDest = (ch_t*)pDest; /* next spot for storing character */
63 *ppSrc = (char const *)pSrc; /* char following closing quote */
67 static void
68 copy_raw( ch_t** ppDest, char const ** ppSrc )
70 ch_t* pDest = *ppDest;
71 cc_t* pSrc = (cc_t*) (*ppSrc + 1);
73 for (;;) {
74 ch_t ch = *(pSrc++);
75 switch (ch) {
76 case NUL: *ppSrc = NULL; return;
77 case '\'': goto done;
78 case '\\':
80 * *Four* escapes are handled: newline removal, escape char
81 * quoting and apostrophe quoting
83 switch (*pSrc) {
84 case NUL: *ppSrc = NULL; return;
85 case '\r':
86 if (*(++pSrc) == '\n')
87 ++pSrc;
88 continue;
90 case '\n':
91 ++pSrc;
92 continue;
94 case '\'':
95 ch = '\'';
96 /* FALLTHROUGH */
98 case '\\':
99 ++pSrc;
100 break;
102 /* FALLTHROUGH */
104 default:
105 *(pDest++) = ch;
109 done:
110 *ppDest = pDest; /* next spot for storing character */
111 *ppSrc = (char const *) pSrc; /* char following closing quote */
115 /*=export_func ao_string_tokenize
117 * what: tokenize an input string
119 * arg: + char const* + string + string to be tokenized +
121 * ret_type: token_list_t*
122 * ret_desc: pointer to a structure that lists each token
124 * doc:
126 * This function will convert one input string into a list of strings.
127 * The list of strings is derived by separating the input based on
128 * white space separation. However, if the input contains either single
129 * or double quote characters, then the text after that character up to
130 * a matching quote will become the string in the list.
132 * The returned pointer should be deallocated with @code{free(3C)} when
133 * are done using the data. The data are placed in a single block of
134 * allocated memory. Do not deallocate individual token/strings.
136 * The structure pointed to will contain at least these two fields:
137 * @table @samp
138 * @item tkn_ct
139 * The number of tokens found in the input string.
140 * @item tok_list
141 * An array of @code{tkn_ct + 1} pointers to substring tokens, with
142 * the last pointer set to NULL.
143 * @end table
145 * There are two types of quoted strings: single quoted (@code{'}) and
146 * double quoted (@code{"}). Singly quoted strings are fairly raw in that
147 * escape characters (@code{\\}) are simply another character, except when
148 * preceding the following characters:
149 * @example
150 * @code{\\} double backslashes reduce to one
151 * @code{'} incorporates the single quote into the string
152 * @code{\n} suppresses both the backslash and newline character
153 * @end example
155 * Double quote strings are formed according to the rules of string
156 * constants in ANSI-C programs.
158 * example:
159 * @example
160 * #include <stdlib.h>
161 * int ix;
162 * token_list_t* ptl = ao_string_tokenize( some_string )
163 * for (ix = 0; ix < ptl->tkn_ct; ix++)
164 * do_something_with_tkn( ptl->tkn_list[ix] );
165 * free( ptl );
166 * @end example
167 * Note that everything is freed with the one call to @code{free(3C)}.
169 * err:
170 * NULL is returned and @code{errno} will be set to indicate the problem:
171 * @itemize @bullet
172 * @item
173 * @code{EINVAL} - There was an unterminated quoted string.
174 * @item
175 * @code{ENOENT} - The input string was empty.
176 * @item
177 * @code{ENOMEM} - There is not enough memory.
178 * @end itemize
180 token_list_t*
181 ao_string_tokenize( char const* str )
183 int max_token_ct = 1; /* allow for trailing NUL on string */
184 token_list_t* res;
186 if (str == NULL) goto bogus_str;
189 * Trim leading white space. Use "ENOENT" and a NULL return to indicate
190 * an empty string was passed.
192 while (isspace( (ch_t)*str )) str++;
193 if (*str == NUL) {
194 bogus_str:
195 errno = ENOENT;
196 return NULL;
200 * Take an approximate count of tokens. If no quoted strings are used,
201 * it will be accurate. If quoted strings are used, it will be a little
202 * high and we'll squander the space for a few extra pointers.
205 cc_t* pz = (cc_t*)str;
207 do {
208 max_token_ct++;
209 while (! isspace( *++pz ))
210 if (*pz == NUL) goto found_nul;
211 while (isspace( *pz )) pz++;
212 } while (*pz != NUL);
214 found_nul:
218 res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) );
219 if (res == NULL) {
220 errno = ENOMEM;
221 return res;
225 * Now copy each token into the output buffer.
228 ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1));
229 res->tkn_ct = 0;
231 do {
232 res->tkn_list[ res->tkn_ct++ ] = pzDest;
233 for (;;) {
234 int ch = (ch_t)*str;
235 if (isspace( ch )) {
236 found_white_space:
237 while (isspace( (ch_t)*++str )) ;
238 break;
241 switch (ch) {
242 case '"':
243 copy_cooked( &pzDest, &str );
244 if (str == NULL) {
245 free(res);
246 errno = EINVAL;
247 return NULL;
249 if (isspace( (ch_t)*str ))
250 goto found_white_space;
251 break;
253 case '\'':
254 copy_raw( &pzDest, &str );
255 if (str == NULL) {
256 free(res);
257 errno = EINVAL;
258 return NULL;
260 if (isspace( (ch_t)*str ))
261 goto found_white_space;
262 break;
264 case NUL:
265 goto copy_done;
267 default:
268 str++;
269 *(pzDest++) = ch;
271 } copy_done:;
274 * NUL terminate the last token and see if we have any more tokens.
276 *(pzDest++) = NUL;
277 } while (*str != NUL);
279 res->tkn_list[ res->tkn_ct ] = NULL;
282 return res;
#ifdef TEST
#include <stdio.h>
#include <string.h>

/*
 *  Stand-alone test driver, built only when TEST is defined.
 *
 *  Every command line argument is tokenized separately.  The resulting
 *  tokens are listed on stdout, or the errno value is reported when the
 *  tokenizer returns NULL.  The exit code is 1 when no argument was
 *  supplied and 0 otherwise.
 */
int
main( int argc, char** argv )
{
    int arg_ix;

    if (argc == 1) {
        printf("USAGE: %s arg [ ... ]\n", *argv);
        return 1;
    }

    for (arg_ix = 1; arg_ix < argc; arg_ix++) {
        char*         arg = argv[arg_ix];
        token_list_t* p   = ao_string_tokenize( arg );
        int           ix;

        if (p == NULL) {
            printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                    arg, errno, strerror( errno ));
            continue;
        }

        /*
         *  NOTE(review): "%d" assumes tkn_ct is an int.  The declaration
         *  of token_list_t is not in this file -- confirm.
         */
        printf( "Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct );

        /*
         *  The first token is printed before the count is tested.
         */
        ix = 0;
        do {
            printf( " %3d: ``%s''\n", ix+1, p->tkn_list[ix] );
            ix++;
        } while (ix < p->tkn_ct);

        free(p);
    }

    return 0;
}
#endif
316 * Local Variables:
317 * mode: C
318 * c-file-style: "stroustrup"
319 * indent-tabs-mode: nil
320 * End:
321 * end of autoopts/tokenize.c */