Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / bsd / ntp / dist / sntp / libopts / tokenize.c
blob 9c29a5af5e3338c100b47f4faf1bddf5f802f7d4
1 /* $NetBSD$ */
3 /*
4 * This file defines the string_tokenize interface
5 * Time-stamp: "2007-11-12 20:40:36 bkorb"
7 * This file is part of AutoOpts, a companion to AutoGen.
8 * AutoOpts is free software.
9 * AutoOpts is copyright (c) 1992-2009 by Bruce Korb - all rights reserved
11 * AutoOpts is available under any one of two licenses. The license
12 * in use must be one of these two and the choice is under the control
13 * of the user of the license.
15 * The GNU Lesser General Public License, version 3 or later
16 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
18 * The Modified Berkeley Software Distribution License
19 * See the file "COPYING.mbsd"
21 * These files have the following md5sums:
23 * 43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
24 * 06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
25 * 66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
28 #include <errno.h>
29 #include <stdlib.h>
31 #define cc_t const unsigned char
32 #define ch_t unsigned char
34 /* = = = START-STATIC-FORWARD = = = */
35 /* static forward declarations maintained by mk-fwd */
36 static void
37 copy_cooked( ch_t** ppDest, char const ** ppSrc );
39 static void
40 copy_raw( ch_t** ppDest, char const ** ppSrc );
41 /* = = = END-STATIC-FORWARD = = = */
43 static void
44 copy_cooked( ch_t** ppDest, char const ** ppSrc )
46 ch_t* pDest = (ch_t*)*ppDest;
47 const ch_t* pSrc = (const ch_t*)(*ppSrc + 1);
49 for (;;) {
50 ch_t ch = *(pSrc++);
51 switch (ch) {
52 case NUL: *ppSrc = NULL; return;
53 case '"': goto done;
54 case '\\':
55 pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F );
56 if (ch == 0x7F)
57 break;
58 /* FALLTHROUGH */
60 default:
61 *(pDest++) = ch;
65 done:
66 *ppDest = (ch_t*)pDest; /* next spot for storing character */
67 *ppSrc = (char const *)pSrc; /* char following closing quote */
71 static void
72 copy_raw( ch_t** ppDest, char const ** ppSrc )
74 ch_t* pDest = *ppDest;
75 cc_t* pSrc = (cc_t*) (*ppSrc + 1);
77 for (;;) {
78 ch_t ch = *(pSrc++);
79 switch (ch) {
80 case NUL: *ppSrc = NULL; return;
81 case '\'': goto done;
82 case '\\':
84 * *Four* escapes are handled: newline removal, escape char
85 * quoting and apostrophe quoting
87 switch (*pSrc) {
88 case NUL: *ppSrc = NULL; return;
89 case '\r':
90 if (*(++pSrc) == '\n')
91 ++pSrc;
92 continue;
94 case '\n':
95 ++pSrc;
96 continue;
98 case '\'':
99 ch = '\'';
100 /* FALLTHROUGH */
102 case '\\':
103 ++pSrc;
104 break;
106 /* FALLTHROUGH */
108 default:
109 *(pDest++) = ch;
113 done:
114 *ppDest = pDest; /* next spot for storing character */
115 *ppSrc = (char const *) pSrc; /* char following closing quote */
119 /*=export_func ao_string_tokenize
121 * what: tokenize an input string
123 * arg: + char const* + string + string to be tokenized +
125 * ret_type: token_list_t*
126 * ret_desc: pointer to a structure that lists each token
128 * doc:
130 * This function will convert one input string into a list of strings.
131 * The list of strings is derived by separating the input based on
132 * white space separation. However, if the input contains either single
133 * or double quote characters, then the text after that character up to
134 * a matching quote will become the string in the list.
136 * The returned pointer should be deallocated with @code{free(3C)} when
137 * are done using the data. The data are placed in a single block of
138 * allocated memory. Do not deallocate individual token/strings.
140 * The structure pointed to will contain at least these two fields:
141 * @table @samp
142 * @item tkn_ct
143 * The number of tokens found in the input string.
144 * @item tok_list
145 * An array of @code{tkn_ct + 1} pointers to substring tokens, with
146 * the last pointer set to NULL.
147 * @end table
149 * There are two types of quoted strings: single quoted (@code{'}) and
150 * double quoted (@code{"}). Singly quoted strings are fairly raw in that
151 * escape characters (@code{\\}) are simply another character, except when
152 * preceding the following characters:
153 * @example
154 * @code{\\} double backslashes reduce to one
155 * @code{'} incorporates the single quote into the string
156 * @code{\n} suppresses both the backslash and newline character
157 * @end example
159 * Double quote strings are formed according to the rules of string
160 * constants in ANSI-C programs.
162 * example:
163 * @example
164 * #include <stdlib.h>
165 * int ix;
166 * token_list_t* ptl = ao_string_tokenize( some_string )
167 * for (ix = 0; ix < ptl->tkn_ct; ix++)
168 * do_something_with_tkn( ptl->tkn_list[ix] );
169 * free( ptl );
170 * @end example
171 * Note that everything is freed with the one call to @code{free(3C)}.
173 * err:
174 * NULL is returned and @code{errno} will be set to indicate the problem:
175 * @itemize @bullet
176 * @item
177 * @code{EINVAL} - There was an unterminated quoted string.
178 * @item
179 * @code{ENOENT} - The input string was empty.
180 * @item
181 * @code{ENOMEM} - There is not enough memory.
182 * @end itemize
184 token_list_t*
185 ao_string_tokenize( char const* str )
187 int max_token_ct = 1; /* allow for trailing NUL on string */
188 token_list_t* res;
190 if (str == NULL) goto bogus_str;
193 * Trim leading white space. Use "ENOENT" and a NULL return to indicate
194 * an empty string was passed.
196 while (IS_WHITESPACE_CHAR(*str)) str++;
197 if (*str == NUL) {
198 bogus_str:
199 errno = ENOENT;
200 return NULL;
204 * Take an approximate count of tokens. If no quoted strings are used,
205 * it will be accurate. If quoted strings are used, it will be a little
206 * high and we'll squander the space for a few extra pointers.
209 cc_t* pz = (cc_t*)str;
211 do {
212 max_token_ct++;
213 while (! IS_WHITESPACE_CHAR(*++pz))
214 if (*pz == NUL) goto found_nul;
215 while (IS_WHITESPACE_CHAR(*pz)) pz++;
216 } while (*pz != NUL);
218 found_nul:
222 res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) );
223 if (res == NULL) {
224 errno = ENOMEM;
225 return res;
229 * Now copy each token into the output buffer.
232 ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1));
233 res->tkn_ct = 0;
235 do {
236 res->tkn_list[ res->tkn_ct++ ] = pzDest;
237 for (;;) {
238 int ch = (ch_t)*str;
239 if (IS_WHITESPACE_CHAR(ch)) {
240 found_white_space:
241 while (IS_WHITESPACE_CHAR(*++str)) ;
242 break;
245 switch (ch) {
246 case '"':
247 copy_cooked( &pzDest, &str );
248 if (str == NULL) {
249 free(res);
250 errno = EINVAL;
251 return NULL;
253 if (IS_WHITESPACE_CHAR(*str))
254 goto found_white_space;
255 break;
257 case '\'':
258 copy_raw( &pzDest, &str );
259 if (str == NULL) {
260 free(res);
261 errno = EINVAL;
262 return NULL;
264 if (IS_WHITESPACE_CHAR(*str))
265 goto found_white_space;
266 break;
268 case NUL:
269 goto copy_done;
271 default:
272 str++;
273 *(pzDest++) = ch;
275 } copy_done:;
278 * NUL terminate the last token and see if we have any more tokens.
280 *(pzDest++) = NUL;
281 } while (*str != NUL);
283 res->tkn_list[ res->tkn_ct ] = NULL;
286 return res;
#ifdef TEST
#include <stdio.h>
#include <string.h>

/*
 *  Test driver: tokenize every command line argument and print either the
 *  resulting tokens or the errno value reported for the failure.
 *  Returns 1 after printing a usage line when argc is 1, otherwise 0.
 */
int
main( int argc, char** argv )
{
    int argIdx;

    if (argc == 1) {
        printf("USAGE: %s arg [ ... ]\n", *argv);
        return 1;
    }

    for (argIdx = 1; argIdx < argc; argIdx++) {
        char*         text = argv[argIdx];
        token_list_t* list = ao_string_tokenize( text );

        if (list == NULL) {
            printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                    text, errno, strerror( errno ));
            continue;
        }

        {
            int tkn = 0;

            printf( "Parsed string ``%s''\ninto %d tokens:\n",
                    text, list->tkn_ct );

            /* the first token is always printed before the count is tested */
            for (;;) {
                printf( " %3d: ``%s''\n", tkn+1, list->tkn_list[tkn] );
                if (! (++tkn < list->tkn_ct))
                    break;
            }
            free(list);
        }
    }

    return 0;
}
#endif
320 * Local Variables:
321 * mode: C
322 * c-file-style: "stroustrup"
323 * indent-tabs-mode: nil
324 * End:
325 * end of autoopts/tokenize.c */