Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / dist / ntp / libopts / tokenize.c
blob8446955f926bae7d4170f694878267387883e4d5
1 /* $NetBSD$ */
3 /*
4 * This file defines the string_tokenize interface
5 * Time-stamp: "2006-06-24 15:27:49 bkorb"
7 * string_tokenize copyright 2005 Bruce Korb
9 * string_tokenize is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * string_tokenize is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with string_tokenize; if not, write to:
21 * The Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
25 #include <ctype.h>
26 #include <errno.h>
27 #include <stdlib.h>
/*
 *  Byte-sized character aliases used throughout this file.  They are
 *  macros (not typedefs), so "cc_t*" expands to "const unsigned char*".
 */
#define cc_t   const unsigned char
#define ch_t   unsigned char

/* = = = START-STATIC-FORWARD = = = */
/* static forward declarations maintained by :mkfwd */
static void copy_cooked( ch_t** ppDest, char const ** ppSrc );
static void copy_raw( ch_t** ppDest, char const ** ppSrc );
/* = = = END-STATIC-FORWARD = = = */
41 static void
42 copy_cooked( ch_t** ppDest, char const ** ppSrc )
44 ch_t* pDest = (ch_t*)*ppDest;
45 const ch_t* pSrc = (const ch_t*)(*ppSrc + 1);
47 for (;;) {
48 ch_t ch = *(pSrc++);
49 switch (ch) {
50 case NUL: *ppSrc = NULL; return;
51 case '"': goto done;
52 case '\\':
53 pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F );
54 if (ch == 0x7F)
55 break;
56 /* FALLTHROUGH */
58 default:
59 *(pDest++) = ch;
63 done:
64 *ppDest = (ch_t*)pDest; /* next spot for storing character */
65 *ppSrc = (char const *)pSrc; /* char following closing quote */
69 static void
70 copy_raw( ch_t** ppDest, char const ** ppSrc )
72 ch_t* pDest = *ppDest;
73 cc_t* pSrc = (cc_t*) (*ppSrc + 1);
75 for (;;) {
76 ch_t ch = *(pSrc++);
77 switch (ch) {
78 case NUL: *ppSrc = NULL; return;
79 case '\'': goto done;
80 case '\\':
82 * *Four* escapes are handled: newline removal, escape char
83 * quoting and apostrophe quoting
85 switch (*pSrc) {
86 case NUL: *ppSrc = NULL; return;
87 case '\r':
88 if (*(++pSrc) == '\n')
89 ++pSrc;
90 continue;
92 case '\n':
93 ++pSrc;
94 continue;
96 case '\'':
97 ch = '\'';
98 /* FALLTHROUGH */
100 case '\\':
101 ++pSrc;
102 break;
104 /* FALLTHROUGH */
106 default:
107 *(pDest++) = ch;
111 done:
112 *ppDest = pDest; /* next spot for storing character */
113 *ppSrc = (char const *) pSrc; /* char following closing quote */
117 /*=export_func ao_string_tokenize
119 * what: tokenize an input string
121 * arg: + char const* + string + string to be tokenized +
123 * ret_type: token_list_t*
124 * ret_desc: pointer to a structure that lists each token
126 * doc:
128 * This function will convert one input string into a list of strings.
129 * The list of strings is derived by separating the input based on
130 * white space separation. However, if the input contains either single
131 * or double quote characters, then the text after that character up to
132 * a matching quote will become the string in the list.
134 * The returned pointer should be deallocated with @code{free(3C)} when
135 * are done using the data. The data are placed in a single block of
136 * allocated memory. Do not deallocate individual token/strings.
138 * The structure pointed to will contain at least these two fields:
139 * @table @samp
140 * @item tkn_ct
141 * The number of tokens found in the input string.
142 * @item tok_list
143 * An array of @code{tkn_ct + 1} pointers to substring tokens, with
144 * the last pointer set to NULL.
145 * @end table
147 * There are two types of quoted strings: single quoted (@code{'}) and
148 * double quoted (@code{"}). Singly quoted strings are fairly raw in that
149 * escape characters (@code{\\}) are simply another character, except when
150 * preceding the following characters:
151 * @example
152 * @code{\\} double backslashes reduce to one
153 * @code{'} incorporates the single quote into the string
154 * @code{\n} suppresses both the backslash and newline character
155 * @end example
157 * Double quote strings are formed according to the rules of string
158 * constants in ANSI-C programs.
160 * example:
161 * @example
162 * #include <stdlib.h>
163 * int ix;
164 * token_list_t* ptl = ao_string_tokenize( some_string )
165 * for (ix = 0; ix < ptl->tkn_ct; ix++)
166 * do_something_with_tkn( ptl->tkn_list[ix] );
167 * free( ptl );
168 * @end example
169 * Note that everything is freed with the one call to @code{free(3C)}.
171 * err:
172 * NULL is returned and @code{errno} will be set to indicate the problem:
173 * @itemize @bullet
174 * @item
175 * @code{EINVAL} - There was an unterminated quoted string.
176 * @item
177 * @code{ENOENT} - The input string was empty.
178 * @item
179 * @code{ENOMEM} - There is not enough memory.
180 * @end itemize
182 token_list_t*
183 ao_string_tokenize( char const* str )
185 int max_token_ct = 1; /* allow for trailing NUL on string */
186 token_list_t* res;
188 if (str == NULL) goto bogus_str;
191 * Trim leading white space. Use "ENOENT" and a NULL return to indicate
192 * an empty string was passed.
194 while (isspace( (ch_t)*str )) str++;
195 if (*str == NUL) {
196 bogus_str:
197 errno = ENOENT;
198 return NULL;
202 * Take an approximate count of tokens. If no quoted strings are used,
203 * it will be accurate. If quoted strings are used, it will be a little
204 * high and we'll squander the space for a few extra pointers.
207 cc_t* pz = (cc_t*)str;
209 do {
210 max_token_ct++;
211 while (! isspace( *++pz ))
212 if (*pz == NUL) goto found_nul;
213 while (isspace( *pz )) pz++;
214 } while (*pz != NUL);
216 found_nul:
220 res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) );
221 if (res == NULL) {
222 errno = ENOMEM;
223 return res;
227 * Now copy each token into the output buffer.
230 ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1));
231 res->tkn_ct = 0;
233 do {
234 res->tkn_list[ res->tkn_ct++ ] = pzDest;
235 for (;;) {
236 int ch = (ch_t)*str;
237 if (isspace( ch )) {
238 found_white_space:
239 while (isspace( (ch_t)*++str )) ;
240 break;
243 switch (ch) {
244 case '"':
245 copy_cooked( &pzDest, &str );
246 if (str == NULL) {
247 free(res);
248 errno = EINVAL;
249 return NULL;
251 if (isspace( (ch_t)*str ))
252 goto found_white_space;
253 break;
255 case '\'':
256 copy_raw( &pzDest, &str );
257 if (str == NULL) {
258 free(res);
259 errno = EINVAL;
260 return NULL;
262 if (isspace( (ch_t)*str ))
263 goto found_white_space;
264 break;
266 case NUL:
267 goto copy_done;
269 default:
270 str++;
271 *(pzDest++) = ch;
273 } copy_done:;
276 * NUL terminate the last token and see if we have any more tokens.
278 *(pzDest++) = NUL;
279 } while (*str != NUL);
281 res->tkn_list[ res->tkn_ct ] = NULL;
284 return res;
#ifdef TEST
#include <stdio.h>
#include <string.h>

/*
 *  Test driver, built only when TEST is defined.  Each command line
 *  argument is tokenized on its own and the resulting tokens (or the
 *  errno value of the failure) are printed.  Returns 1 when no arguments
 *  were supplied, 0 otherwise.
 */
int
main( int argc, char** argv )
{
    int arg_ix;

    if (argc == 1) {
        printf("USAGE: %s arg [ ... ]\n", *argv);
        return 1;
    }

    for (arg_ix = 1; arg_ix < argc; arg_ix++) {
        char*         text = argv[arg_ix];
        token_list_t* toks = ao_string_tokenize( text );
        int           tkn_ix;

        if (toks == NULL) {
            printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                    text, errno, strerror( errno ));
            continue;
        }

        printf( "Parsed string ``%s''\ninto %d tokens:\n", text, toks->tkn_ct );

        /* the first entry is always printed before the count is consulted */
        for (tkn_ix = 0; ; ) {
            printf( " %3d: ``%s''\n", tkn_ix+1, toks->tkn_list[tkn_ix] );
            if (! (++tkn_ix < toks->tkn_ct))
                break;
        }

        free(toks);
    }

    return 0;
}
#endif
318 * Local Variables:
319 * mode: C
320 * c-file-style: "stroustrup"
321 * indent-tabs-mode: nil
322 * End:
323 * end of autoopts/tokenize.c */