external/bsd/ntp/dist/sntp/libopts/tokenize.c

   1 /*      $NetBSD$        */
   2
   3 /*
   4  *  This file defines the string_tokenize interface
   5  * Time-stamp:      "2007-11-12 20:40:36 bkorb"
   6  *
   7  *  This file is part of AutoOpts, a companion to AutoGen.
   8  *  AutoOpts is free software.
   9  *  AutoOpts is copyright (c) 1992-2009 by Bruce Korb - all rights reserved
  10  *
  11  *  AutoOpts is available under any one of two licenses.  The license
  12  *  in use must be one of these two and the choice is under the control
  13  *  of the user of the license.
  14  *
  15  *   The GNU Lesser General Public License, version 3 or later
  16  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
  17  *
  18  *   The Modified Berkeley Software Distribution License
  19  *      See the file "COPYING.mbsd"
  20  *
  21  *  These files have the following md5sums:
  22  *
  23  *  43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
  24  *  06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
  25  *  66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
  26  */
  27
  28 #include <errno.h>
  29 #include <stdlib.h>
  30
  31 #define cc_t   const unsigned char
  32 #define ch_t   unsigned char
  33
  34 /* = = = START-STATIC-FORWARD = = = */
  35 /* static forward declarations maintained by mk-fwd */
  36 static void
  37 copy_cooked( ch_t** ppDest, char const ** ppSrc );
  38
  39 static void
  40 copy_raw( ch_t** ppDest, char const ** ppSrc );
  41 /* = = = END-STATIC-FORWARD = = = */
  42
  43 static void
  44 copy_cooked( ch_t** ppDest, char const ** ppSrc )
  45 {
  46     ch_t* pDest = (ch_t*)*ppDest;
  47     const ch_t* pSrc  = (const ch_t*)(*ppSrc + 1);
  48
  49     for (;;) {
  50         ch_t ch = *(pSrc++);
  51         switch (ch) {
  52         case NUL:   *ppSrc = NULL; return;
  53         case '"':   goto done;
  54         case '\\':
  55             pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F );
  56             if (ch == 0x7F)
  57                 break;
  58             /* FALLTHROUGH */
  59
  60         default:
  61             *(pDest++) = ch;
  62         }
  63     }
  64
  65  done:
  66     *ppDest = (ch_t*)pDest; /* next spot for storing character */
  67     *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
  68 }
  69
  70
  71 static void
  72 copy_raw( ch_t** ppDest, char const ** ppSrc )
  73 {
  74     ch_t* pDest = *ppDest;
  75     cc_t* pSrc  = (cc_t*) (*ppSrc + 1);
  76
  77     for (;;) {
  78         ch_t ch = *(pSrc++);
  79         switch (ch) {
  80         case NUL:   *ppSrc = NULL; return;
  81         case '\'':  goto done;
  82         case '\\':
  83             /*
  84              *  *Four* escapes are handled:  newline removal, escape char
  85              *  quoting and apostrophe quoting
  86              */
  87             switch (*pSrc) {
  88             case NUL:   *ppSrc = NULL; return;
  89             case '\r':
  90                 if (*(++pSrc) == '\n')
  91                     ++pSrc;
  92                 continue;
  93
  94             case '\n':
  95                 ++pSrc;
  96                 continue;
  97
  98             case '\'':
  99                 ch = '\'';
 100                 /* FALLTHROUGH */
 101
 102             case '\\':
 103                 ++pSrc;
 104                 break;
 105             }
 106             /* FALLTHROUGH */
 107
 108         default:
 109             *(pDest++) = ch;
 110         }
 111     }
 112
 113  done:
 114     *ppDest = pDest; /* next spot for storing character */
 115     *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
 116 }
 117
 118
 119 /*=export_func ao_string_tokenize
 120  *
 121  * what: tokenize an input string
 122  *
 123  * arg:  + char const* + string + string to be tokenized +
 124  *
 125  * ret_type:  token_list_t*
 126  * ret_desc:  pointer to a structure that lists each token
 127  *
 128  * doc:
 129  *
 130  * This function will convert one input string into a list of strings.
 131  * The list of strings is derived by separating the input based on
 132  * white space separation.  However, if the input contains either single
 133  * or double quote characters, then the text after that character up to
 134  * a matching quote will become the string in the list.
 135  *
 136  *  The returned pointer should be deallocated with @code{free(3C)} when
 137  *  are done using the data.  The data are placed in a single block of
 138  *  allocated memory.  Do not deallocate individual token/strings.
 139  *
 140  *  The structure pointed to will contain at least these two fields:
 141  *  @table @samp
 142  *  @item tkn_ct
 143  *  The number of tokens found in the input string.
 144  *  @item tok_list
 145  *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
 146  *  the last pointer set to NULL.
 147  *  @end table
 148  *
 149  * There are two types of quoted strings: single quoted (@code{'}) and
 150  * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
 151  * escape characters (@code{\\}) are simply another character, except when
 152  * preceding the following characters:
 153  * @example
 154  * @code{\\}  double backslashes reduce to one
 155  * @code{'}   incorporates the single quote into the string
 156  * @code{\n}  suppresses both the backslash and newline character
 157  * @end example
 158  *
 159  * Double quote strings are formed according to the rules of string
 160  * constants in ANSI-C programs.
 161  *
 162  * example:
 163  * @example
 164  *    #include <stdlib.h>
 165  *    int ix;
 166  *    token_list_t* ptl = ao_string_tokenize( some_string )
 167  *    for (ix = 0; ix < ptl->tkn_ct; ix++)
 168  *       do_something_with_tkn( ptl->tkn_list[ix] );
 169  *    free( ptl );
 170  * @end example
 171  * Note that everything is freed with the one call to @code{free(3C)}.
 172  *
 173  * err:
 174  *  NULL is returned and @code{errno} will be set to indicate the problem:
 175  *  @itemize @bullet
 176  *  @item
 177  *  @code{EINVAL} - There was an unterminated quoted string.
 178  *  @item
 179  *  @code{ENOENT} - The input string was empty.
 180  *  @item
 181  *  @code{ENOMEM} - There is not enough memory.
 182  *  @end itemize
 183 =*/
 184 token_list_t*
 185 ao_string_tokenize( char const* str )
 186 {
 187     int max_token_ct = 1; /* allow for trailing NUL on string */
 188     token_list_t* res;
 189
 190     if (str == NULL)  goto bogus_str;
 191
 192     /*
 193      *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
 194      *  an empty string was passed.
 195      */
 196     while (IS_WHITESPACE_CHAR(*str))  str++;
 197     if (*str == NUL) {
 198     bogus_str:
 199         errno = ENOENT;
 200         return NULL;
 201     }
 202
 203     /*
 204      *  Take an approximate count of tokens.  If no quoted strings are used,
 205      *  it will be accurate.  If quoted strings are used, it will be a little
 206      *  high and we'll squander the space for a few extra pointers.
 207      */
 208     {
 209         cc_t* pz = (cc_t*)str;
 210
 211         do {
 212             max_token_ct++;
 213             while (! IS_WHITESPACE_CHAR(*++pz))
 214                 if (*pz == NUL) goto found_nul;
 215             while (IS_WHITESPACE_CHAR(*pz))  pz++;
 216         } while (*pz != NUL);
 217
 218     found_nul:
 219         ;
 220     }
 221
 222     res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) );
 223     if (res == NULL) {
 224         errno = ENOMEM;
 225         return res;
 226     }
 227
 228     /*
 229      *  Now copy each token into the output buffer.
 230      */
 231     {
 232         ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1));
 233         res->tkn_ct  = 0;
 234
 235         do  {
 236             res->tkn_list[ res->tkn_ct++ ] = pzDest;
 237             for (;;) {
 238                 int ch = (ch_t)*str;
 239                 if (IS_WHITESPACE_CHAR(ch)) {
 240                 found_white_space:
 241                     while (IS_WHITESPACE_CHAR(*++str))  ;
 242                     break;
 243                 }
 244
 245                 switch (ch) {
 246                 case '"':
 247                     copy_cooked( &pzDest, &str );
 248                     if (str == NULL) {
 249                         free(res);
 250                         errno = EINVAL;
 251                         return NULL;
 252                     }
 253                     if (IS_WHITESPACE_CHAR(*str))
 254                         goto found_white_space;
 255                     break;
 256
 257                 case '\'':
 258                     copy_raw( &pzDest, &str );
 259                     if (str == NULL) {
 260                         free(res);
 261                         errno = EINVAL;
 262                         return NULL;
 263                     }
 264                     if (IS_WHITESPACE_CHAR(*str))
 265                         goto found_white_space;
 266                     break;
 267
 268                 case NUL:
 269                     goto copy_done;
 270
 271                 default:
 272                     str++;
 273                     *(pzDest++) = ch;
 274                 }
 275             } copy_done:;
 276
 277             /*
 278              * NUL terminate the last token and see if we have any more tokens.
 279              */
 280             *(pzDest++) = NUL;
 281         } while (*str != NUL);
 282
 283         res->tkn_list[ res->tkn_ct ] = NULL;
 284     }
 285
 286     return res;
 287 }
 288
 289 #ifdef TEST
 290 #include <stdio.h>
 291 #include <string.h>
 292
 293 int
 294 main( int argc, char** argv )
 295 {
 296     if (argc == 1) {
 297         printf("USAGE:  %s arg [ ... ]\n", *argv);
 298         return 1;
 299     }
 300     while (--argc > 0) {
 301         char* arg = *(++argv);
 302         token_list_t* p = ao_string_tokenize( arg );
 303         if (p == NULL) {
 304             printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
 305                     arg, errno, strerror( errno ));
 306         } else {
 307             int ix = 0;
 308             printf( "Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct );
 309             do {
 310                 printf( " %3d:  ``%s''\n", ix+1, p->tkn_list[ix] );
 311             } while (++ix < p->tkn_ct);
 312             free(p);
 313         }
 314     }
 315     return 0;
 316 }
 317 #endif
 318
 319 /*
 320  * Local Variables:
 321  * mode: C
 322  * c-file-style: "stroustrup"
 323  * indent-tabs-mode: nil
 324  * End:
 325  * end of autoopts/tokenize.c */