/*
 * token.c -- tokenize strings, a la strtok(3)
 *
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2007 Chuck Lever <chuck.lever@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/*
 * We've constructed a simple string tokenizer that is better than
 * strtok(3) in several ways:
 *
 * 1.  It doesn't interfere with ongoing tokenizations using strtok(3).
 * 2.  It's re-entrant so we can nest tokenizations, if needed.
 * 3.  It treats a delimiter that appears inside double quotes as part
 *     of the token (needed for 'context="sd,fslj"').
 * 4.  It doesn't alter the string we're tokenizing, so it can work
 *     on write-protected strings as well as writable strings.
 */
/*
 * Tokenizer context.  One of these is carried between calls so that
 * no state is kept in static storage.
 */
struct tokenizer_state {
	char *pos;		/* current scan position in the string */
	char delimiter;		/* character that separates tokens */
	int error;		/* zero, or an errno value once a scan fails */
};
52 static void find_next_nondelimiter(struct tokenizer_state
*tstate
)
54 while (*tstate
->pos
!= '\0' && *tstate
->pos
== tstate
->delimiter
)
58 static size_t find_next_delimiter(struct tokenizer_state
*tstate
)
63 while (*tstate
->pos
!= '\0') {
64 if (*tstate
->pos
== '"')
67 if (!quote_seen
&& *tstate
->pos
== tstate
->delimiter
)
74 /* did the string terminate before the close quote? */
76 tstate
->error
= EINVAL
;
84 * next_token - find the next token in a string and return it
85 * @tstate: pointer to tokenizer context object
87 * Returns the next token found in the current string.
88 * Returns NULL if there are no more tokens in the string,
89 * or if an error occurs.
91 * Side effect: tstate is updated
93 char *next_token(struct tokenizer_state
*tstate
)
98 if (!tstate
|| !tstate
->pos
|| tstate
->error
)
101 find_next_nondelimiter(tstate
);
102 if (*tstate
->pos
== '\0')
106 len
= find_next_delimiter(tstate
);
108 token
= strndup(token
, len
);
111 tstate
->error
= ENOMEM
;
116 return NULL
; /* no tokens found in this string */
120 * init_tokenizer - return an initialized tokenizer context object
121 * @string: pointer to C string
122 * @delimiter: single character that delimits tokens in @string
124 * Returns an initialized tokenizer context object
126 struct tokenizer_state
*init_tokenizer(char *string
, char delimiter
)
128 struct tokenizer_state
*tstate
;
130 tstate
= malloc(sizeof(*tstate
));
132 tstate
->pos
= string
;
133 tstate
->delimiter
= delimiter
;
140 * tokenizer_error - digs error value out of tokenizer context
141 * @tstate: pointer to tokenizer context object
144 int tokenizer_error(struct tokenizer_state
*tstate
)
146 return tstate
? tstate
->error
: 0;
150 * end_tokenizer - free a tokenizer context object
151 * @tstate: pointer to tokenizer context object
154 void end_tokenizer(struct tokenizer_state
*tstate
)