1 /* $NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $ */
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
35 static char sccsid
[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
37 __RCSID("$NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $");
40 #include <sys/types.h>
54 enum { STRING1
, STRING2
} which
;
55 enum { EOS
, INFINITE
, NORMAL
, RANGE
, SEQUENCE
, SET
} state
;
56 int cnt
; /* character count */
57 int lastch
; /* last character */
58 int equiv
[2]; /* equivalence set */
59 int *set
; /* set of characters */
60 const char *str
; /* user's string */
/*
 * Forward declarations of the file-local (static) helpers, so they can be
 * called before their definitions appear.
 *
 * NOTE(review): only fragments of the definitions are visible here; the
 * per-function notes below are limited to what the visible call sites and
 * fragments show and should be confirmed against the full definitions.
 */
63 static int backslash(STR
*); /* int result is stored into lastch, cnt, equiv[0] or a range stop value by callers */
64 static int bracket(STR
*); /* presumably parses the "[:class:]", "[=equiv=]" and "[#*n]" forms -- TODO confirm */
65 static int c_class(const void *, const void *); /* compares names via strncmp over the key's len; presumably the bsearch() comparator -- confirm */
66 static int *genclass(const char *, size_t); /* looks the (name, length) pair up in classes[]; errx()s on an unknown class */
67 static void genequiv(STR
*); /* presumably handles "[=equiv=]" -- TODO confirm */
68 static int genrange(STR
*); /* called when the string is positioned at '-'; the result is tested as a truth value */
69 static void genseq(STR
*); /* presumably handles "[#*n]" sequences -- TODO confirm */
72 str_create(int whichstring
, const char *txt
)
76 s
= malloc(sizeof(*s
));
78 err(1, "Out of memory");
81 s
->which
= whichstring
== 2 ? STRING2
: STRING1
;
96 if (s
->set
!= NULL
&& s
->set
!= s
->equiv
) {
103 next(STR
*s
, int *ret
)
115 ch
= (unsigned char)s
->str
[0];
122 s
->lastch
= backslash(s
);
135 /* We can start a range at any time. */
136 if (s
->str
[0] == '-' && genrange(s
)) {
159 s
->lastch
= s
->set
[s
->cnt
++];
160 if (s
->lastch
== OOBCH
) {
162 if (s
->set
!= s
->equiv
) {
184 case ':': /* "[:class:]" */
185 if ((p
= strstr(s
->str
+ 2, ":]")) == NULL
)
188 q
= genclass(s
->str
, p
- s
->str
);
194 case '=': /* "[=equiv=]" */
195 if ((p
= strstr(s
->str
+ 2, "=]")) == NULL
)
201 default: /* "[\###*n]" or "[#*n]" */
202 if ((p
= strpbrk(s
->str
+ 2, "*]")) == NULL
)
204 if (p
[0] != '*' || strchr(p
, ']') == NULL
)
218 static const CLASS classes
[] = {
219 { "alnum", isalnum
},
220 { "alpha", isalpha
},
221 { "blank", isblank
},
222 { "cntrl", iscntrl
},
223 { "digit", isdigit
},
224 { "graph", isgraph
},
225 { "lower", islower
},
226 { "print", isprint
},
227 { "punct", ispunct
},
228 { "space", isspace
},
229 { "upper", isupper
},
230 { "xdigit", isxdigit
},
239 genclass(const char *class, size_t len
)
250 cp
= bsearch(&key
, classes
, __arraycount(classes
), sizeof(classes
[0]),
253 errx(1, "unknown class %.*s", (int)len
, class);
257 * Figure out what characters are in the class
261 p
= malloc(num
* sizeof(*p
));
267 for (ch
= 0; ch
< NCHARS
; ch
++) {
274 for (; pos
< num
; pos
++) {
282 c_class(const void *av
, const void *bv
)
284 const CLASSKEY
*a
= av
;
289 blen
= strlen(b
->name
);
290 r
= strncmp(a
->name
, b
->name
, a
->len
);
295 /* someone gave us a prefix of the right name */
298 assert(a
-> len
== blen
);
303 * English doesn't have any equivalence classes, so for now
304 * we just syntax check and grab the character.
311 ch
= (unsigned char)s
->str
[0];
313 s
->equiv
[0] = backslash(s
);
318 if (s
->str
[0] != '=') {
319 errx(1, "Misplaced equivalence equals sign");
322 if (s
->str
[0] != ']') {
323 errx(1, "Misplaced equivalence right bracket");
336 const char *savestart
;
338 savestart
= s
->str
++;
339 stopval
= s
->str
[0] == '\\' ? backslash(s
) : (unsigned char)*s
->str
++;
340 if (stopval
< (unsigned char)s
->lastch
) {
344 s
->cnt
= stopval
- s
->lastch
+ 1;
355 if (s
->which
== STRING1
) {
356 errx(1, "Sequences only valid in string2");
359 if (*s
->str
== '\\') {
360 s
->lastch
= backslash(s
);
362 s
->lastch
= (unsigned char)*s
->str
++;
364 if (*s
->str
!= '*') {
365 errx(1, "Misplaced sequence asterisk");
371 s
->cnt
= backslash(s
);
378 if (isdigit((unsigned char)s
->str
[0])) {
379 s
->cnt
= strtol(s
->str
, &ep
, 0);
385 errx(1, "illegal sequence count");
389 s
->state
= s
->cnt
? SEQUENCE
: INFINITE
;
393 * Translate \??? into a character: up to 3 octal digits or, if no digits
394 * follow the backslash, either an escape code or a literal character.
403 /* Consume the character we're already on. */
406 /* Look at the next character. */
407 ch
= (unsigned char)s
->str
[0];
408 if (!isascii(ch
) || !isdigit(ch
)) {
411 val
= val
* 8 + ch
- '0';
413 /* Enough digits; consume this one and stop */
419 /* We saw digits, so return their value */
428 /* Consume the escaped character */
432 case 'a': /* escape characters */
448 default: /* \q -> q */