etc/protocols - sync with NetBSD-8
[minix.git] / usr.bin / tr / str.c
blobee7b72c4a0ad494a87c60433ac5a6db984e8fd22
1 /* $NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $ */
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 #ifndef lint
34 #if 0
35 static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
36 #endif
37 __RCSID("$NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $");
38 #endif /* not lint */
40 #include <sys/types.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <stddef.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <ctype.h>
49 #include <assert.h>
51 #include "extern.h"
53 struct str {
54 enum { STRING1, STRING2 } which;
55 enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
56 int cnt; /* character count */
57 int lastch; /* last character */
58 int equiv[2]; /* equivalence set */
59 int *set; /* set of characters */
60 const char *str; /* user's string */
63 static int backslash(STR *);
64 static int bracket(STR *);
65 static int c_class(const void *, const void *);
66 static int *genclass(const char *, size_t);
67 static void genequiv(STR *);
68 static int genrange(STR *);
69 static void genseq(STR *);
71 STR *
72 str_create(int whichstring, const char *txt)
74 STR *s;
76 s = malloc(sizeof(*s));
77 if (s == NULL) {
78 err(1, "Out of memory");
81 s->which = whichstring == 2 ? STRING2 : STRING1;
82 s->state = NORMAL;
83 s->cnt = 0;
84 s->lastch = OOBCH;
85 s->equiv[0] = 0;
86 s->equiv[1] = OOBCH;
87 s->set = NULL;
88 s->str = txt;
90 return s;
93 void
94 str_destroy(STR *s)
96 if (s->set != NULL && s->set != s->equiv) {
97 free(s->set);
99 free(s);
103 next(STR *s, int *ret)
105 int ch;
107 switch (s->state) {
108 case EOS:
109 *ret = s->lastch;
110 return 0;
111 case INFINITE:
112 *ret = s->lastch;
113 return 1;
114 case NORMAL:
115 ch = (unsigned char)s->str[0];
116 switch (ch) {
117 case '\0':
118 s->state = EOS;
119 *ret = s->lastch;
120 return 0;
121 case '\\':
122 s->lastch = backslash(s);
123 break;
124 case '[':
125 if (bracket(s)) {
126 return next(s, ret);
128 /* FALLTHROUGH */
129 default:
130 ++s->str;
131 s->lastch = ch;
132 break;
135 /* We can start a range at any time. */
136 if (s->str[0] == '-' && genrange(s)) {
137 return next(s, ret);
139 *ret = s->lastch;
140 return 1;
141 case RANGE:
142 if (s->cnt == 0) {
143 s->state = NORMAL;
144 return next(s, ret);
146 s->cnt--;
147 ++s->lastch;
148 *ret = s->lastch;
149 return 1;
150 case SEQUENCE:
151 if (s->cnt == 0) {
152 s->state = NORMAL;
153 return next(s, ret);
155 s->cnt--;
156 *ret = s->lastch;
157 return 1;
158 case SET:
159 s->lastch = s->set[s->cnt++];
160 if (s->lastch == OOBCH) {
161 s->state = NORMAL;
162 if (s->set != s->equiv) {
163 free(s->set);
165 s->set = NULL;
166 return next(s, ret);
168 *ret = s->lastch;
169 return 1;
171 /* NOTREACHED */
172 assert(0);
173 *ret = s->lastch;
174 return 0;
177 static int
178 bracket(STR *s)
180 const char *p;
181 int *q;
183 switch (s->str[1]) {
184 case ':': /* "[:class:]" */
185 if ((p = strstr(s->str + 2, ":]")) == NULL)
186 return 0;
187 s->str += 2;
188 q = genclass(s->str, p - s->str);
189 s->state = SET;
190 s->set = q;
191 s->cnt = 0;
192 s->str = p + 2;
193 return 1;
194 case '=': /* "[=equiv=]" */
195 if ((p = strstr(s->str + 2, "=]")) == NULL)
196 return 0;
197 s->str += 2;
198 genequiv(s);
199 s->str = p + 2;
200 return 1;
201 default: /* "[\###*n]" or "[#*n]" */
202 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
203 return 0;
204 if (p[0] != '*' || strchr(p, ']') == NULL)
205 return 0;
206 s->str += 1;
207 genseq(s);
208 return 1;
210 /* NOTREACHED */
213 typedef struct {
214 const char *name;
215 int (*func)(int);
216 } CLASS;
218 static const CLASS classes[] = {
219 { "alnum", isalnum },
220 { "alpha", isalpha },
221 { "blank", isblank },
222 { "cntrl", iscntrl },
223 { "digit", isdigit },
224 { "graph", isgraph },
225 { "lower", islower },
226 { "print", isprint },
227 { "punct", ispunct },
228 { "space", isspace },
229 { "upper", isupper },
230 { "xdigit", isxdigit },
233 typedef struct {
234 const char *name;
235 size_t len;
236 } CLASSKEY;
238 static int *
239 genclass(const char *class, size_t len)
241 int ch;
242 const CLASS *cp;
243 CLASSKEY key;
244 int *p;
245 unsigned pos, num;
247 /* Find the class */
248 key.name = class;
249 key.len = len;
250 cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]),
251 c_class);
252 if (cp == NULL) {
253 errx(1, "unknown class %.*s", (int)len, class);
257 * Figure out what characters are in the class
260 num = NCHARS + 1;
261 p = malloc(num * sizeof(*p));
262 if (p == NULL) {
263 err(1, "malloc");
266 pos = 0;
267 for (ch = 0; ch < NCHARS; ch++) {
268 if (cp->func(ch)) {
269 p[pos++] = ch;
273 p[pos++] = OOBCH;
274 for (; pos < num; pos++) {
275 p[pos] = 0;
278 return p;
281 static int
282 c_class(const void *av, const void *bv)
284 const CLASSKEY *a = av;
285 const CLASS *b = bv;
286 size_t blen;
287 int r;
289 blen = strlen(b->name);
290 r = strncmp(a->name, b->name, a->len);
291 if (r != 0) {
292 return r;
294 if (a->len < blen) {
295 /* someone gave us a prefix of the right name */
296 return -1;
298 assert(a-> len == blen);
299 return 0;
303 * English doesn't have any equivalence classes, so for now
304 * we just syntax check and grab the character.
306 static void
307 genequiv(STR *s)
309 int ch;
311 ch = (unsigned char)s->str[0];
312 if (ch == '\\') {
313 s->equiv[0] = backslash(s);
314 } else {
315 s->equiv[0] = ch;
316 s->str++;
318 if (s->str[0] != '=') {
319 errx(1, "Misplaced equivalence equals sign");
321 s->str++;
322 if (s->str[0] != ']') {
323 errx(1, "Misplaced equivalence right bracket");
325 s->str++;
327 s->cnt = 0;
328 s->state = SET;
329 s->set = s->equiv;
332 static int
333 genrange(STR *s)
335 int stopval;
336 const char *savestart;
338 savestart = s->str++;
339 stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++;
340 if (stopval < (unsigned char)s->lastch) {
341 s->str = savestart;
342 return 0;
344 s->cnt = stopval - s->lastch + 1;
345 s->state = RANGE;
346 --s->lastch;
347 return 1;
350 static void
351 genseq(STR *s)
353 char *ep;
355 if (s->which == STRING1) {
356 errx(1, "Sequences only valid in string2");
359 if (*s->str == '\\') {
360 s->lastch = backslash(s);
361 } else {
362 s->lastch = (unsigned char)*s->str++;
364 if (*s->str != '*') {
365 errx(1, "Misplaced sequence asterisk");
368 s->str++;
369 switch (s->str[0]) {
370 case '\\':
371 s->cnt = backslash(s);
372 break;
373 case ']':
374 s->cnt = 0;
375 ++s->str;
376 break;
377 default:
378 if (isdigit((unsigned char)s->str[0])) {
379 s->cnt = strtol(s->str, &ep, 0);
380 if (*ep == ']') {
381 s->str = ep + 1;
382 break;
385 errx(1, "illegal sequence count");
386 /* NOTREACHED */
389 s->state = s->cnt ? SEQUENCE : INFINITE;
393 * Translate \??? into a character. Up to 3 octal digits, if no digits either
394 * an escape code or a literal character.
396 static int
397 backslash(STR *s)
399 int ch, cnt, val;
401 cnt = val = 0;
402 for (;;) {
403 /* Consume the character we're already on. */
404 s->str++;
406 /* Look at the next character. */
407 ch = (unsigned char)s->str[0];
408 if (!isascii(ch) || !isdigit(ch)) {
409 break;
411 val = val * 8 + ch - '0';
412 if (++cnt == 3) {
413 /* Enough digits; consume this one and stop */
414 ++s->str;
415 break;
418 if (cnt) {
419 /* We saw digits, so return their value */
420 return val;
422 if (ch == '\0') {
423 /* \<end> -> \ */
424 s->state = EOS;
425 return '\\';
428 /* Consume the escaped character */
429 s->str++;
431 switch (ch) {
432 case 'a': /* escape characters */
433 return '\7';
434 case 'b':
435 return '\b';
436 case 'e':
437 return '\033';
438 case 'f':
439 return '\f';
440 case 'n':
441 return '\n';
442 case 'r':
443 return '\r';
444 case 't':
445 return '\t';
446 case 'v':
447 return '\13';
448 default: /* \q -> q */
449 return ch;