Port of NetBSD's tr
[minix.git] / commands / simple / str.c
blob88dcebbab71462b5931e3470540f044400174e29
1 /* $NetBSD: str.c,v 1.12 2009/04/13 23:50:49 lukem Exp $ */
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #include <sys/types.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <ctype.h>
42 #include "tr.h"
44 static int backslash (STR *);
45 static int bracket (STR *);
46 static int c_class (const void *, const void *);
47 static void genclass (STR *);
48 static void genequiv (STR *);
49 static int genrange (STR *);
50 static void genseq (STR *);
52 int
53 next(s)
54 STR *s;
56 int ch;
58 switch (s->state) {
59 case EOS:
60 return (0);
61 case INFINITE:
62 return (1);
63 case NORMAL:
64 switch (ch = *s->str) {
65 case '\0':
66 s->state = EOS;
67 return (0);
68 case '\\':
69 s->lastch = backslash(s);
70 break;
71 case '[':
72 if (bracket(s))
73 return (next(s));
74 /* FALLTHROUGH */
75 default:
76 ++s->str;
77 s->lastch = ch;
78 break;
81 /* We can start a range at any time. */
82 if (s->str[0] == '-' && genrange(s))
83 return (next(s));
84 return (1);
85 case RANGE:
86 if (s->cnt-- == 0) {
87 s->state = NORMAL;
88 return (next(s));
90 ++s->lastch;
91 return (1);
92 case SEQUENCE:
93 if (s->cnt-- == 0) {
94 s->state = NORMAL;
95 return (next(s));
97 return (1);
98 case SET:
99 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
100 s->state = NORMAL;
101 return (next(s));
103 return (1);
105 /* NOTREACHED */
106 return (0);
109 static int
110 bracket(s)
111 STR *s;
113 char *p;
115 switch (s->str[1]) {
116 case ':': /* "[:class:]" */
117 if ((p = strstr((char *) s->str + 2, ":]")) == NULL)
118 return (0);
119 *p = '\0';
120 s->str += 2;
121 genclass(s);
122 s->str = (unsigned char *) p + 2;
123 return (1);
124 case '=': /* "[=equiv=]" */
125 if ((p = strstr((char *) s->str + 2, "=]")) == NULL)
126 return (0);
127 s->str += 2;
128 genequiv(s);
129 return (1);
130 default: /* "[\###*n]" or "[#*n]" */
131 if ((p = strpbrk((char *) s->str + 2, "*]")) == NULL)
132 return (0);
133 if (p[0] != '*' || strchr(p, ']') == NULL)
134 return (0);
135 s->str += 1;
136 genseq(s);
137 return (1);
139 /* NOTREACHED */
/*
 * Description of one character class usable as "[:name:]".
 *	name	class name as written between "[:" and ":]"
 *	func	<ctype.h> predicate deciding membership
 *	set	member table; NULL here, filled in by genclass()
 */
typedef struct {
	const char *name;
	int (*func)(int);
	int *set;
} CLASS;

/* Kept sorted by name: genclass() looks entries up with bsearch(). */
static CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum,  .set = NULL },
	{ .name = "alpha",  .func = isalpha,  .set = NULL },
	{ .name = "blank",  .func = isblank,  .set = NULL },
	{ .name = "cntrl",  .func = iscntrl,  .set = NULL },
	{ .name = "digit",  .func = isdigit,  .set = NULL },
	{ .name = "graph",  .func = isgraph,  .set = NULL },
	{ .name = "lower",  .func = islower,  .set = NULL },
	{ .name = "print",  .func = isprint,  .set = NULL },
	{ .name = "punct",  .func = ispunct,  .set = NULL },
	{ .name = "space",  .func = isspace,  .set = NULL },
	{ .name = "upper",  .func = isupper,  .set = NULL },
	{ .name = "xdigit", .func = isxdigit, .set = NULL },
};
163 static void
164 genclass(s)
165 STR *s;
167 int cnt, (*func) (int);
168 CLASS *cp, tmp;
169 int *p;
171 tmp.name = (char *) s->str;
172 if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
173 sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) {
174 fprintf(stderr, "tr: unknown class %s\n", s->str);
175 exit(1);
178 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) {
179 perror("malloc");
180 exit(1);
182 memset(p, 0, (NCHARS + 1) * sizeof(int));
183 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
184 if ((func)(cnt))
185 *p++ = cnt;
186 *p = OOBCH;
188 s->cnt = 0;
189 s->state = SET;
190 s->set = cp->set;
193 static int
194 c_class(a, b)
195 const void *a, *b;
197 return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
201 * English doesn't have any equivalence classes, so for now
202 * we just syntax check and grab the character.
204 static void
205 genequiv(s)
206 STR *s;
208 if (*s->str == '\\') {
209 s->equiv[0] = backslash(s);
210 if (*s->str != '=') {
211 fprintf(stderr, "tr: misplaced equivalence equals sign\n");
212 exit(1);
214 } else {
215 s->equiv[0] = s->str[0];
216 if (s->str[1] != '=') {
217 fprintf(stderr, "tr: misplaced equivalence equals sign\n");
218 exit(1);
221 s->str += 2;
222 s->cnt = 0;
223 s->state = SET;
224 s->set = s->equiv;
227 static int
228 genrange(s)
229 STR *s;
231 int stopval;
232 unsigned char *savestart;
234 savestart = s->str;
235 stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
236 if (stopval < (u_char)s->lastch) {
237 s->str = savestart;
238 return (0);
240 s->cnt = stopval - s->lastch + 1;
241 s->state = RANGE;
242 --s->lastch;
243 return (1);
246 static void
247 genseq(s)
248 STR *s;
250 char *ep;
252 if (s->which == STRING1) {
253 fprintf(stderr, "tr: sequences only valid in string2\n");
254 exit(1);
257 if (*s->str == '\\')
258 s->lastch = backslash(s);
259 else
260 s->lastch = *s->str++;
261 if (*s->str != '*') {
262 fprintf(stderr, "tr: misplaced sequence asterisk\n");
263 exit(1);
266 switch (*++s->str) {
267 case '\\':
268 s->cnt = backslash(s);
269 break;
270 case ']':
271 s->cnt = 0;
272 ++s->str;
273 break;
274 default:
275 if (isdigit(*s->str)) {
276 s->cnt = strtol((char *) s->str, &ep, 0);
277 if (*ep == ']') {
278 s->str = (unsigned char *) ep + 1;
279 break;
282 fprintf(stderr, "tr: illegal sequence count\n");
283 exit(1);
284 /* NOTREACHED */
287 s->state = s->cnt ? SEQUENCE : INFINITE;
291 * Translate \??? into a character. Up to 3 octal digits, if no digits either
292 * an escape code or a literal character.
294 static int
295 backslash(s)
296 STR *s;
298 int ch, cnt, val;
300 for (cnt = val = 0;;) {
301 ch = *++s->str;
302 if (!isascii(ch) || !isdigit(ch))
303 break;
304 val = val * 8 + ch - '0';
305 if (++cnt == 3) {
306 ++s->str;
307 break;
310 if (cnt)
311 return (val);
312 if (ch != '\0')
313 ++s->str;
314 switch (ch) {
315 case 'a': /* escape characters */
316 return ('\7');
317 case 'b':
318 return ('\b');
319 case 'f':
320 return ('\f');
321 case 'n':
322 return ('\n');
323 case 'r':
324 return ('\r');
325 case 't':
326 return ('\t');
327 case 'v':
328 return ('\13');
329 case '\0': /* \" -> \ */
330 s->state = EOS;
331 return ('\\');
332 default: /* \x" -> x */
333 return (ch);