port of netbsd's tr
[minix.git] / commands / awk / tran.c
blob57dd281b37963d4f1233ad484000041aaf22dd4e
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #define DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
34 #define FULLTAB 2 /* rehash when table gets this x full */
35 #define GROWTAB 4 /* grow table by this factor */
/*
 * File-level state shared with the rest of the interpreter.  Most of the
 * pointers below are filled in by syminit(), arginit() and envinit() in
 * this file, which point them at members of the matching symbol-table Cell.
 */
37 Array *symtab; /* main symbol table */
39 char **FS; /* initial field sep */
40 char **RS; /* initial record sep */
41 char **OFS; /* output field sep */
42 char **ORS; /* output record sep */
43 char **OFMT; /* output format for numbers */
44 char **CONVFMT; /* format for conversions in getsval */
45 Awkfloat *NF; /* number of fields in current record */
46 Awkfloat *NR; /* number of current record */
47 Awkfloat *FNR; /* number of current record in current file */
48 char **FILENAME; /* current filename argument */
49 Awkfloat *ARGC; /* number of arguments from command line */
50 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH; /* length of same */
54 Cell *fsloc; /* FS */
55 Cell *nrloc; /* NR */
56 Cell *nfloc; /* NF */
57 Cell *fnrloc; /* FNR */
58 Array *ARGVtab; /* symbol table containing ARGV[...] */
59 Array *ENVtab; /* symbol table containing ENVIRON[...] */
60 Cell *rstartloc; /* RSTART */
61 Cell *rlengthloc; /* RLENGTH */
62 Cell *symtabloc; /* SYMTAB */
64 Cell *nullloc; /* a guaranteed empty cell */
65 Node *nullnode; /* zero&null, converted into a node for comparisons */
66 Cell *literal0; /* cell for the constant "0"; created in syminit() */
68 extern Cell **fldtab;
70 void syminit(void) /* initialize symbol table with builtin vars */
72 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 /* this is used for if(x)... tests: */
74 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 nullnode = celltonode(nullloc, CCON);
77 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78 FS = &fsloc->sval;
79 RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81 ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85 nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86 NF = &nfloc->fval;
87 nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88 NR = &nrloc->fval;
89 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90 FNR = &fnrloc->fval;
91 SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93 RSTART = &rstartloc->fval;
94 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95 RLENGTH = &rlengthloc->fval;
96 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97 symtabloc->sval = (char *) symtab;
100 void arginit(int ac, char **av) /* set up ARGV and ARGC */
102 Cell *cp;
103 int i;
104 char temp[50];
106 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107 cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
109 cp->sval = (char *) ARGVtab;
110 for (i = 0; i < ac; i++) {
111 sprintf(temp, "%d", i);
112 if (is_number(*av))
113 setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114 else
115 setsymtab(temp, *av, 0.0, STR, ARGVtab);
116 av++;
120 void envinit(char **envp) /* set up ENVIRON variable */
122 Cell *cp;
123 char *p;
125 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126 ENVtab = makesymtab(NSYMTAB);
127 cp->sval = (char *) ENVtab;
128 for ( ; *envp; envp++) {
129 if ((p = strchr(*envp, '=')) == NULL)
130 continue;
131 if( p == *envp ) /* no left hand side name in env string */
132 continue;
133 *p++ = 0; /* split into two strings at = */
134 if (is_number(p))
135 setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136 else
137 setsymtab(*envp, p, 0.0, STR, ENVtab);
138 p[-1] = '='; /* restore in case env is passed down to a shell */
142 Array *makesymtab(int n) /* make a new symbol table */
144 Array *ap;
145 Cell **tp;
147 ap = (Array *) malloc(sizeof(Array));
148 tp = (Cell **) calloc(n, sizeof(Cell *));
149 if (ap == NULL || tp == NULL)
150 FATAL("out of space in makesymtab");
151 ap->nelem = 0;
152 ap->size = n;
153 ap->tab = tp;
154 return(ap);
157 void freesymtab(const Cell *ap) /* free a symbol table */
159 Cell *cp, *temp;
160 Array *tp;
161 int i;
163 if (!isarr(ap))
164 return;
165 tp = (Array *) ap->sval;
166 if (tp == NULL)
167 return;
168 for (i = 0; i < tp->size; i++) {
169 for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170 xfree(cp->nval);
171 if (freeable(cp))
172 xfree(cp->sval);
173 temp = cp->cnext; /* avoids freeing then using */
174 free(cp);
175 tp->nelem--;
177 tp->tab[i] = 0;
179 if (tp->nelem != 0)
180 WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181 free(tp->tab);
182 free(tp);
185 void freeelem(const Cell *ap, const char *s)
186 /* free elem s from ap (i.e., ap["s"] */
188 Array *tp;
189 Cell *p, *prev = NULL;
190 int h;
192 tp = (Array *) ap->sval;
193 h = hash(s, tp->size);
194 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
195 if (strcmp(s, p->nval) == 0) {
196 if (prev == NULL) /* 1st one */
197 tp->tab[h] = p->cnext;
198 else /* middle somewhere */
199 prev->cnext = p->cnext;
200 if (freeable(p))
201 xfree(p->sval);
202 free(p->nval);
203 free(p);
204 tp->nelem--;
205 return;
209 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
211 int h;
212 Cell *p;
214 if (n != NULL && (p = lookup(n, tp)) != NULL) {
215 dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
216 p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
217 return(p);
219 p = (Cell *) malloc(sizeof(Cell));
220 if (p == NULL)
221 FATAL("out of space for symbol table at %s", n);
222 p->nval = tostring(n);
223 p->sval = s ? tostring(s) : tostring("");
224 p->fval = f;
225 p->tval = t;
226 p->csub = CUNK;
227 p->ctype = OCELL;
228 tp->nelem++;
229 if (tp->nelem > FULLTAB * tp->size)
230 rehash(tp);
231 h = hash(n, tp->size);
232 p->cnext = tp->tab[h];
233 tp->tab[h] = p;
234 dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
235 p, p->nval, p->sval, p->fval, p->tval) );
236 return(p);
/*
 * hash - form the hash value for string s in a table of n buckets.
 *
 * Folds each character into an unsigned accumulator (multiply by 31, add
 * the character) and reduces the result modulo n.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = acc * 31 + *s;
		s++;
	}
	return acc % n;
}
248 void rehash(Array *tp) /* rehash items in small table into big one */
250 int i, nh, nsz;
251 Cell *cp, *op, **np;
253 nsz = GROWTAB * tp->size;
254 np = (Cell **) calloc(nsz, sizeof(Cell *));
255 if (np == NULL) /* can't do it, but can keep running. */
256 return; /* someone else will run out later. */
257 for (i = 0; i < tp->size; i++) {
258 for (cp = tp->tab[i]; cp; cp = op) {
259 op = cp->cnext;
260 nh = hash(cp->nval, nsz);
261 cp->cnext = np[nh];
262 np[nh] = cp;
265 free(tp->tab);
266 tp->tab = np;
267 tp->size = nsz;
270 Cell *lookup(const char *s, const Array *tp) /* look for s in tp */
272 Cell *p;
273 int h;
275 h = hash(s, tp->size);
276 for (p = tp->tab[h]; p != NULL; p = p->cnext)
277 if (strcmp(s, p->nval) == 0)
278 return(p); /* found it */
279 return(NULL); /* not found */
282 Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
284 int fldno;
286 if ((vp->tval & (NUM | STR)) == 0)
287 funnyvar(vp, "assign to");
288 if (isfld(vp)) {
289 donerec = 0; /* mark $0 invalid */
290 fldno = atoi(vp->nval);
291 if (fldno > *NF)
292 newfld(fldno);
293 dprintf( ("setting field %d to %g\n", fldno, f) );
294 } else if (isrec(vp)) {
295 donefld = 0; /* mark $1... invalid */
296 donerec = 1;
298 if (freeable(vp))
299 xfree(vp->sval); /* free any previous string */
300 vp->tval &= ~STR; /* mark string invalid */
301 vp->tval |= NUM; /* mark number ok */
302 dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
303 return vp->fval = f;
306 void funnyvar(Cell *vp, const char *rw)
308 if (isarr(vp))
309 FATAL("can't %s %s; it's an array name.", rw, vp->nval);
310 if (vp->tval & FCN)
311 FATAL("can't %s %s; it's a function.", rw, vp->nval);
312 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
313 vp, vp->nval, vp->sval, vp->fval, vp->tval);
316 char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
318 char *t;
319 int fldno;
321 dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
322 vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
323 if ((vp->tval & (NUM | STR)) == 0)
324 funnyvar(vp, "assign to");
325 if (isfld(vp)) {
326 donerec = 0; /* mark $0 invalid */
327 fldno = atoi(vp->nval);
328 if (fldno > *NF)
329 newfld(fldno);
330 dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
331 } else if (isrec(vp)) {
332 donefld = 0; /* mark $1... invalid */
333 donerec = 1;
335 t = tostring(s); /* in case it's self-assign */
336 if (freeable(vp))
337 xfree(vp->sval);
338 vp->tval &= ~NUM;
339 vp->tval |= STR;
340 vp->tval &= ~DONTFREE;
341 dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
342 vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
343 return(vp->sval = t);
346 Awkfloat getfval(Cell *vp) /* get float val of a Cell */
348 if ((vp->tval & (NUM | STR)) == 0)
349 funnyvar(vp, "read value of");
350 if (isfld(vp) && donefld == 0)
351 fldbld();
352 else if (isrec(vp) && donerec == 0)
353 recbld();
354 if (!isnum(vp)) { /* not a number */
355 vp->fval = atof(vp->sval); /* best guess */
356 if (is_number(vp->sval) && !(vp->tval&CON))
357 vp->tval |= NUM; /* make NUM only sparingly */
359 dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
360 return(vp->fval);
363 static char *get_str_val(Cell *vp, char **fmt)
364 /* get string val of a Cell */
366 char s[100]; /* BUG: unchecked */
367 double dtemp;
369 if ((vp->tval & (NUM | STR)) == 0)
370 funnyvar(vp, "read value of");
371 if (isfld(vp) && donefld == 0)
372 fldbld();
373 else if (isrec(vp) && donerec == 0)
374 recbld();
375 if (isstr(vp) == 0) {
376 if (freeable(vp))
377 xfree(vp->sval);
378 if (modf(vp->fval, &dtemp) == 0) /* it's integral */
379 sprintf(s, "%.30g", vp->fval);
380 else
381 sprintf(s, *fmt, vp->fval);
382 vp->sval = tostring(s);
383 vp->tval &= ~DONTFREE;
384 vp->tval |= STR;
386 dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
387 return(vp->sval);
390 char *getsval(Cell *vp) /* get string val of a Cell */
392 return get_str_val(vp, CONVFMT);
395 char *getpssval(Cell *vp) /* get string val of a Cell for print */
397 return get_str_val(vp, OFMT);
/*
 * tostring - make a heap copy of string s.
 *
 * Returns the copy, which the caller owns.  Calls FATAL if memory cannot
 * be allocated.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;	/* text plus the terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
412 char *qstring(const char *is, int delim) /* collect string up to next delim */
414 const char *os = is;
415 int c, n;
416 uschar *s = (uschar *) is;
417 uschar *buf, *bp;
419 if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
420 FATAL( "out of space in qstring(%s)", s);
421 for (bp = buf; (c = *s) != delim; s++) {
422 if (c == '\n')
423 SYNTAX( "newline in string %.20s...", os );
424 else if (c != '\\')
425 *bp++ = c;
426 else { /* \something */
427 c = *++s;
428 if (c == 0) { /* \ at end */
429 *bp++ = '\\';
430 break; /* for loop */
432 switch (c) {
433 case '\\': *bp++ = '\\'; break;
434 case 'n': *bp++ = '\n'; break;
435 case 't': *bp++ = '\t'; break;
436 case 'b': *bp++ = '\b'; break;
437 case 'f': *bp++ = '\f'; break;
438 case 'r': *bp++ = '\r'; break;
439 default:
440 if (!isdigit(c)) {
441 *bp++ = c;
442 break;
444 n = c - '0';
445 if (isdigit(s[1])) {
446 n = 8 * n + *++s - '0';
447 if (isdigit(s[1]))
448 n = 8 * n + *++s - '0';
450 *bp++ = n;
451 break;
455 *bp++ = 0;
456 return (char *) buf;