VM: full munmap
[minix.git] / external / historical / nawk / dist / run.c
blob44cadc24ca8464bfda615befbbe442240e268c94
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
29 #define DEBUG
30 #include <stdio.h>
31 #include <ctype.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <setjmp.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <time.h>
40 #include <stdint.h>
41 #include "awk.h"
42 #include "awkgram.h"
44 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
46 void stdinit(void);
49 #undef tempfree
51 void tempfree(Cell *p) {
52 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
53 WARNING("bad csub %d in Cell %d %s",
54 p->csub, p->ctype, p->sval);
56 if (istemp(p))
57 tfree(p);
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /* */
68 /* #ifndef FOPEN_MAX */
69 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #endif */
71 /* */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74 /* #endif */
76 jmp_buf env;
77 extern int pairstack[];
78 extern unsigned int srand_seed;
80 Node *winner = NULL; /* root of parse tree */
81 Cell *tmps; /* free temporary cells for execution */
83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
84 Cell *True = &truecell;
85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
86 Cell *False = &falsecell;
87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
88 Cell *jbreak = &breakcell;
89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
90 Cell *jcont = &contcell;
91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
92 Cell *jnext = &nextcell;
93 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL};
94 Cell *jnextfile = &nextfilecell;
95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
96 Cell *jexit = &exitcell;
97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
98 Cell *jret = &retcell;
99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };
101 Node *curnode = NULL; /* the node being executed, for debugging */
103 /* buffer memory management */
104 int adjbuf(uschar **pbuf, int *psiz, int minlen, int quantum, uschar **pbptr,
105 const char *whatrtn)
106 /* pbuf: address of pointer to buffer being managed
107 * psiz: address of buffer size variable
108 * minlen: minimum length of buffer needed
109 * quantum: buffer size quantum
110 * pbptr: address of movable pointer into buffer, or 0 if none
111 * whatrtn: name of the calling routine if failure should cause fatal error
113 * return 0 for realloc failure, !=0 for success
116 if (minlen > *psiz) {
117 char *tbuf;
118 int rminlen = quantum ? minlen % quantum : 0;
119 int boff = pbptr ? *pbptr - *pbuf : 0;
120 /* round up to next multiple of quantum */
121 if (rminlen)
122 minlen += quantum - rminlen;
123 tbuf = realloc(*pbuf, minlen);
124 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
125 if (tbuf == NULL) {
126 if (whatrtn)
127 FATAL("out of memory in %s", whatrtn);
128 return 0;
130 *pbuf = tbuf;
131 *psiz = minlen;
132 if (pbptr)
133 *pbptr = tbuf + boff;
135 return 1;
138 void run(Node *a) /* execution of parse tree starts here */
140 stdinit();
141 execute(a);
142 closeall();
145 Cell *execute(Node *u) /* execute a node of the parse tree */
147 Cell *(*proc)(Node **, int);
148 Cell *x;
149 Node *a;
151 if (u == NULL)
152 return(True);
153 for (a = u; ; a = a->nnext) {
154 curnode = a;
155 if (isvalue(a)) {
156 x = (Cell *) (a->narg[0]);
157 if (isfld(x) && !donefld)
158 fldbld();
159 else if (isrec(x) && !donerec)
160 recbld();
161 return(x);
163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
164 FATAL("illegal statement");
165 proc = proctab[a->nobj-FIRSTTOKEN];
166 x = (*proc)(a->narg, a->nobj);
167 if (isfld(x) && !donefld)
168 fldbld();
169 else if (isrec(x) && !donerec)
170 recbld();
171 if (isexpr(a))
172 return(x);
173 if (isjump(x))
174 return(x);
175 if (a->nnext == NULL)
176 return(x);
177 tempfree(x);
182 Cell *program(Node **a, int n) /* execute an awk program */
183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
184 Cell *x;
186 if (setjmp(env) != 0)
187 goto ex;
188 if (a[0]) { /* BEGIN */
189 x = execute(a[0]);
190 if (isexit(x))
191 return(True);
192 if (isjump(x))
193 FATAL("illegal break, continue, next or nextfile from BEGIN");
194 tempfree(x);
196 if (a[1] || a[2])
197 while (getrec(&record, &recsize, 1) > 0) {
198 x = execute(a[1]);
199 if (isexit(x))
200 break;
201 tempfree(x);
204 if (setjmp(env) != 0) /* handles exit within END */
205 goto ex1;
206 if (a[2]) { /* END */
207 x = execute(a[2]);
208 if (isbreak(x) || isnext(x) || iscont(x))
209 FATAL("illegal break, continue, next or nextfile from END");
210 tempfree(x);
212 ex1:
213 return(True);
216 struct Frame { /* stack frame for awk function calls */
217 int nargs; /* number of arguments in this call */
218 Cell *fcncell; /* pointer to Cell for function */
219 Cell **args; /* pointer to array of arguments after execute */
220 Cell *retval; /* return value */
223 #define NARGS 50 /* max args in a call */
225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
226 int nframe = 0; /* number of frames allocated */
227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };
232 int i, ncall, ndef;
233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 Node *x;
235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
236 Cell *y, *z, *fcn;
237 char *s;
239 fcn = execute(a[0]); /* the function itself */
240 s = fcn->nval;
241 if (!isfcn(fcn))
242 FATAL("calling undefined function %s", s);
243 if (frame == NULL) {
244 frp = frame = calloc(nframe += 100, sizeof(*frp));
245 if (frame == NULL)
246 FATAL("out of space for stack frames calling %s", s);
248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
249 ncall++;
250 ndef = (int) fcn->fval; /* args in defn */
251 dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (frp-frame)) );
252 if (ncall > ndef)
253 WARNING("function %s called with %d args, uses only %d",
254 s, ncall, ndef);
255 if (ncall + ndef > NARGS)
256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
258 dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (frp-frame)) );
259 y = execute(x);
260 oargs[i] = y;
261 dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
263 if (isfcn(y))
264 FATAL("can't use function %s as argument in %s", y->nval, s);
265 if (isarr(y))
266 args[i] = y; /* arrays by ref */
267 else
268 args[i] = copycell(y);
269 tempfree(y);
271 for ( ; i < ndef; i++) { /* add null args for ones not provided */
272 args[i] = gettemp();
273 *args[i] = newcopycell;
275 frp++; /* now ok to up frame */
276 if (frp >= frame + nframe) {
277 int dfp = frp - frame; /* old index */
278 frame = realloc(frame, (nframe += 100) * sizeof(*frame));
279 if (frame == NULL)
280 FATAL("out of space for stack frames in %s", s);
281 frp = frame + dfp;
283 frp->fcncell = fcn;
284 frp->args = args;
285 frp->nargs = ndef; /* number defined with (excess are locals) */
286 frp->retval = gettemp();
288 dprintf( ("start exec of %s, fp=%d\n", s, (int) (frp-frame)) );
289 y = execute((Node *)(fcn->sval)); /* execute body */
290 dprintf( ("finished exec of %s, fp=%d\n", s, (int) (frp-frame)) );
292 for (i = 0; i < ndef; i++) {
293 Cell *t = frp->args[i];
294 if (isarr(t)) {
295 if (t->csub == CCOPY) {
296 if (i >= ncall) {
297 freesymtab(t);
298 t->csub = CTEMP;
299 tempfree(t);
300 } else {
301 oargs[i]->tval = t->tval;
302 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303 oargs[i]->sval = t->sval;
304 tempfree(t);
307 } else if (t != y) { /* kludge to prevent freeing twice */
308 t->csub = CTEMP;
309 tempfree(t);
310 } else if (t == y && t->csub == CCOPY) {
311 t->csub = CTEMP;
312 tempfree(t);
313 freed = 1;
316 tempfree(fcn);
317 if (isexit(y) || isnext(y))
318 return y;
319 if (freed == 0) {
320 tempfree(y); /* don't free twice! */
322 z = frp->retval; /* return value */
323 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
324 frp--;
325 return(z);
328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330 Cell *y;
332 /* copy is not constant or field */
334 y = gettemp();
335 y->tval = x->tval & ~(CON|FLD|REC);
336 y->csub = CCOPY; /* prevents freeing until call is over */
337 y->nval = x->nval; /* BUG? */
338 if (isstr(x) /* || x->ctype == OCELL */) {
339 y->sval = tostring(x->sval);
340 y->tval &= ~DONTFREE;
341 } else
342 y->tval |= DONTFREE;
343 y->fval = x->fval;
344 return y;
347 Cell *arg(Node **a, int n) /* nth argument of a function */
350 n = ptoi(a[0]); /* argument number, counting from 0 */
351 dprintf( ("arg(%d), fp->nargs=%d\n", n, frp->nargs) );
352 if (n+1 > frp->nargs)
353 FATAL("argument #%d of function %s was not supplied",
354 n+1, frp->fcncell->nval);
355 return frp->args[n];
358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360 Cell *y;
362 switch (n) {
363 case EXIT:
364 if (a[0] != NULL) {
365 y = execute(a[0]);
366 errorflag = (int) getfval(y);
367 tempfree(y);
369 longjmp(env, 1);
370 case RETURN:
371 if (a[0] != NULL) {
372 y = execute(a[0]);
373 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 setsval(frp->retval, getsval(y));
375 frp->retval->fval = getfval(y);
376 frp->retval->tval |= NUM;
378 else if (y->tval & STR)
379 setsval(frp->retval, getsval(y));
380 else if (y->tval & NUM)
381 setfval(frp->retval, getfval(y));
382 else /* can't happen */
383 FATAL("bad type variable %d", y->tval);
384 tempfree(y);
386 return(jret);
387 case NEXT:
388 return(jnext);
389 case NEXTFILE:
390 nextfile();
391 return(jnextfile);
392 case BREAK:
393 return(jbreak);
394 case CONTINUE:
395 return(jcont);
396 default: /* can't happen */
397 FATAL("illegal jump type %d", n);
399 return 0; /* not reached */
402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
403 { /* a[0] is variable, a[1] is operator, a[2] is filename */
404 Cell *r, *x;
405 extern Cell **fldtab;
406 FILE *fp;
407 uschar *buf;
408 int bufsize = recsize;
409 int mode, newflag;
411 if ((buf = malloc(bufsize)) == NULL)
412 FATAL("out of memory in getline");
414 fflush(stdout); /* in case someone is waiting for a prompt */
415 r = gettemp();
416 if (a[1] != NULL) { /* getline < file */
417 x = execute(a[2]); /* filename */
418 mode = ptoi(a[1]);
419 if (mode == '|') /* input pipe */
420 mode = LE; /* arbitrary flag */
421 fp = openfile(mode, getsval(x), &newflag);
422 tempfree(x);
423 if (fp == NULL)
424 n = -1;
425 else
426 n = readrec(&buf, &bufsize, fp, newflag);
427 if (n <= 0) {
429 } else if (a[0] != NULL) { /* getline var <file */
430 x = execute(a[0]);
431 setsval(x, buf);
432 tempfree(x);
433 } else { /* getline <file */
434 setsval(fldtab[0], buf);
435 if (is_number(fldtab[0]->sval)) {
436 fldtab[0]->fval = atof(fldtab[0]->sval);
437 fldtab[0]->tval |= NUM;
440 } else { /* bare getline; use current input */
441 if (a[0] == NULL) /* getline */
442 n = getrec(&record, &recsize, 1);
443 else { /* getline var */
444 n = getrec(&buf, &bufsize, 0);
445 x = execute(a[0]);
446 setsval(x, buf);
447 tempfree(x);
450 setfval(r, (Awkfloat) n);
451 free(buf);
452 return r;
455 Cell *getnf(Node **a, int n) /* get NF */
457 if (donefld == 0)
458 fldbld();
459 return (Cell *) a[0];
462 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
464 Cell *x, *y, *z;
465 char *s;
466 Node *np;
467 uschar *buf;
468 int bufsz = recsize;
469 int nsub = strlen(*SUBSEP);
471 if ((buf = malloc(bufsz)) == NULL)
472 FATAL("out of memory in array");
474 x = execute(a[0]); /* Cell* for symbol table */
475 buf[0] = 0;
476 for (np = a[1]; np; np = np->nnext) {
477 y = execute(np); /* subscript */
478 s = getsval(y);
479 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
480 FATAL("out of memory for %s[%s...]", x->nval, buf);
481 strlcat(buf, s, bufsz);
482 if (np->nnext)
483 strlcat(buf, *SUBSEP, bufsz);
484 tempfree(y);
486 if (!isarr(x)) {
487 dprintf( ("making %s into an array\n", NN(x->nval)) );
488 if (freeable(x))
489 xfree(x->sval);
490 x->tval &= ~(STR|NUM|DONTFREE);
491 x->tval |= ARR;
492 x->sval = (char *) makesymtab(NSYMTAB);
494 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
495 z->ctype = OCELL;
496 z->csub = CVAR;
497 tempfree(x);
498 free(buf);
499 return(z);
502 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
504 Cell *x, *y;
505 Node *np;
506 uschar *s;
507 int nsub = strlen(*SUBSEP);
509 x = execute(a[0]); /* Cell* for symbol table */
510 if (!isarr(x))
511 return True;
512 if (a[1] == 0) { /* delete the elements, not the table */
513 freesymtab(x);
514 x->tval &= ~STR;
515 x->tval |= ARR;
516 x->sval = (char *) makesymtab(NSYMTAB);
517 } else {
518 int bufsz = recsize;
519 uschar *buf;
520 if ((buf = malloc(bufsz)) == NULL)
521 FATAL("out of memory in adelete");
522 buf[0] = 0;
523 for (np = a[1]; np; np = np->nnext) {
524 y = execute(np); /* subscript */
525 s = getsval(y);
526 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
527 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
528 strlcat(buf, s, bufsz);
529 if (np->nnext)
530 strlcat(buf, *SUBSEP, bufsz);
531 tempfree(y);
533 freeelem(x, buf);
534 free(buf);
536 tempfree(x);
537 return True;
540 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
542 Cell *x, *ap, *k;
543 Node *p;
544 uschar *buf;
545 char *s;
546 int bufsz = recsize;
547 int nsub = strlen(*SUBSEP);
549 ap = execute(a[1]); /* array name */
550 if (!isarr(ap)) {
551 dprintf( ("making %s into an array\n", ap->nval) );
552 if (freeable(ap))
553 xfree(ap->sval);
554 ap->tval &= ~(STR|NUM|DONTFREE);
555 ap->tval |= ARR;
556 ap->sval = (char *) makesymtab(NSYMTAB);
558 if ((buf = malloc(bufsz)) == NULL) {
559 FATAL("out of memory in intest");
561 buf[0] = 0;
562 for (p = a[0]; p; p = p->nnext) {
563 x = execute(p); /* expr */
564 s = getsval(x);
565 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
566 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
567 strcat(buf, s);
568 tempfree(x);
569 if (p->nnext)
570 strcat(buf, *SUBSEP);
572 k = lookup(buf, (Array *) ap->sval);
573 tempfree(ap);
574 free(buf);
575 if (k == NULL)
576 return(False);
577 else
578 return(True);
582 Cell *matchop(Node **a, int n) /* ~ and match() */
584 Cell *x, *y;
585 uschar *s;
586 char *t;
587 int i;
588 fa *pfa;
589 int (*mf)(fa *, const char *) = match, mode = 0;
591 if (n == MATCHFCN) {
592 mf = pmatch;
593 mode = 1;
595 x = execute(a[1]); /* a[1] = target text */
596 s = getsval(x);
597 if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */
598 i = (*mf)((fa *) a[2], s);
599 else {
600 y = execute(a[2]); /* a[2] = regular expr */
601 t = getsval(y);
602 pfa = makedfa(t, mode);
603 i = (*mf)(pfa, s);
604 tempfree(y);
606 tempfree(x);
607 if (n == MATCHFCN) {
608 int start = patbeg - s + 1;
609 if (patlen < 0)
610 start = 0;
611 setfval(rstartloc, (Awkfloat) start);
612 setfval(rlengthloc, (Awkfloat) patlen);
613 x = gettemp();
614 x->tval = NUM;
615 x->fval = start;
616 return x;
617 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
618 return(True);
619 else
620 return(False);
624 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
626 Cell *x, *y;
627 int i;
629 x = execute(a[0]);
630 i = istrue(x);
631 tempfree(x);
632 switch (n) {
633 case BOR:
634 if (i) return(True);
635 y = execute(a[1]);
636 i = istrue(y);
637 tempfree(y);
638 if (i) return(True);
639 else return(False);
640 case AND:
641 if ( !i ) return(False);
642 y = execute(a[1]);
643 i = istrue(y);
644 tempfree(y);
645 if (i) return(True);
646 else return(False);
647 case NOT:
648 if (i) return(False);
649 else return(True);
650 default: /* can't happen */
651 FATAL("unknown boolean operator %d", n);
653 return 0; /*NOTREACHED*/
656 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
658 int i;
659 Cell *x, *y;
660 Awkfloat j;
662 x = execute(a[0]);
663 y = execute(a[1]);
664 if (x->tval&NUM && y->tval&NUM) {
665 j = x->fval - y->fval;
666 i = j<0? -1: (j>0? 1: 0);
667 } else {
668 i = strcmp(getsval(x), getsval(y));
670 tempfree(x);
671 tempfree(y);
672 switch (n) {
673 case LT: if (i<0) return(True);
674 else return(False);
675 case LE: if (i<=0) return(True);
676 else return(False);
677 case NE: if (i!=0) return(True);
678 else return(False);
679 case EQ: if (i == 0) return(True);
680 else return(False);
681 case GE: if (i>=0) return(True);
682 else return(False);
683 case GT: if (i>0) return(True);
684 else return(False);
685 default: /* can't happen */
686 FATAL("unknown relational operator %d", n);
688 return 0; /*NOTREACHED*/
691 void tfree(Cell *a) /* free a tempcell */
693 if (freeable(a)) {
694 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
695 xfree(a->sval);
697 if (a == tmps)
698 FATAL("tempcell list is curdled");
699 a->cnext = tmps;
700 tmps = a;
703 Cell *gettemp(void) /* get a tempcell */
704 { int i;
705 Cell *x;
707 if (!tmps) {
708 tmps = calloc(100, sizeof(*tmps));
709 if (!tmps)
710 FATAL("out of space for temporaries");
711 for(i = 1; i < 100; i++)
712 tmps[i-1].cnext = &tmps[i];
713 tmps[i-1].cnext = 0;
715 x = tmps;
716 tmps = x->cnext;
717 *x = tempcell;
718 return(x);
721 Cell *indirect(Node **a, int n) /* $( a[0] ) */
723 Awkfloat val;
724 Cell *x;
725 int m;
726 char *s;
728 x = execute(a[0]);
729 val = getfval(x); /* freebsd: defend against super large field numbers */
730 if ((Awkfloat)INT_MAX < val)
731 FATAL("trying to access out of range field %s", x->nval);
732 m = (int) val;
733 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
734 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
735 /* BUG: can x->nval ever be null??? */
736 tempfree(x);
737 x = fieldadr(m);
738 x->ctype = OCELL; /* BUG? why are these needed? */
739 x->csub = CFLD;
740 return(x);
743 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
745 int k, m, n;
746 char *s;
747 int temp;
748 Cell *x, *y, *z = 0;
750 x = execute(a[0]);
751 y = execute(a[1]);
752 if (a[2] != 0)
753 z = execute(a[2]);
754 s = getsval(x);
755 k = strlen(s) + 1;
756 if (k <= 1) {
757 tempfree(x);
758 tempfree(y);
759 if (a[2] != 0) {
760 tempfree(z);
762 x = gettemp();
763 setsval(x, "");
764 return(x);
766 m = (int) getfval(y);
767 if (m <= 0)
768 m = 1;
769 else if (m > k)
770 m = k;
771 tempfree(y);
772 if (a[2] != 0) {
773 n = (int) getfval(z);
774 tempfree(z);
775 } else
776 n = k - 1;
777 if (n < 0)
778 n = 0;
779 else if (n > k - m)
780 n = k - m;
781 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
782 y = gettemp();
783 temp = s[n+m-1]; /* with thanks to John Linderman */
784 s[n+m-1] = '\0';
785 setsval(y, s + m - 1);
786 s[n+m-1] = temp;
787 tempfree(x);
788 return(y);
791 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
793 Cell *x, *y, *z;
794 char *s1, *s2, *p1, *p2, *q;
795 Awkfloat v = 0.0;
797 x = execute(a[0]);
798 s1 = getsval(x);
799 y = execute(a[1]);
800 s2 = getsval(y);
802 z = gettemp();
803 for (p1 = s1; *p1 != '\0'; p1++) {
804 for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
806 if (*p2 == '\0') {
807 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
808 break;
811 tempfree(x);
812 tempfree(y);
813 setfval(z, v);
814 return(z);
817 #define MAXNUMSIZE 50
819 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
821 uschar *fmt, *p, *t;
822 const char *os;
823 Cell *x;
824 int flag = 0, n;
825 int fmtwd; /* format width */
826 int fmtsz = recsize;
827 uschar *buf = *pbuf;
828 int bufsize = *pbufsize;
829 #define FMTSZ(a) (fmtsz - ((a) - fmt))
830 #define BUFSZ(a) (bufsize - ((a) - buf))
832 os = s;
833 p = buf;
834 if ((fmt = malloc(fmtsz)) == NULL)
835 FATAL("out of memory in format()");
836 while (*s) {
837 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
838 if (*s != '%') {
839 *p++ = *s++;
840 continue;
842 if (*(s+1) == '%') {
843 *p++ = '%';
844 s += 2;
845 continue;
847 /* have to be real careful in case this is a huge number, eg, %100000d */
848 fmtwd = atoi(s+1);
849 if (fmtwd < 0)
850 fmtwd = -fmtwd;
851 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
852 for (t = fmt; (*t++ = *s) != '\0'; s++) {
853 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
854 FATAL("format item %.30s... ran format() out of memory", os);
855 if (*s == 'l' || *s == 'h' || *s == 'L')
856 goto weird;
857 if (isalpha((uschar)*s))
858 break; /* the ansi panoply */
859 if (*s == '*') {
860 if (a == NULL)
861 FATAL("not enough args in printf("
862 "\"%.30s\")", os);
863 x = execute(a);
864 a = a->nnext;
865 snprintf(t - 1, FMTSZ(t - 1),
866 "%d", fmtwd=(int) getfval(x));
867 if (fmtwd < 0)
868 fmtwd = -fmtwd;
869 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
870 t = fmt + strlen(fmt);
871 tempfree(x);
874 *t = '\0';
875 if (fmtwd < 0)
876 fmtwd = -fmtwd;
877 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
879 switch (*s) {
880 case 'f': case 'e': case 'g': case 'E': case 'G':
881 flag = 'f';
882 break;
883 case 'd': case 'i':
884 flag = 'd';
885 if(*(s-1) == 'l') break;
886 *(t-1) = 'j';
887 *t = 'd';
888 *++t = '\0';
889 break;
890 case 'o': case 'x': case 'X': case 'u':
891 flag = *(s-1) == 'l' ? 'd' : 'u';
892 *(t-1) = 'j';
893 *t = *s;
894 *++t = '\0';
895 break;
896 case 's':
897 flag = 's';
898 break;
899 case 'c':
900 flag = 'c';
901 break;
902 default:
903 weird:
904 WARNING("weird printf conversion %s", fmt);
905 flag = '?';
906 break;
908 if (a == NULL)
909 FATAL("not enough args in printf(%s)", os);
910 x = execute(a);
911 a = a->nnext;
912 n = MAXNUMSIZE;
913 if (fmtwd > n)
914 n = fmtwd;
915 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
916 switch (flag) {
917 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
918 t = getsval(x);
919 n = strlen(t);
920 if (fmtwd > n)
921 n = fmtwd;
922 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
923 p += strlen(p);
924 snprintf(p, BUFSZ(p), "%s", t);
925 break;
926 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
927 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
928 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
929 case 's':
930 t = getsval(x);
931 n = strlen(t);
932 if (fmtwd > n)
933 n = fmtwd;
934 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
935 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
936 snprintf(p, BUFSZ(p), fmt, t);
937 break;
938 case 'c':
939 if (isnum(x)) {
940 if (getfval(x))
941 snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
942 else {
943 *p++ = '\0'; /* explicit null byte */
944 *p = '\0'; /* next output will start here */
946 } else
947 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
948 break;
949 default:
950 FATAL("can't happen: bad conversion %c in format()", flag);
952 tempfree(x);
953 p += strlen(p);
954 s++;
956 *p = '\0';
957 free(fmt);
958 for ( ; a; a = a->nnext) /* evaluate any remaining args */
959 execute(a);
960 *pbuf = buf;
961 *pbufsize = bufsize;
962 return p - buf;
965 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
967 Cell *x;
968 Node *y;
969 char *buf;
970 int bufsz=3*recsize;
972 if ((buf = malloc(bufsz)) == NULL)
973 FATAL("out of memory in awksprintf");
974 y = a[0]->nnext;
975 x = execute(a[0]);
976 if (format(&buf, &bufsz, getsval(x), y) == -1)
977 FATAL("sprintf string %.30s... too long. can't happen.", buf);
978 tempfree(x);
979 x = gettemp();
980 x->sval = buf;
981 x->tval = STR;
982 return(x);
985 Cell *awkprintf(Node **a, int n) /* printf */
986 { /* a[0] is list of args, starting with format string */
987 /* a[1] is redirection operator, a[2] is redirection file */
988 FILE *fp;
989 Cell *x;
990 Node *y;
991 char *buf;
992 int len;
993 int bufsz=3*recsize;
995 if ((buf = malloc(bufsz)) == NULL)
996 FATAL("out of memory in awkprintf");
997 y = a[0]->nnext;
998 x = execute(a[0]);
999 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1000 FATAL("printf string %.30s... too long. can't happen.", buf);
1001 tempfree(x);
1002 if (a[1] == NULL) {
1003 /* fputs(buf, stdout); */
1004 fwrite(buf, len, 1, stdout);
1005 if (ferror(stdout))
1006 FATAL("write error on stdout");
1007 } else {
1008 fp = redirect(ptoi(a[1]), a[2]);
1009 /* fputs(buf, fp); */
1010 fwrite(buf, len, 1, fp);
1011 fflush(fp);
1012 if (ferror(fp))
1013 FATAL("write error on %s", filename(fp));
1015 free(buf);
1016 return(True);
1019 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1021 Awkfloat i, j = 0;
1022 double v;
1023 Cell *x, *y, *z;
1025 x = execute(a[0]);
1026 i = getfval(x);
1027 tempfree(x);
1028 if (n != UMINUS) {
1029 y = execute(a[1]);
1030 j = getfval(y);
1031 tempfree(y);
1033 z = gettemp();
1034 switch (n) {
1035 case ADD:
1036 i += j;
1037 break;
1038 case MINUS:
1039 i -= j;
1040 break;
1041 case MULT:
1042 i *= j;
1043 break;
1044 case DIVIDE:
1045 if (j == 0)
1046 FATAL("division by zero");
1047 i /= j;
1048 break;
1049 case MOD:
1050 if (j == 0)
1051 FATAL("division by zero in mod");
1052 modf(i/j, &v);
1053 i = i - j * v;
1054 break;
1055 case UMINUS:
1056 i = -i;
1057 break;
1058 case POWER:
1059 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1060 i = ipow(i, (int) j);
1061 else
1062 i = errcheck(pow(i, j), "pow");
1063 break;
1064 default: /* can't happen */
1065 FATAL("illegal arithmetic operator %d", n);
1067 setfval(z, i);
1068 return(z);
/*
 * ipow: x**n.  ought to be done by pow, but isn't always.
 * Recursive square-and-multiply; any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;
	half = ipow(x, n/2);
	return (n % 2 == 0) ? half * half : x * half * half;
}
1084 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1086 Cell *x, *z;
1087 int k;
1088 Awkfloat xf;
1090 x = execute(a[0]);
1091 xf = getfval(x);
1092 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1093 if (n == PREINCR || n == PREDECR) {
1094 setfval(x, xf + k);
1095 return(x);
1097 z = gettemp();
1098 setfval(z, xf);
1099 setfval(x, xf + k);
1100 tempfree(x);
1101 return(z);
1104 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1105 { /* this is subtle; don't muck with it. */
1106 Cell *x, *y;
1107 Awkfloat xf, yf;
1108 double v;
1110 y = execute(a[1]);
1111 x = execute(a[0]);
1112 if (n == ASSIGN) { /* ordinary assignment */
1113 if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
1114 ; /* leave alone unless it's a field */
1115 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1116 setsval(x, getsval(y));
1117 x->fval = getfval(y);
1118 x->tval |= NUM;
1120 else if (isstr(y))
1121 setsval(x, getsval(y));
1122 else if (isnum(y))
1123 setfval(x, getfval(y));
1124 else
1125 funnyvar(y, "read value of");
1126 tempfree(y);
1127 return(x);
1129 xf = getfval(x);
1130 yf = getfval(y);
1131 switch (n) {
1132 case ADDEQ:
1133 xf += yf;
1134 break;
1135 case SUBEQ:
1136 xf -= yf;
1137 break;
1138 case MULTEQ:
1139 xf *= yf;
1140 break;
1141 case DIVEQ:
1142 if (yf == 0)
1143 FATAL("division by zero in /=");
1144 xf /= yf;
1145 break;
1146 case MODEQ:
1147 if (yf == 0)
1148 FATAL("division by zero in %%=");
1149 modf(xf/yf, &v);
1150 xf = xf - yf * v;
1151 break;
1152 case POWEQ:
1153 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1154 xf = ipow(xf, (int) yf);
1155 else
1156 xf = errcheck(pow(xf, yf), "pow");
1157 break;
1158 default:
1159 FATAL("illegal assignment operator %d", n);
1160 break;
1162 tempfree(y);
1163 setfval(x, xf);
1164 return(x);
1167 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1169 Cell *x, *y, *z;
1170 int n1, n2;
1171 char *s;
1173 x = execute(a[0]);
1174 y = execute(a[1]);
1175 getsval(x);
1176 getsval(y);
1177 n1 = strlen(x->sval);
1178 n2 = strlen(y->sval);
1179 s = malloc(n1 + n2 + 1);
1180 if (s == NULL)
1181 FATAL("out of space concatenating %.15s... and %.15s...",
1182 x->sval, y->sval);
1183 strcpy(s, x->sval);
1184 strcpy(s+n1, y->sval);
1185 tempfree(x);
1186 tempfree(y);
1187 z = gettemp();
1188 z->sval = s;
1189 z->tval = STR;
1190 return(z);
1193 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1195 Cell *x;
1197 if (a[0] == 0)
1198 x = execute(a[1]);
1199 else {
1200 x = execute(a[0]);
1201 if (istrue(x)) {
1202 tempfree(x);
1203 x = execute(a[1]);
1206 return x;
1209 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1211 Cell *x;
1212 int pair;
1214 pair = ptoi(a[3]);
1215 if (pairstack[pair] == 0) {
1216 x = execute(a[0]);
1217 if (istrue(x))
1218 pairstack[pair] = 1;
1219 tempfree(x);
1221 if (pairstack[pair] == 1) {
1222 x = execute(a[1]);
1223 if (istrue(x))
1224 pairstack[pair] = 0;
1225 tempfree(x);
1226 x = execute(a[2]);
1227 return(x);
1229 return(False);
1232 static char regexpr[] = "(regexpr)";
1233 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1235 Cell *x = 0, *y, *ap;
1236 char *s;
1237 int sep;
1238 char *t, temp, num[50], *fs = 0;
1239 int n, tempstat, arg3type;
1241 y = execute(a[0]); /* source string */
1242 s = getsval(y);
1243 arg3type = ptoi(a[3]);
1244 if (a[2] == 0) /* fs string */
1245 fs = *FS;
1246 else if (arg3type == STRING) { /* split(str,arr,"string") */
1247 x = execute(a[2]);
1248 fs = getsval(x);
1249 } else if (arg3type == REGEXPR)
1250 fs = regexpr; /* split(str,arr,/regexpr/) */
1251 else
1252 FATAL("illegal type of split");
1253 sep = *fs;
1254 ap = execute(a[1]); /* array name */
1255 freesymtab(ap);
1256 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1257 ap->tval &= ~STR;
1258 ap->tval |= ARR;
1259 ap->sval = (char *) makesymtab(NSYMTAB);
1261 n = 0;
1262 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1263 fa *pfa;
1264 if (arg3type == REGEXPR) { /* it's ready already */
1265 pfa = (fa *) a[2];
1266 } else {
1267 pfa = makedfa(fs, 1);
1269 if (nematch(pfa,s)) {
1270 tempstat = pfa->initstat;
1271 pfa->initstat = 2;
1272 do {
1273 n++;
1274 snprintf(num, sizeof(num), "%d", n);
1275 temp = *patbeg;
1276 *patbeg = '\0';
1277 if (is_number(s))
1278 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1279 else
1280 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1281 *patbeg = temp;
1282 s = patbeg + patlen;
1283 if (*(patbeg+patlen-1) == 0 || *s == 0) {
1284 n++;
1285 snprintf(num, sizeof(num), "%d", n);
1286 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1287 pfa->initstat = tempstat;
1288 goto spdone;
1290 } while (nematch(pfa,s));
1291 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1292 /* cf gsub and refldbld */
1294 n++;
1295 snprintf(num, sizeof(num), "%d", n);
1296 if (is_number(s))
1297 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1298 else
1299 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1300 spdone:
1301 pfa = NULL;
1302 } else if (sep == ' ') {
1303 for (n = 0; ; ) {
1304 while (*s == ' ' || *s == '\t' || *s == '\n')
1305 s++;
1306 if (*s == 0)
1307 break;
1308 n++;
1309 t = s;
1311 s++;
1312 while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
1313 temp = *s;
1314 *s = '\0';
1315 snprintf(num, sizeof(num), "%d", n);
1316 if (is_number(t))
1317 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1318 else
1319 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1320 *s = temp;
1321 if (*s != 0)
1322 s++;
1324 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1325 for (n = 0; *s != 0; s++) {
1326 char buf[2];
1327 n++;
1328 snprintf(num, sizeof(num), "%d", n);
1329 buf[0] = *s;
1330 buf[1] = 0;
1331 if (isdigit((uschar)buf[0]))
1332 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1333 else
1334 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1336 } else if (*s != 0) {
1337 for (;;) {
1338 n++;
1339 t = s;
1340 while (*s != sep && *s != '\n' && *s != '\0')
1341 s++;
1342 temp = *s;
1343 *s = '\0';
1344 snprintf(num, sizeof(num), "%d", n);
1345 if (is_number(t))
1346 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1347 else
1348 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1349 *s = temp;
1350 if (*s++ == 0)
1351 break;
1354 tempfree(ap);
1355 tempfree(y);
1356 if (a[2] != 0 && arg3type == STRING) {
1357 tempfree(x);
1359 x = gettemp();
1360 x->tval = NUM;
1361 x->fval = n;
1362 return(x);
1365 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1367 Cell *x;
1369 x = execute(a[0]);
1370 if (istrue(x)) {
1371 tempfree(x);
1372 x = execute(a[1]);
1373 } else {
1374 tempfree(x);
1375 x = execute(a[2]);
1377 return(x);
1380 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1382 Cell *x;
1384 x = execute(a[0]);
1385 if (istrue(x)) {
1386 tempfree(x);
1387 x = execute(a[1]);
1388 } else if (a[2] != 0) {
1389 tempfree(x);
1390 x = execute(a[2]);
1392 return(x);
1395 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1397 Cell *x;
1399 for (;;) {
1400 x = execute(a[0]);
1401 if (!istrue(x))
1402 return(x);
1403 tempfree(x);
1404 x = execute(a[1]);
1405 if (isbreak(x)) {
1406 x = True;
1407 return(x);
1409 if (isnext(x) || isexit(x) || isret(x))
1410 return(x);
1411 tempfree(x);
1415 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1417 Cell *x;
1419 for (;;) {
1420 x = execute(a[0]);
1421 if (isbreak(x))
1422 return True;
1423 if (isnext(x) || isexit(x) || isret(x))
1424 return(x);
1425 tempfree(x);
1426 x = execute(a[1]);
1427 if (!istrue(x))
1428 return(x);
1429 tempfree(x);
1433 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1435 Cell *x;
1437 x = execute(a[0]);
1438 tempfree(x);
1439 for (;;) {
1440 if (a[1]!=0) {
1441 x = execute(a[1]);
1442 if (!istrue(x)) return(x);
1443 else tempfree(x);
1445 x = execute(a[3]);
1446 if (isbreak(x)) /* turn off break */
1447 return True;
1448 if (isnext(x) || isexit(x) || isret(x))
1449 return(x);
1450 tempfree(x);
1451 x = execute(a[2]);
1452 tempfree(x);
1456 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1458 Cell *x, *vp, *arrayp, *cp, *ncp;
1459 Array *tp;
1460 int i;
1462 vp = execute(a[0]);
1463 arrayp = execute(a[1]);
1464 if (!isarr(arrayp)) {
1465 return True;
1467 tp = (Array *) arrayp->sval;
1468 tempfree(arrayp);
1469 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1470 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1471 setsval(vp, cp->nval);
1472 ncp = cp->cnext;
1473 x = execute(a[2]);
1474 if (isbreak(x)) {
1475 tempfree(vp);
1476 return True;
1478 if (isnext(x) || isexit(x) || isret(x)) {
1479 tempfree(vp);
1480 return(x);
1482 tempfree(x);
1485 return True;
1488 void flush_all(void);
1490 static char *nawk_toXXX(const char *s,
1491 int (*fun_c)(int),
1492 wint_t (*fun_wc)(wint_t))
1494 char *buf = NULL;
1495 char *pbuf = NULL;
1496 const char *ps = NULL;
1497 size_t n = 0;
1498 mbstate_t mbs, mbs2;
1499 wchar_t wc;
1500 size_t sz = MB_CUR_MAX;
1502 if (sz == 1) {
1503 buf = tostring(s);
1505 for (pbuf = buf; *pbuf; pbuf++)
1506 *pbuf = fun_c((uschar)*pbuf);
1508 return buf;
1509 } else {
1510 /* upper/lower character may be shorter/longer */
1511 buf = tostringN(s, strlen(s) * sz + 1);
1513 memset(&mbs, 0, sizeof(mbs));
1514 memset(&mbs2, 0, sizeof(mbs2));
1516 ps = s;
1517 pbuf = buf;
1518 while (n = mbrtowc(&wc, ps, sz, &mbs),
1519 n > 0 && n != (size_t)-1 && n != (size_t)-2)
1521 ps += n;
1523 n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1524 if (n == (size_t)-1)
1525 FATAL("illegal wide character %s", s);
1527 pbuf += n;
1530 *pbuf = 0;
1532 if (n)
1533 FATAL("illegal byte sequence %s", s);
1535 return buf;
/* nawk_toupper - newly allocated upper-case copy of s (see nawk_toXXX) */
static char *nawk_toupper(const char *s)
{
	char *upper;

	upper = nawk_toXXX(s, toupper, towupper);
	return upper;
}
/* nawk_tolower - newly allocated lower-case copy of s (see nawk_toXXX) */
static char *nawk_tolower(const char *s)
{
	char *lower;

	lower = nawk_toXXX(s, tolower, towlower);
	return lower;
}
1549 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
1551 Cell *x, *y;
1552 Awkfloat u;
1553 int t, sz;
1554 unsigned int tmp;
1555 char *buf, *fmt;
1556 Node *nextarg;
1557 FILE *fp;
1558 time_t tv;
1559 struct tm *tm;
1561 t = ptoi(a[0]);
1562 x = execute(a[1]);
1563 nextarg = a[1]->nnext;
1564 switch (t) {
1565 case FLENGTH:
1566 if (isarr(x))
1567 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
1568 else
1569 u = strlen(getsval(x));
1570 break;
1571 case FLOG:
1572 u = errcheck(log(getfval(x)), "log"); break;
1573 case FINT:
1574 modf(getfval(x), &u); break;
1575 case FEXP:
1576 u = errcheck(exp(getfval(x)), "exp"); break;
1577 case FSQRT:
1578 u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1579 case FSIN:
1580 u = sin(getfval(x)); break;
1581 case FCOS:
1582 u = cos(getfval(x)); break;
1583 case FATAN:
1584 if (nextarg == 0) {
1585 WARNING("atan2 requires two arguments; returning 1.0");
1586 u = 1.0;
1587 } else {
1588 y = execute(a[1]->nnext);
1589 u = atan2(getfval(x), getfval(y));
1590 tempfree(y);
1591 nextarg = nextarg->nnext;
1593 break;
1594 case FSYSTEM:
1595 fflush(stdout); /* in case something is buffered already */
1596 u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
1597 break;
1598 case FRAND:
1599 /* in principle, rand() returns something in 0..RAND_MAX */
1600 u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
1601 break;
1602 case FSRAND:
1603 if (isrec(x)) /* no argument provided */
1604 u = time((time_t *)0);
1605 else
1606 u = getfval(x);
1607 srand(tmp = (unsigned int) u);
1608 u = srand_seed;
1609 srand_seed = tmp;
1610 break;
1611 case FTOUPPER:
1612 case FTOLOWER:
1613 if (t == FTOUPPER)
1614 buf = nawk_toupper(getsval(x));
1615 else
1616 buf = nawk_tolower(getsval(x));
1617 tempfree(x);
1618 x = gettemp();
1619 setsval(x, buf);
1620 free(buf);
1621 return x;
1622 case FFLUSH:
1623 if (isrec(x) || strlen(getsval(x)) == 0) {
1624 flush_all(); /* fflush() or fflush("") -> all */
1625 u = 0;
1626 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1627 u = -1;
1628 else
1629 u = fflush(fp);
1630 break;
1631 case FSYSTIME:
1632 u = time((time_t *) 0); break;
1633 case FSTRFTIME:
1634 /* strftime([format [,timestamp]]) */
1635 if (nextarg) {
1636 y = execute(nextarg), nextarg = nextarg->nnext;
1637 tv = (time_t) getfval(y);
1638 tempfree(y);
1639 } else
1640 tv = time((time_t *) 0);
1641 tm = localtime(&tv);
1643 if (isrec(x)) {
1644 /* format argument not provided, use default */
1645 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1646 } else
1647 fmt = tostring(getsval(x));
1649 sz = 32, buf = NULL;
1650 do {
1651 if ((buf = realloc(buf, (sz *= 2))) == NULL)
1652 FATAL("out of memory in strftime");
1653 } while(strftime(buf, sz, fmt, tm) == 0);
1655 y = gettemp();
1656 setsval(y, buf);
1657 free(fmt);
1658 free(buf);
1660 return y;
1661 default: /* can't happen */
1662 FATAL("illegal function type %d", t);
1663 break;
1665 tempfree(x);
1666 x = gettemp();
1667 setfval(x, u);
1668 if (nextarg != 0) {
1669 WARNING("warning: function has too many arguments");
1670 for ( ; nextarg; nextarg = nextarg->nnext)
1671 execute(nextarg);
1673 return(x);
1676 Cell *printstat(Node **a, int n) /* print a[0] */
1678 Node *x;
1679 Cell *y;
1680 FILE *fp;
1682 if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
1683 fp = stdout;
1684 else
1685 fp = redirect(ptoi(a[1]), a[2]);
1686 for (x = a[0]; x != NULL; x = x->nnext) {
1687 y = execute(x);
1688 fputs(getpssval(y), fp);
1689 tempfree(y);
1690 if (x->nnext == NULL)
1691 fputs(*ORS, fp);
1692 else
1693 fputs(*OFS, fp);
1695 if (a[1] != 0)
1696 fflush(fp);
1697 if (ferror(fp))
1698 FATAL("write error on %s", filename(fp));
1699 return(True);
1702 Cell *nullproc(Node **a, int n)
1704 n = n;
1705 a = a;
1706 return 0;
1710 FILE *redirect(int a, Node *b) /* set up all i/o redirections */
1712 FILE *fp;
1713 Cell *x;
1714 char *fname;
1716 x = execute(b);
1717 fname = getsval(x);
1718 fp = openfile(a, fname, NULL);
1719 if (fp == NULL)
1720 FATAL("can't open file %s", fname);
1721 tempfree(x);
1722 return fp;
/* One entry per stream opened through a redirection or getline. */
struct files {
	FILE	*fp;		/* open stream; NULL marks a free slot */
	const char *fname;	/* name the stream was opened under */
	int	mode;		/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* table of open streams, allocated by stdinit() */
size_t nfiles;		/* number of slots in files[] */
1732 void stdinit(void) /* in case stdin, etc., are not constants */
1734 nfiles = FOPEN_MAX;
1735 files = calloc(nfiles, sizeof(*files));
1736 if (files == NULL)
1737 FATAL("can't allocate file memory for %zu files", nfiles);
1738 files[0].fp = stdin;
1739 files[0].fname = "/dev/stdin";
1740 files[0].mode = LT;
1741 files[1].fp = stdout;
1742 files[1].fname = "/dev/stdout";
1743 files[1].mode = GT;
1744 files[2].fp = stderr;
1745 files[2].fname = "/dev/stderr";
1746 files[2].mode = GT;
1749 FILE *openfile(int a, const char *us, int *pnewflag)
1751 const char *s = us;
1752 size_t i;
1753 int m;
1754 FILE *fp = 0;
1756 if (*s == '\0')
1757 FATAL("null file name in print or getline");
1758 for (i = 0; i < nfiles; i++)
1759 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1760 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1761 a == FFLUSH)) {
1762 if (pnewflag)
1763 *pnewflag = 0;
1764 return files[i].fp;
1766 if (a == FFLUSH) /* didn't find it, so don't create it! */
1767 return NULL;
1769 for (i = 0; i < nfiles; i++)
1770 if (files[i].fp == NULL)
1771 break;
1772 if (i >= nfiles) {
1773 struct files *nf;
1774 size_t nnf = nfiles + FOPEN_MAX;
1775 nf = realloc(files, nnf * sizeof(*nf));
1776 if (nf == NULL)
1777 FATAL("cannot grow files for %s and %zu files", s, nnf);
1778 (void)memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1779 nfiles = nnf;
1780 files = nf;
1782 fflush(stdout); /* force a semblance of order */
1783 m = a;
1784 if (a == GT) {
1785 fp = fopen(s, "w");
1786 } else if (a == APPEND) {
1787 fp = fopen(s, "a");
1788 m = GT; /* so can mix > and >> */
1789 } else if (a == '|') { /* output pipe */
1790 fp = popen(s, "w");
1791 } else if (a == LE) { /* input pipe */
1792 fp = popen(s, "r");
1793 } else if (a == LT) { /* getline <file */
1794 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
1795 } else /* can't happen */
1796 FATAL("illegal redirection %d", a);
1797 if (fp != NULL) {
1798 files[i].fname = tostring(s);
1799 files[i].fp = fp;
1800 files[i].mode = m;
1801 if (pnewflag)
1802 *pnewflag = 1;
1804 return fp;
1807 const char *filename(FILE *fp)
1809 size_t i;
1811 for (i = 0; i < nfiles; i++)
1812 if (fp == files[i].fp)
1813 return files[i].fname;
1814 return "???";
1817 Cell *closefile(Node **a, int n)
1819 Cell *x;
1820 size_t i;
1821 int stat;
1823 n = n;
1824 x = execute(a[0]);
1825 getsval(x);
1826 stat = -1;
1827 for (i = 0; i < nfiles; i++) {
1828 if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
1829 if (ferror(files[i].fp))
1830 WARNING( "i/o error occurred on %s", files[i].fname );
1831 if (files[i].mode == '|' || files[i].mode == LE)
1832 stat = pclose(files[i].fp) == -1;
1833 else
1834 stat = fclose(files[i].fp) == EOF;
1835 if (stat) {
1836 stat = -1;
1837 WARNING( "i/o error occurred closing %s",
1838 files[i].fname );
1840 if (i > 2) /* don't do /dev/std... */
1841 free(__UNCONST(files[i].fname));
1842 files[i].fname = NULL; /* watch out for ref thru this */
1843 files[i].fp = NULL;
1846 tempfree(x);
1847 x = gettemp();
1848 setfval(x, (Awkfloat) stat);
1849 return(x);
1852 void closeall(void)
1854 size_t i;
1855 int stat;
1857 for (i = 0; i < nfiles; i++) {
1858 if (files[i].fp) {
1859 if (ferror(files[i].fp))
1860 WARNING( "i/o error occurred on %s", files[i].fname );
1861 if (i == 0)
1862 stat = fpurge(files[i].fp) == EOF;
1863 else if (i <= 2)
1864 stat = fflush(files[i].fp) == EOF;
1865 else if (files[i].mode == '|' || files[i].mode == LE)
1866 stat = pclose(files[i].fp) == -1;
1867 else
1868 stat = fclose(files[i].fp) == EOF;
1869 if (stat)
1870 WARNING( "i/o error occurred while closing %s", files[i].fname );
1875 void flush_all(void)
1877 size_t i;
1879 for (i = 0; i < nfiles; i++)
1880 if (files[i].fp)
1881 fflush(files[i].fp);
1884 void backsub(uschar **pb_ptr, const uschar **sptr_ptr);
1886 Cell *sub(Node **a, int nnn) /* substitute command */
1888 const uschar *sptr;
1889 uschar *q;
1890 Cell *x, *y, *result;
1891 uschar *t, *buf, *pb;
1892 fa *pfa;
1893 int bufsz = recsize;
1895 if ((buf = malloc(bufsz)) == NULL)
1896 FATAL("out of memory in sub");
1897 x = execute(a[3]); /* target string */
1898 t = getsval(x);
1899 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1900 pfa = (fa *) a[1]; /* regular expression */
1901 else {
1902 y = execute(a[1]);
1903 pfa = makedfa(getsval(y), 1);
1904 tempfree(y);
1906 y = execute(a[2]); /* replacement string */
1907 result = False;
1908 if (pmatch(pfa, t)) {
1909 sptr = t;
1910 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1911 pb = buf;
1912 while (sptr < patbeg)
1913 *pb++ = *sptr++;
1914 sptr = getsval(y);
1915 while (*sptr != 0) {
1916 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1917 if (*sptr == '\\') {
1918 backsub(&pb, &sptr);
1919 } else if (*sptr == '&') {
1920 sptr++;
1921 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1922 for (q = patbeg; q < patbeg+patlen; )
1923 *pb++ = *q++;
1924 } else
1925 *pb++ = *sptr++;
1927 *pb = '\0';
1928 if (pb > buf + bufsz)
1929 FATAL("sub result1 %.30s too big; can't happen", buf);
1930 sptr = patbeg + patlen;
1931 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1932 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1933 while ((*pb++ = *sptr++) != 0)
1936 if (pb > buf + bufsz)
1937 FATAL("sub result2 %.30s too big; can't happen", buf);
1938 setsval(x, buf); /* BUG: should be able to avoid copy */
1939 result = True;;
1941 tempfree(x);
1942 tempfree(y);
1943 free(buf);
1944 return result;
1947 Cell *gsub(Node **a, int nnn) /* global substitute */
1949 Cell *x, *y;
1950 const char *rptr;
1951 const uschar *sptr;
1952 uschar *t, *q;
1953 uschar *pb, *buf;
1954 fa *pfa;
1955 int mflag, tempstat, num;
1956 int bufsz = recsize;
1958 if ((buf = malloc(bufsz)) == NULL)
1959 FATAL("out of memory in gsub");
1960 mflag = 0; /* if mflag == 0, can replace empty string */
1961 num = 0;
1962 x = execute(a[3]); /* target string */
1963 t = getsval(x);
1964 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1965 pfa = (fa *) a[1]; /* regular expression */
1966 else {
1967 y = execute(a[1]);
1968 pfa = makedfa(getsval(y), 1);
1969 tempfree(y);
1971 y = execute(a[2]); /* replacement string */
1972 if (pmatch(pfa, t)) {
1973 tempstat = pfa->initstat;
1974 pfa->initstat = 2;
1975 pb = buf;
1976 rptr = getsval(y);
1977 do {
1978 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
1979 if (mflag == 0) { /* can replace empty */
1980 num++;
1981 sptr = rptr;
1982 while (*sptr != 0) {
1983 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1984 if (*sptr == '\\') {
1985 backsub(&pb, &sptr);
1986 } else if (*sptr == '&') {
1987 sptr++;
1988 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1989 for (q = patbeg; q < patbeg+patlen; )
1990 *pb++ = *q++;
1991 } else
1992 *pb++ = *sptr++;
1995 if (*t == 0) /* at end */
1996 goto done;
1997 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
1998 *pb++ = *t++;
1999 if (pb > buf + bufsz) /* BUG: not sure of this test */
2000 FATAL("gsub result0 %.30s too big; can't happen", buf);
2001 mflag = 0;
2003 else { /* matched nonempty string */
2004 num++;
2005 sptr = t;
2006 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2007 while (sptr < patbeg)
2008 *pb++ = *sptr++;
2009 sptr = rptr;
2010 while (*sptr != 0) {
2011 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2012 if (*sptr == '\\') {
2013 backsub(&pb, &sptr);
2014 } else if (*sptr == '&') {
2015 sptr++;
2016 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2017 for (q = patbeg; q < patbeg+patlen; )
2018 *pb++ = *q++;
2019 } else
2020 *pb++ = *sptr++;
2022 t = patbeg + patlen;
2023 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2024 goto done;
2025 if (pb > buf + bufsz)
2026 FATAL("gsub result1 %.30s too big; can't happen", buf);
2027 mflag = 1;
2029 } while (pmatch(pfa,t));
2030 sptr = t;
2031 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2032 while ((*pb++ = *sptr++) != 0)
2034 done: if (pb < buf + bufsz)
2035 *pb = '\0';
2036 else if (*(pb-1) != '\0')
2037 FATAL("gsub result2 %.30s truncated; can't happen", buf);
2038 setsval(x, buf); /* BUG: should be able to avoid copy + free */
2039 pfa->initstat = tempstat;
2041 tempfree(x);
2042 tempfree(y);
2043 x = gettemp();
2044 x->tval = NUM;
2045 x->fval = num;
2046 free(buf);
2047 return(x);
2050 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2051 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2053 Cell *x, *y, *res, *h;
2054 char *rptr;
2055 const uschar *sptr;
2056 uschar *q, *pb, *t, *buf;
2057 fa *pfa;
2058 int mflag, tempstat, num, whichm;
2059 int bufsz = recsize;
2061 if ((buf = malloc(bufsz)) == NULL)
2062 FATAL("out of memory in gensub");
2063 mflag = 0; /* if mflag == 0, can replace empty string */
2064 num = 0;
2065 x = execute(a[4]); /* source string */
2066 t = getsval(x);
2067 res = copycell(x); /* target string - initially copy of source */
2068 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2069 pfa = (fa *) a[1]; /* regular expression */
2070 else {
2071 y = execute(a[1]);
2072 pfa = makedfa(getsval(y), 1);
2073 tempfree(y);
2075 y = execute(a[2]); /* replacement string */
2076 h = execute(a[3]); /* which matches should be replaced */
2077 sptr = getsval(h);
2078 if (sptr[0] == 'g' || sptr[0] == 'G')
2079 whichm = -1;
2080 else {
2082 * The specified number is index of replacement, starting
2083 * from 1. GNU awk treats index lower than 0 same as
2084 * 1, we do same for compatibility.
2086 whichm = (int) getfval(h) - 1;
2087 if (whichm < 0)
2088 whichm = 0;
2090 tempfree(h);
2092 if (pmatch(pfa, t)) {
2093 char *sl;
2095 tempstat = pfa->initstat;
2096 pfa->initstat = 2;
2097 pb = buf;
2098 rptr = getsval(y);
2100 * XXX if there are any backreferences in subst string,
2101 * complain now.
2103 for(sl=rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2104 if (strchr("0123456789", sl[1])) {
2105 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2109 do {
2110 if (whichm >= 0 && whichm != num) {
2111 num++;
2112 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2114 /* copy the part of string up to and including
2115 * match to output buffer */
2116 while (t < patbeg + patlen)
2117 *pb++ = *t++;
2118 continue;
2121 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2122 if (mflag == 0) { /* can replace empty */
2123 num++;
2124 sptr = rptr;
2125 while (*sptr != 0) {
2126 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2127 if (*sptr == '\\') {
2128 backsub(&pb, &sptr);
2129 } else if (*sptr == '&') {
2130 sptr++;
2131 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2132 for (q = patbeg; q < patbeg+patlen; )
2133 *pb++ = *q++;
2134 } else
2135 *pb++ = *sptr++;
2138 if (*t == 0) /* at end */
2139 goto done;
2140 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2141 *pb++ = *t++;
2142 if (pb > buf + bufsz) /* BUG: not sure of this test */
2143 FATAL("gensub result0 %.30s too big; can't happen", buf);
2144 mflag = 0;
2146 else { /* matched nonempty string */
2147 num++;
2148 sptr = t;
2149 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2150 while (sptr < patbeg)
2151 *pb++ = *sptr++;
2152 sptr = rptr;
2153 while (*sptr != 0) {
2154 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2155 if (*sptr == '\\') {
2156 backsub(&pb, &sptr);
2157 } else if (*sptr == '&') {
2158 sptr++;
2159 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2160 for (q = patbeg; q < patbeg+patlen; )
2161 *pb++ = *q++;
2162 } else
2163 *pb++ = *sptr++;
2165 t = patbeg + patlen;
2166 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2167 goto done;
2168 if (pb > buf + bufsz)
2169 FATAL("gensub result1 %.30s too big; can't happen", buf);
2170 mflag = 1;
2172 } while (pmatch(pfa,t));
2173 sptr = t;
2174 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2175 while ((*pb++ = *sptr++) != 0)
2177 done: if (pb > buf + bufsz)
2178 FATAL("gensub result2 %.30s too big; can't happen", buf);
2179 *pb = '\0';
2180 setsval(res, buf);
2181 pfa->initstat = tempstat;
2183 tempfree(x);
2184 tempfree(y);
2185 free(buf);
2186 return(res);
2189 void backsub(uschar **pb_ptr, const uschar **sptr_ptr)/* handle \\& variations */
2190 { /* sptr[0] == '\\' */
2191 uschar *pb = *pb_ptr;
2192 const uschar *sptr = *sptr_ptr;
2194 if (sptr[1] == '\\') {
2195 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2196 *pb++ = '\\';
2197 *pb++ = '&';
2198 sptr += 4;
2199 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2200 *pb++ = '\\';
2201 sptr += 2;
2202 } else { /* \\x -> \\x */
2203 *pb++ = *sptr++;
2204 *pb++ = *sptr++;
2206 } else if (sptr[1] == '&') { /* literal & */
2207 sptr++;
2208 *pb++ = *sptr++;
2209 } else /* literal \ */
2210 *pb++ = *sptr++;
2212 *pb_ptr = pb;
2213 *sptr_ptr = sptr;