etc/services - sync with NetBSD-8
[minix.git] / external / historical / nawk / dist / run.c
blob56bf049a7d33589188c7d22acf004f4d6b22bfd6
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
29 #define DEBUG
30 #include <stdio.h>
31 #include <ctype.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <setjmp.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <time.h>
40 #include <stdint.h>
41 #include "awk.h"
42 #include "awkgram.h"
44 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
46 void stdinit(void);
49 #undef tempfree
51 void tempfree(Cell *p) {
52 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
53 WARNING("bad csub %d in Cell %d %s",
54 p->csub, p->ctype, p->sval);
56 if (istemp(p))
57 tfree(p);
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /* */
68 /* #ifndef FOPEN_MAX */
69 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #endif */
71 /* */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74 /* #endif */
76 jmp_buf env;
77 extern int pairstack[];
78 extern unsigned int srand_seed;
80 Node *winner = NULL; /* root of parse tree */
81 Cell *tmps; /* free temporary cells for execution */
83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
84 Cell *True = &truecell;
85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
86 Cell *False = &falsecell;
87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
88 Cell *jbreak = &breakcell;
89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
90 Cell *jcont = &contcell;
91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
92 Cell *jnext = &nextcell;
93 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL};
94 Cell *jnextfile = &nextfilecell;
95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
96 Cell *jexit = &exitcell;
97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
98 Cell *jret = &retcell;
99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };
101 Node *curnode = NULL; /* the node being executed, for debugging */
103 /* buffer memory management */
104 int adjbuf(uschar **pbuf, int *psiz, int minlen, int quantum, uschar **pbptr,
105 const char *whatrtn)
106 /* pbuf: address of pointer to buffer being managed
107 * psiz: address of buffer size variable
108 * minlen: minimum length of buffer needed
109 * quantum: buffer size quantum
110 * pbptr: address of movable pointer into buffer, or 0 if none
111 * whatrtn: name of the calling routine if failure should cause fatal error
113 * return 0 for realloc failure, !=0 for success
116 if (minlen > *psiz) {
117 char *tbuf;
118 int rminlen = quantum ? minlen % quantum : 0;
119 int boff = pbptr ? *pbptr - *pbuf : 0;
120 /* round up to next multiple of quantum */
121 if (rminlen)
122 minlen += quantum - rminlen;
123 tbuf = realloc(*pbuf, minlen);
124 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
125 if (tbuf == NULL) {
126 if (whatrtn)
127 FATAL("out of memory in %s", whatrtn);
128 return 0;
130 *pbuf = tbuf;
131 *psiz = minlen;
132 if (pbptr)
133 *pbptr = tbuf + boff;
135 return 1;
138 void run(Node *a) /* execution of parse tree starts here */
140 stdinit();
141 execute(a);
142 closeall();
145 Cell *execute(Node *u) /* execute a node of the parse tree */
147 Cell *(*proc)(Node **, int);
148 Cell *x;
149 Node *a;
151 if (u == NULL)
152 return(True);
153 for (a = u; ; a = a->nnext) {
154 curnode = a;
155 if (isvalue(a)) {
156 x = (Cell *) (a->narg[0]);
157 if (isfld(x) && !donefld)
158 fldbld();
159 else if (isrec(x) && !donerec)
160 recbld();
161 return(x);
163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
164 FATAL("illegal statement");
165 proc = proctab[a->nobj-FIRSTTOKEN];
166 x = (*proc)(a->narg, a->nobj);
167 if (isfld(x) && !donefld)
168 fldbld();
169 else if (isrec(x) && !donerec)
170 recbld();
171 if (isexpr(a))
172 return(x);
173 if (isjump(x))
174 return(x);
175 if (a->nnext == NULL)
176 return(x);
177 tempfree(x);
182 Cell *program(Node **a, int n) /* execute an awk program */
183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
184 Cell *x;
186 if (setjmp(env) != 0)
187 goto ex;
188 if (a[0]) { /* BEGIN */
189 x = execute(a[0]);
190 if (isexit(x))
191 return(True);
192 if (isjump(x))
193 FATAL("illegal break, continue, next or nextfile from BEGIN");
194 tempfree(x);
196 if (a[1] || a[2])
197 while (getrec(&record, &recsize, 1) > 0) {
198 x = execute(a[1]);
199 if (isexit(x))
200 break;
201 tempfree(x);
204 if (setjmp(env) != 0) /* handles exit within END */
205 goto ex1;
206 if (a[2]) { /* END */
207 x = execute(a[2]);
208 if (isbreak(x) || isnext(x) || iscont(x))
209 FATAL("illegal break, continue, next or nextfile from END");
210 tempfree(x);
212 ex1:
213 return(True);
216 struct Frame { /* stack frame for awk function calls */
217 int nargs; /* number of arguments in this call */
218 Cell *fcncell; /* pointer to Cell for function */
219 Cell **args; /* pointer to array of arguments after execute */
220 Cell *retval; /* return value */
223 #define NARGS 50 /* max args in a call */
225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
226 int nframe = 0; /* number of frames allocated */
227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };
232 int i, ncall, ndef;
233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 Node *x;
235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
236 Cell *y, *z, *fcn;
237 char *s;
239 fcn = execute(a[0]); /* the function itself */
240 s = fcn->nval;
241 if (!isfcn(fcn))
242 FATAL("calling undefined function %s", s);
243 if (frame == NULL) {
244 frp = frame = calloc(nframe += 100, sizeof(*frp));
245 if (frame == NULL)
246 FATAL("out of space for stack frames calling %s", s);
248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
249 ncall++;
250 ndef = (int) fcn->fval; /* args in defn */
251 dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (frp-frame)) );
252 if (ncall > ndef)
253 WARNING("function %s called with %d args, uses only %d",
254 s, ncall, ndef);
255 if (ncall + ndef > NARGS)
256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
258 dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (frp-frame)) );
259 y = execute(x);
260 oargs[i] = y;
261 dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
263 if (isfcn(y))
264 FATAL("can't use function %s as argument in %s", y->nval, s);
265 if (isarr(y))
266 args[i] = y; /* arrays by ref */
267 else
268 args[i] = copycell(y);
269 tempfree(y);
271 for ( ; i < ndef; i++) { /* add null args for ones not provided */
272 args[i] = gettemp();
273 *args[i] = newcopycell;
275 frp++; /* now ok to up frame */
276 if (frp >= frame + nframe) {
277 int dfp = frp - frame; /* old index */
278 frame = realloc(frame, (nframe += 100) * sizeof(*frame));
279 if (frame == NULL)
280 FATAL("out of space for stack frames in %s", s);
281 frp = frame + dfp;
283 frp->fcncell = fcn;
284 frp->args = args;
285 frp->nargs = ndef; /* number defined with (excess are locals) */
286 frp->retval = gettemp();
288 dprintf( ("start exec of %s, fp=%d\n", s, (int) (frp-frame)) );
289 y = execute((Node *)(fcn->sval)); /* execute body */
290 dprintf( ("finished exec of %s, fp=%d\n", s, (int) (frp-frame)) );
292 for (i = 0; i < ndef; i++) {
293 Cell *t = frp->args[i];
294 if (isarr(t)) {
295 if (t->csub == CCOPY) {
296 if (i >= ncall) {
297 freesymtab(t);
298 t->csub = CTEMP;
299 tempfree(t);
300 } else {
301 oargs[i]->tval = t->tval;
302 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303 oargs[i]->sval = t->sval;
304 tempfree(t);
307 } else if (t != y) { /* kludge to prevent freeing twice */
308 t->csub = CTEMP;
309 tempfree(t);
310 } else if (t == y && t->csub == CCOPY) {
311 t->csub = CTEMP;
312 tempfree(t);
313 freed = 1;
316 tempfree(fcn);
317 if (isexit(y) || isnext(y))
318 return y;
319 if (freed == 0) {
320 tempfree(y); /* don't free twice! */
322 z = frp->retval; /* return value */
323 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
324 frp--;
325 return(z);
328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330 Cell *y;
332 /* copy is not constant or field */
334 y = gettemp();
335 y->tval = x->tval & ~(CON|FLD|REC);
336 y->csub = CCOPY; /* prevents freeing until call is over */
337 y->nval = x->nval; /* BUG? */
338 if (isstr(x) /* || x->ctype == OCELL */) {
339 y->sval = tostring(x->sval);
340 y->tval &= ~DONTFREE;
341 } else
342 y->tval |= DONTFREE;
343 y->fval = x->fval;
344 return y;
347 Cell *arg(Node **a, int n) /* nth argument of a function */
350 n = ptoi(a[0]); /* argument number, counting from 0 */
351 dprintf( ("arg(%d), fp->nargs=%d\n", n, frp->nargs) );
352 if (n+1 > frp->nargs)
353 FATAL("argument #%d of function %s was not supplied",
354 n+1, frp->fcncell->nval);
355 return frp->args[n];
358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360 Cell *y;
362 switch (n) {
363 case EXIT:
364 if (a[0] != NULL) {
365 y = execute(a[0]);
366 errorflag = (int) getfval(y);
367 tempfree(y);
369 longjmp(env, 1);
370 case RETURN:
371 if (a[0] != NULL) {
372 y = execute(a[0]);
373 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 setsval(frp->retval, getsval(y));
375 frp->retval->fval = getfval(y);
376 frp->retval->tval |= NUM;
378 else if (y->tval & STR)
379 setsval(frp->retval, getsval(y));
380 else if (y->tval & NUM)
381 setfval(frp->retval, getfval(y));
382 else /* can't happen */
383 FATAL("bad type variable %d", y->tval);
384 tempfree(y);
386 return(jret);
387 case NEXT:
388 return(jnext);
389 case NEXTFILE:
390 nextfile();
391 return(jnextfile);
392 case BREAK:
393 return(jbreak);
394 case CONTINUE:
395 return(jcont);
396 default: /* can't happen */
397 FATAL("illegal jump type %d", n);
399 return 0; /* not reached */
402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
403 { /* a[0] is variable, a[1] is operator, a[2] is filename */
404 Cell *r, *x;
405 extern Cell **fldtab;
406 FILE *fp;
407 uschar *buf;
408 int bufsize = recsize;
409 int mode, newflag;
411 if ((buf = malloc(bufsize)) == NULL)
412 FATAL("out of memory in getline");
414 fflush(stdout); /* in case someone is waiting for a prompt */
415 r = gettemp();
416 if (a[1] != NULL) { /* getline < file */
417 x = execute(a[2]); /* filename */
418 mode = ptoi(a[1]);
419 if (mode == '|') /* input pipe */
420 mode = LE; /* arbitrary flag */
421 fp = openfile(mode, getsval(x), &newflag);
422 tempfree(x);
423 if (fp == NULL)
424 n = -1;
425 else
426 n = readrec(&buf, &bufsize, fp, newflag);
427 if (n <= 0) {
429 } else if (a[0] != NULL) { /* getline var <file */
430 x = execute(a[0]);
431 setsval(x, buf);
432 tempfree(x);
433 } else { /* getline <file */
434 setsval(fldtab[0], buf);
435 if (is_number(fldtab[0]->sval)) {
436 fldtab[0]->fval = atof(fldtab[0]->sval);
437 fldtab[0]->tval |= NUM;
440 } else { /* bare getline; use current input */
441 if (a[0] == NULL) /* getline */
442 n = getrec(&record, &recsize, 1);
443 else { /* getline var */
444 n = getrec(&buf, &bufsize, 0);
445 x = execute(a[0]);
446 setsval(x, buf);
447 tempfree(x);
450 setfval(r, (Awkfloat) n);
451 free(buf);
452 return r;
455 Cell *getnf(Node **a, int n) /* get NF */
457 if (donefld == 0)
458 fldbld();
459 return (Cell *) a[0];
462 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
464 Cell *x, *y, *z;
465 char *s;
466 Node *np;
467 uschar *buf;
468 int bufsz = recsize;
469 int nsub = strlen(*SUBSEP);
471 if ((buf = malloc(bufsz)) == NULL)
472 FATAL("out of memory in array");
474 x = execute(a[0]); /* Cell* for symbol table */
475 buf[0] = 0;
476 for (np = a[1]; np; np = np->nnext) {
477 y = execute(np); /* subscript */
478 s = getsval(y);
479 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
480 FATAL("out of memory for %s[%s...]", x->nval, buf);
481 strlcat(buf, s, bufsz);
482 if (np->nnext)
483 strlcat(buf, *SUBSEP, bufsz);
484 tempfree(y);
486 if (!isarr(x)) {
487 dprintf( ("making %s into an array\n", NN(x->nval)) );
488 if (freeable(x))
489 xfree(x->sval);
490 x->tval &= ~(STR|NUM|DONTFREE);
491 x->tval |= ARR;
492 x->sval = (char *) makesymtab(NSYMTAB);
494 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
495 z->ctype = OCELL;
496 z->csub = CVAR;
497 tempfree(x);
498 free(buf);
499 return(z);
502 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
504 Cell *x, *y;
505 Node *np;
506 uschar *s;
507 int nsub = strlen(*SUBSEP);
509 x = execute(a[0]); /* Cell* for symbol table */
510 if (!isarr(x))
511 return True;
512 if (a[1] == 0) { /* delete the elements, not the table */
513 freesymtab(x);
514 x->tval &= ~STR;
515 x->tval |= ARR;
516 x->sval = (char *) makesymtab(NSYMTAB);
517 } else {
518 int bufsz = recsize;
519 uschar *buf;
520 if ((buf = malloc(bufsz)) == NULL)
521 FATAL("out of memory in adelete");
522 buf[0] = 0;
523 for (np = a[1]; np; np = np->nnext) {
524 y = execute(np); /* subscript */
525 s = getsval(y);
526 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
527 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
528 strlcat(buf, s, bufsz);
529 if (np->nnext)
530 strlcat(buf, *SUBSEP, bufsz);
531 tempfree(y);
533 freeelem(x, buf);
534 free(buf);
536 tempfree(x);
537 return True;
540 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
542 Cell *x, *ap, *k;
543 Node *p;
544 uschar *buf;
545 char *s;
546 int bufsz = recsize;
547 int nsub = strlen(*SUBSEP);
549 ap = execute(a[1]); /* array name */
550 if (!isarr(ap)) {
551 dprintf( ("making %s into an array\n", ap->nval) );
552 if (freeable(ap))
553 xfree(ap->sval);
554 ap->tval &= ~(STR|NUM|DONTFREE);
555 ap->tval |= ARR;
556 ap->sval = (char *) makesymtab(NSYMTAB);
558 if ((buf = malloc(bufsz)) == NULL) {
559 FATAL("out of memory in intest");
561 buf[0] = 0;
562 for (p = a[0]; p; p = p->nnext) {
563 x = execute(p); /* expr */
564 s = getsval(x);
565 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
566 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
567 strcat(buf, s);
568 tempfree(x);
569 if (p->nnext)
570 strcat(buf, *SUBSEP);
572 k = lookup(buf, (Array *) ap->sval);
573 tempfree(ap);
574 free(buf);
575 if (k == NULL)
576 return(False);
577 else
578 return(True);
582 Cell *matchop(Node **a, int n) /* ~ and match() */
584 Cell *x, *y;
585 uschar *s;
586 char *t;
587 int i;
588 fa *pfa;
589 int (*mf)(fa *, const char *) = match, mode = 0;
591 if (n == MATCHFCN) {
592 mf = pmatch;
593 mode = 1;
595 x = execute(a[1]); /* a[1] = target text */
596 s = getsval(x);
597 if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */
598 i = (*mf)((fa *) a[2], s);
599 else {
600 y = execute(a[2]); /* a[2] = regular expr */
601 t = getsval(y);
602 pfa = makedfa(t, mode);
603 i = (*mf)(pfa, s);
604 tempfree(y);
606 tempfree(x);
607 if (n == MATCHFCN) {
608 int start = patbeg - s + 1;
609 if (patlen < 0)
610 start = 0;
611 setfval(rstartloc, (Awkfloat) start);
612 setfval(rlengthloc, (Awkfloat) patlen);
613 x = gettemp();
614 x->tval = NUM;
615 x->fval = start;
616 return x;
617 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
618 return(True);
619 else
620 return(False);
624 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
626 Cell *x, *y;
627 int i;
629 x = execute(a[0]);
630 i = istrue(x);
631 tempfree(x);
632 switch (n) {
633 case BOR:
634 if (i) return(True);
635 y = execute(a[1]);
636 i = istrue(y);
637 tempfree(y);
638 if (i) return(True);
639 else return(False);
640 case AND:
641 if ( !i ) return(False);
642 y = execute(a[1]);
643 i = istrue(y);
644 tempfree(y);
645 if (i) return(True);
646 else return(False);
647 case NOT:
648 if (i) return(False);
649 else return(True);
650 default: /* can't happen */
651 FATAL("unknown boolean operator %d", n);
653 return 0; /*NOTREACHED*/
656 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
658 int i;
659 Cell *x, *y;
660 Awkfloat j;
662 x = execute(a[0]);
663 y = execute(a[1]);
664 if (x->tval&NUM && y->tval&NUM) {
665 j = x->fval - y->fval;
666 i = j<0? -1: (j>0? 1: 0);
667 } else {
668 i = strcmp(getsval(x), getsval(y));
670 tempfree(x);
671 tempfree(y);
672 switch (n) {
673 case LT: if (i<0) return(True);
674 else return(False);
675 case LE: if (i<=0) return(True);
676 else return(False);
677 case NE: if (i!=0) return(True);
678 else return(False);
679 case EQ: if (i == 0) return(True);
680 else return(False);
681 case GE: if (i>=0) return(True);
682 else return(False);
683 case GT: if (i>0) return(True);
684 else return(False);
685 default: /* can't happen */
686 FATAL("unknown relational operator %d", n);
688 return 0; /*NOTREACHED*/
691 void tfree(Cell *a) /* free a tempcell */
693 if (freeable(a)) {
694 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
695 xfree(a->sval);
697 if (a == tmps)
698 FATAL("tempcell list is curdled");
699 a->cnext = tmps;
700 tmps = a;
703 Cell *gettemp(void) /* get a tempcell */
704 { int i;
705 Cell *x;
707 if (!tmps) {
708 tmps = calloc(100, sizeof(*tmps));
709 if (!tmps)
710 FATAL("out of space for temporaries");
711 for(i = 1; i < 100; i++)
712 tmps[i-1].cnext = &tmps[i];
713 tmps[i-1].cnext = 0;
715 x = tmps;
716 tmps = x->cnext;
717 *x = tempcell;
718 return(x);
721 Cell *indirect(Node **a, int n) /* $( a[0] ) */
723 Awkfloat val;
724 Cell *x;
725 int m;
726 char *s;
728 x = execute(a[0]);
729 val = getfval(x); /* freebsd: defend against super large field numbers */
730 if ((Awkfloat)INT_MAX < val)
731 FATAL("trying to access out of range field %s", x->nval);
732 m = (int) val;
733 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
734 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
735 /* BUG: can x->nval ever be null??? */
736 tempfree(x);
737 x = fieldadr(m);
738 x->ctype = OCELL; /* BUG? why are these needed? */
739 x->csub = CFLD;
740 return(x);
743 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
745 int k, m, n;
746 char *s;
747 int temp;
748 Cell *x, *y, *z = 0;
750 x = execute(a[0]);
751 y = execute(a[1]);
752 if (a[2] != 0)
753 z = execute(a[2]);
754 s = getsval(x);
755 k = strlen(s) + 1;
756 if (k <= 1) {
757 tempfree(x);
758 tempfree(y);
759 if (a[2] != 0) {
760 tempfree(z);
762 x = gettemp();
763 setsval(x, "");
764 return(x);
766 m = (int) getfval(y);
767 if (m <= 0)
768 m = 1;
769 else if (m > k)
770 m = k;
771 tempfree(y);
772 if (a[2] != 0) {
773 n = (int) getfval(z);
774 tempfree(z);
775 } else
776 n = k - 1;
777 if (n < 0)
778 n = 0;
779 else if (n > k - m)
780 n = k - m;
781 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
782 y = gettemp();
783 temp = s[n+m-1]; /* with thanks to John Linderman */
784 s[n+m-1] = '\0';
785 setsval(y, s + m - 1);
786 s[n+m-1] = temp;
787 tempfree(x);
788 return(y);
791 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
793 Cell *x, *y, *z;
794 char *s1, *s2, *p1, *p2, *q;
795 Awkfloat v = 0.0;
797 x = execute(a[0]);
798 s1 = getsval(x);
799 y = execute(a[1]);
800 s2 = getsval(y);
802 z = gettemp();
803 for (p1 = s1; *p1 != '\0'; p1++) {
804 for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
806 if (*p2 == '\0') {
807 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
808 break;
811 tempfree(x);
812 tempfree(y);
813 setfval(z, v);
814 return(z);
817 #define MAXNUMSIZE 50
819 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
821 uschar *fmt, *p, *t;
822 const char *os;
823 Cell *x;
824 int flag = 0, n;
825 int fmtwd; /* format width */
826 int fmtsz = recsize;
827 uschar *buf = *pbuf;
828 int bufsize = *pbufsize;
829 #define FMTSZ(a) (fmtsz - ((a) - fmt))
830 #define BUFSZ(a) (bufsize - ((a) - buf))
832 os = s;
833 p = buf;
834 if ((fmt = malloc(fmtsz)) == NULL)
835 FATAL("out of memory in format()");
836 while (*s) {
837 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
838 if (*s != '%') {
839 *p++ = *s++;
840 continue;
842 if (*(s+1) == '%') {
843 *p++ = '%';
844 s += 2;
845 continue;
847 /* have to be real careful in case this is a huge number, eg, %100000d */
848 fmtwd = atoi(s+1);
849 if (fmtwd < 0)
850 fmtwd = -fmtwd;
851 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
852 for (t = fmt; (*t++ = *s) != '\0'; s++) {
853 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
854 FATAL("format item %.30s... ran format() out of memory", os);
855 if (*s == 'l' || *s == 'h' || *s == 'L')
856 goto weird;
857 if (isalpha((uschar)*s))
858 break; /* the ansi panoply */
859 if (*s == '*') {
860 if (a == NULL)
861 FATAL("not enough args in printf("
862 "\"%.30s\")", os);
863 x = execute(a);
864 a = a->nnext;
865 snprintf(t - 1, FMTSZ(t - 1),
866 "%d", fmtwd=(int) getfval(x));
867 if (fmtwd < 0)
868 fmtwd = -fmtwd;
869 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
870 t = fmt + strlen(fmt);
871 tempfree(x);
874 *t = '\0';
875 if (fmtwd < 0)
876 fmtwd = -fmtwd;
877 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
879 switch (*s) {
880 case 'f': case 'e': case 'g': case 'E': case 'G':
881 flag = 'f';
882 break;
883 case 'd': case 'i':
884 flag = 'd';
885 if(*(s-1) == 'l') break;
886 *(t-1) = 'j';
887 *t = 'd';
888 *++t = '\0';
889 break;
890 case 'o': case 'x': case 'X': case 'u':
891 flag = *(s-1) == 'l' ? 'd' : 'u';
892 *(t-1) = 'j';
893 *t = *s;
894 *++t = '\0';
895 break;
896 case 's':
897 flag = 's';
898 break;
899 case 'c':
900 flag = 'c';
901 break;
902 default:
903 weird:
904 WARNING("weird printf conversion %s", fmt);
905 flag = '?';
906 break;
908 if (a == NULL)
909 FATAL("not enough args in printf(%s)", os);
910 x = execute(a);
911 a = a->nnext;
912 n = MAXNUMSIZE;
913 if (fmtwd > n)
914 n = fmtwd;
915 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
916 switch (flag) {
917 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
918 t = getsval(x);
919 n = strlen(t);
920 if (fmtwd > n)
921 n = fmtwd;
922 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
923 p += strlen(p);
924 snprintf(p, BUFSZ(p), "%s", t);
925 break;
926 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
927 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
928 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
929 case 's':
930 t = getsval(x);
931 n = strlen(t);
932 if (fmtwd > n)
933 n = fmtwd;
934 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
935 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
936 snprintf(p, BUFSZ(p), fmt, t);
937 break;
938 case 'c':
939 if (isnum(x)) {
940 if (getfval(x))
941 snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
942 else {
943 *p++ = '\0'; /* explicit null byte */
944 *p = '\0'; /* next output will start here */
946 } else
947 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
948 break;
949 default:
950 FATAL("can't happen: bad conversion %c in format()", flag);
952 tempfree(x);
953 p += strlen(p);
954 s++;
956 *p = '\0';
957 free(fmt);
958 for ( ; a; a = a->nnext) /* evaluate any remaining args */
959 execute(a);
960 *pbuf = buf;
961 *pbufsize = bufsize;
962 return p - buf;
965 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
967 Cell *x;
968 Node *y;
969 char *buf;
970 int bufsz=3*recsize;
972 if ((buf = malloc(bufsz)) == NULL)
973 FATAL("out of memory in awksprintf");
974 y = a[0]->nnext;
975 x = execute(a[0]);
976 if (format(&buf, &bufsz, getsval(x), y) == -1)
977 FATAL("sprintf string %.30s... too long. can't happen.", buf);
978 tempfree(x);
979 x = gettemp();
980 x->sval = buf;
981 x->tval = STR;
982 return(x);
985 Cell *awkprintf(Node **a, int n) /* printf */
986 { /* a[0] is list of args, starting with format string */
987 /* a[1] is redirection operator, a[2] is redirection file */
988 FILE *fp;
989 Cell *x;
990 Node *y;
991 char *buf;
992 int len;
993 int bufsz=3*recsize;
995 if ((buf = malloc(bufsz)) == NULL)
996 FATAL("out of memory in awkprintf");
997 y = a[0]->nnext;
998 x = execute(a[0]);
999 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1000 FATAL("printf string %.30s... too long. can't happen.", buf);
1001 tempfree(x);
1002 if (a[1] == NULL) {
1003 /* fputs(buf, stdout); */
1004 fwrite(buf, len, 1, stdout);
1005 if (ferror(stdout))
1006 FATAL("write error on stdout");
1007 } else {
1008 fp = redirect(ptoi(a[1]), a[2]);
1009 /* fputs(buf, fp); */
1010 fwrite(buf, len, 1, fp);
1011 fflush(fp);
1012 if (ferror(fp))
1013 FATAL("write error on %s", filename(fp));
1015 free(buf);
1016 return(True);
1019 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1021 Awkfloat i, j = 0;
1022 double v;
1023 Cell *x, *y, *z;
1025 x = execute(a[0]);
1026 i = getfval(x);
1027 tempfree(x);
1028 if (n != UMINUS) {
1029 y = execute(a[1]);
1030 j = getfval(y);
1031 tempfree(y);
1033 z = gettemp();
1034 switch (n) {
1035 case ADD:
1036 i += j;
1037 break;
1038 case MINUS:
1039 i -= j;
1040 break;
1041 case MULT:
1042 i *= j;
1043 break;
1044 case DIVIDE:
1045 if (j == 0)
1046 FATAL("division by zero");
1047 i /= j;
1048 break;
1049 case MOD:
1050 if (j == 0)
1051 FATAL("division by zero in mod");
1052 modf(i/j, &v);
1053 i = i - j * v;
1054 break;
1055 case UMINUS:
1056 i = -i;
1057 break;
1058 case POWER:
1059 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1060 i = ipow(i, (int) j);
1061 else
1062 i = errcheck(pow(i, j), "pow");
1063 break;
1064 default: /* can't happen */
1065 FATAL("illegal arithmetic operator %d", n);
1067 setfval(z, i);
1068 return(z);
/*
 * ipow: x**n.  ought to be done by pow, but isn't always.
 * Squares the result for n/2 recursively; returns 1 for n <= 0.
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;
	half = ipow(x, n/2);
	return (n % 2 != 0) ? x * half * half : half * half;
}
1084 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1086 Cell *x, *z;
1087 int k;
1088 Awkfloat xf;
1090 x = execute(a[0]);
1091 xf = getfval(x);
1092 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1093 if (n == PREINCR || n == PREDECR) {
1094 setfval(x, xf + k);
1095 return(x);
1097 z = gettemp();
1098 setfval(z, xf);
1099 setfval(x, xf + k);
1100 tempfree(x);
1101 return(z);
1104 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1105 { /* this is subtle; don't muck with it. */
1106 Cell *x, *y;
1107 Awkfloat xf, yf;
1108 double v;
1110 y = execute(a[1]);
1111 x = execute(a[0]);
1112 if (n == ASSIGN) { /* ordinary assignment */
1113 if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
1114 ; /* leave alone unless it's a field */
1115 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1116 setsval(x, getsval(y));
1117 x->fval = getfval(y);
1118 x->tval |= NUM;
1120 else if (isstr(y))
1121 setsval(x, getsval(y));
1122 else if (isnum(y))
1123 setfval(x, getfval(y));
1124 else
1125 funnyvar(y, "read value of");
1126 tempfree(y);
1127 return(x);
1129 xf = getfval(x);
1130 yf = getfval(y);
1131 switch (n) {
1132 case ADDEQ:
1133 xf += yf;
1134 break;
1135 case SUBEQ:
1136 xf -= yf;
1137 break;
1138 case MULTEQ:
1139 xf *= yf;
1140 break;
1141 case DIVEQ:
1142 if (yf == 0)
1143 FATAL("division by zero in /=");
1144 xf /= yf;
1145 break;
1146 case MODEQ:
1147 if (yf == 0)
1148 FATAL("division by zero in %%=");
1149 modf(xf/yf, &v);
1150 xf = xf - yf * v;
1151 break;
1152 case POWEQ:
1153 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1154 xf = ipow(xf, (int) yf);
1155 else
1156 xf = errcheck(pow(xf, yf), "pow");
1157 break;
1158 default:
1159 FATAL("illegal assignment operator %d", n);
1160 break;
1162 tempfree(y);
1163 setfval(x, xf);
1164 return(x);
1167 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1169 Cell *x, *y, *z;
1170 int n1, n2;
1171 char *s;
1173 x = execute(a[0]);
1174 y = execute(a[1]);
1175 getsval(x);
1176 getsval(y);
1177 n1 = strlen(x->sval);
1178 n2 = strlen(y->sval);
1179 s = malloc(n1 + n2 + 1);
1180 if (s == NULL)
1181 FATAL("out of space concatenating %.15s... and %.15s...",
1182 x->sval, y->sval);
1183 strcpy(s, x->sval);
1184 strcpy(s+n1, y->sval);
1185 tempfree(x);
1186 tempfree(y);
1187 z = gettemp();
1188 z->sval = s;
1189 z->tval = STR;
1190 return(z);
1193 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1195 Cell *x;
1197 if (a[0] == 0)
1198 x = execute(a[1]);
1199 else {
1200 x = execute(a[0]);
1201 if (istrue(x)) {
1202 tempfree(x);
1203 x = execute(a[1]);
1206 return x;
1209 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1211 Cell *x;
1212 int pair;
1214 pair = ptoi(a[3]);
1215 if (pairstack[pair] == 0) {
1216 x = execute(a[0]);
1217 if (istrue(x))
1218 pairstack[pair] = 1;
1219 tempfree(x);
1221 if (pairstack[pair] == 1) {
1222 x = execute(a[1]);
1223 if (istrue(x))
1224 pairstack[pair] = 0;
1225 tempfree(x);
1226 x = execute(a[2]);
1227 return(x);
1229 return(False);
1232 static char regexpr[] = "(regexpr)";
1233 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1235 Cell *x = 0, *y, *ap;
1236 char *s, *origs;
1237 int sep;
1238 char *t, temp, num[50], *fs = 0;
1239 int n, tempstat, arg3type;
1241 y = execute(a[0]); /* source string */
1242 origs = s = strdup(getsval(y));
1243 arg3type = ptoi(a[3]);
1244 if (a[2] == 0) /* fs string */
1245 fs = *FS;
1246 else if (arg3type == STRING) { /* split(str,arr,"string") */
1247 x = execute(a[2]);
1248 fs = getsval(x);
1249 } else if (arg3type == REGEXPR)
1250 fs = regexpr; /* split(str,arr,/regexpr/) */
1251 else
1252 FATAL("illegal type of split");
1253 sep = *fs;
1254 ap = execute(a[1]); /* array name */
1255 freesymtab(ap);
1256 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1257 ap->tval &= ~STR;
1258 ap->tval |= ARR;
1259 ap->sval = (char *) makesymtab(NSYMTAB);
1261 n = 0;
1262 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1263 /* split(s, a, //); have to arrange that it looks like empty sep */
1264 arg3type = 0;
1265 fs = EMPTY;
1266 sep = 0;
1268 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1269 fa *pfa;
1270 if (arg3type == REGEXPR) { /* it's ready already */
1271 pfa = (fa *) a[2];
1272 } else {
1273 pfa = makedfa(fs, 1);
1275 if (nematch(pfa,s)) {
1276 tempstat = pfa->initstat;
1277 pfa->initstat = 2;
1278 do {
1279 n++;
1280 snprintf(num, sizeof(num), "%d", n);
1281 temp = *patbeg;
1282 *patbeg = '\0';
1283 if (is_number(s))
1284 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1285 else
1286 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1287 *patbeg = temp;
1288 s = patbeg + patlen;
1289 if (*(patbeg+patlen-1) == 0 || *s == 0) {
1290 n++;
1291 snprintf(num, sizeof(num), "%d", n);
1292 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1293 pfa->initstat = tempstat;
1294 goto spdone;
1296 } while (nematch(pfa,s));
1297 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1298 /* cf gsub and refldbld */
1300 n++;
1301 snprintf(num, sizeof(num), "%d", n);
1302 if (is_number(s))
1303 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1304 else
1305 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1306 spdone:
1307 pfa = NULL;
1308 } else if (sep == ' ') {
1309 for (n = 0; ; ) {
1310 while (*s == ' ' || *s == '\t' || *s == '\n')
1311 s++;
1312 if (*s == 0)
1313 break;
1314 n++;
1315 t = s;
1317 s++;
1318 while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
1319 temp = *s;
1320 *s = '\0';
1321 snprintf(num, sizeof(num), "%d", n);
1322 if (is_number(t))
1323 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1324 else
1325 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1326 *s = temp;
1327 if (*s != 0)
1328 s++;
1330 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1331 for (n = 0; *s != 0; s++) {
1332 char buf[2];
1333 n++;
1334 snprintf(num, sizeof(num), "%d", n);
1335 buf[0] = *s;
1336 buf[1] = 0;
1337 if (isdigit((uschar)buf[0]))
1338 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1339 else
1340 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1342 } else if (*s != 0) {
1343 for (;;) {
1344 n++;
1345 t = s;
1346 while (*s != sep && *s != '\n' && *s != '\0')
1347 s++;
1348 temp = *s;
1349 *s = '\0';
1350 snprintf(num, sizeof(num), "%d", n);
1351 if (is_number(t))
1352 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1353 else
1354 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1355 *s = temp;
1356 if (*s++ == 0)
1357 break;
1360 tempfree(ap);
1361 tempfree(y);
1362 free(origs);
1363 if (a[2] != 0 && arg3type == STRING) {
1364 tempfree(x);
1366 x = gettemp();
1367 x->tval = NUM;
1368 x->fval = n;
1369 return(x);
1372 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1374 Cell *x;
1376 x = execute(a[0]);
1377 if (istrue(x)) {
1378 tempfree(x);
1379 x = execute(a[1]);
1380 } else {
1381 tempfree(x);
1382 x = execute(a[2]);
1384 return(x);
1387 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1389 Cell *x;
1391 x = execute(a[0]);
1392 if (istrue(x)) {
1393 tempfree(x);
1394 x = execute(a[1]);
1395 } else if (a[2] != 0) {
1396 tempfree(x);
1397 x = execute(a[2]);
1399 return(x);
1402 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1404 Cell *x;
1406 for (;;) {
1407 x = execute(a[0]);
1408 if (!istrue(x))
1409 return(x);
1410 tempfree(x);
1411 x = execute(a[1]);
1412 if (isbreak(x)) {
1413 x = True;
1414 return(x);
1416 if (isnext(x) || isexit(x) || isret(x))
1417 return(x);
1418 tempfree(x);
1422 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1424 Cell *x;
1426 for (;;) {
1427 x = execute(a[0]);
1428 if (isbreak(x))
1429 return True;
1430 if (isnext(x) || isexit(x) || isret(x))
1431 return(x);
1432 tempfree(x);
1433 x = execute(a[1]);
1434 if (!istrue(x))
1435 return(x);
1436 tempfree(x);
1440 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1442 Cell *x;
1444 x = execute(a[0]);
1445 tempfree(x);
1446 for (;;) {
1447 if (a[1]!=0) {
1448 x = execute(a[1]);
1449 if (!istrue(x)) return(x);
1450 else tempfree(x);
1452 x = execute(a[3]);
1453 if (isbreak(x)) /* turn off break */
1454 return True;
1455 if (isnext(x) || isexit(x) || isret(x))
1456 return(x);
1457 tempfree(x);
1458 x = execute(a[2]);
1459 tempfree(x);
1463 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1465 Cell *x, *vp, *arrayp, *cp, *ncp;
1466 Array *tp;
1467 int i;
1469 vp = execute(a[0]);
1470 arrayp = execute(a[1]);
1471 if (!isarr(arrayp)) {
1472 return True;
1474 tp = (Array *) arrayp->sval;
1475 tempfree(arrayp);
1476 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1477 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1478 setsval(vp, cp->nval);
1479 ncp = cp->cnext;
1480 x = execute(a[2]);
1481 if (isbreak(x)) {
1482 tempfree(vp);
1483 return True;
1485 if (isnext(x) || isexit(x) || isret(x)) {
1486 tempfree(vp);
1487 return(x);
1489 tempfree(x);
1492 return True;
/* forward declaration: flushes every stream recorded in the files table */
void flush_all(void);
1497 static char *nawk_toXXX(const char *s,
1498 int (*fun_c)(int),
1499 wint_t (*fun_wc)(wint_t))
1501 char *buf = NULL;
1502 char *pbuf = NULL;
1503 const char *ps = NULL;
1504 size_t n = 0;
1505 mbstate_t mbs, mbs2;
1506 wchar_t wc;
1507 size_t sz = MB_CUR_MAX;
1509 if (sz == 1) {
1510 buf = tostring(s);
1512 for (pbuf = buf; *pbuf; pbuf++)
1513 *pbuf = fun_c((uschar)*pbuf);
1515 return buf;
1516 } else {
1517 /* upper/lower character may be shorter/longer */
1518 buf = tostringN(s, strlen(s) * sz + 1);
1520 memset(&mbs, 0, sizeof(mbs));
1521 memset(&mbs2, 0, sizeof(mbs2));
1523 ps = s;
1524 pbuf = buf;
1525 while (n = mbrtowc(&wc, ps, sz, &mbs),
1526 n > 0 && n != (size_t)-1 && n != (size_t)-2)
1528 ps += n;
1530 n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1531 if (n == (size_t)-1)
1532 FATAL("illegal wide character %s", s);
1534 pbuf += n;
1537 *pbuf = 0;
1539 if (n)
1540 FATAL("illegal byte sequence %s", s);
1542 return buf;
/*
 * nawk_toupper - return a newly allocated upper-case copy of s, using
 * toupper() per byte or towupper() per wide character as nawk_toXXX()
 * decides.
 */
static char *nawk_toupper(const char *s)
{
	char *converted = nawk_toXXX(s, toupper, towupper);

	return converted;
}
/*
 * nawk_tolower - return a newly allocated lower-case copy of s, using
 * tolower() per byte or towlower() per wide character as nawk_toXXX()
 * decides.
 */
static char *nawk_tolower(const char *s)
{
	char *converted = nawk_toXXX(s, tolower, towlower);

	return converted;
}
1556 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
1558 Cell *x, *y;
1559 Awkfloat u;
1560 int t, sz;
1561 unsigned int tmp;
1562 char *buf, *fmt;
1563 Node *nextarg;
1564 FILE *fp;
1565 time_t tv;
1566 struct tm *tm;
1568 t = ptoi(a[0]);
1569 x = execute(a[1]);
1570 nextarg = a[1]->nnext;
1571 switch (t) {
1572 case FLENGTH:
1573 if (isarr(x))
1574 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
1575 else
1576 u = strlen(getsval(x));
1577 break;
1578 case FLOG:
1579 u = errcheck(log(getfval(x)), "log"); break;
1580 case FINT:
1581 modf(getfval(x), &u); break;
1582 case FEXP:
1583 u = errcheck(exp(getfval(x)), "exp"); break;
1584 case FSQRT:
1585 u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1586 case FSIN:
1587 u = sin(getfval(x)); break;
1588 case FCOS:
1589 u = cos(getfval(x)); break;
1590 case FATAN:
1591 if (nextarg == 0) {
1592 WARNING("atan2 requires two arguments; returning 1.0");
1593 u = 1.0;
1594 } else {
1595 y = execute(a[1]->nnext);
1596 u = atan2(getfval(x), getfval(y));
1597 tempfree(y);
1598 nextarg = nextarg->nnext;
1600 break;
1601 case FSYSTEM:
1602 fflush(stdout); /* in case something is buffered already */
1603 u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
1604 break;
1605 case FRAND:
1606 /* in principle, rand() returns something in 0..RAND_MAX */
1607 u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
1608 break;
1609 case FSRAND:
1610 if (isrec(x)) /* no argument provided */
1611 u = time((time_t *)0);
1612 else
1613 u = getfval(x);
1614 tmp = (unsigned int) u;
1615 srand(tmp);
1616 u = srand_seed;
1617 srand_seed = tmp;
1618 break;
1619 case FTOUPPER:
1620 case FTOLOWER:
1621 if (t == FTOUPPER)
1622 buf = nawk_toupper(getsval(x));
1623 else
1624 buf = nawk_tolower(getsval(x));
1625 tempfree(x);
1626 x = gettemp();
1627 setsval(x, buf);
1628 free(buf);
1629 return x;
1630 case FFLUSH:
1631 if (isrec(x) || strlen(getsval(x)) == 0) {
1632 flush_all(); /* fflush() or fflush("") -> all */
1633 u = 0;
1634 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1635 u = -1;
1636 else
1637 u = fflush(fp);
1638 break;
1639 case FSYSTIME:
1640 u = time((time_t *) 0); break;
1641 case FSTRFTIME:
1642 /* strftime([format [,timestamp]]) */
1643 if (nextarg) {
1644 y = execute(nextarg), nextarg = nextarg->nnext;
1645 tv = (time_t) getfval(y);
1646 tempfree(y);
1647 } else
1648 tv = time((time_t *) 0);
1649 tm = localtime(&tv);
1650 if (tm == NULL)
1651 FATAL("bad time %jd", (intmax_t)tv);
1653 if (isrec(x)) {
1654 /* format argument not provided, use default */
1655 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1656 } else
1657 fmt = tostring(getsval(x));
1659 sz = 32, buf = NULL;
1660 do {
1661 if ((buf = realloc(buf, (sz *= 2))) == NULL)
1662 FATAL("out of memory in strftime");
1663 } while(strftime(buf, sz, fmt, tm) == 0);
1665 y = gettemp();
1666 setsval(y, buf);
1667 free(fmt);
1668 free(buf);
1670 return y;
1671 default: /* can't happen */
1672 FATAL("illegal function type %d", t);
1673 break;
1675 tempfree(x);
1676 x = gettemp();
1677 setfval(x, u);
1678 if (nextarg != 0) {
1679 WARNING("warning: function has too many arguments");
1680 for ( ; nextarg; nextarg = nextarg->nnext)
1681 execute(nextarg);
1683 return(x);
1686 Cell *printstat(Node **a, int n) /* print a[0] */
1688 Node *x;
1689 Cell *y;
1690 FILE *fp;
1692 if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
1693 fp = stdout;
1694 else
1695 fp = redirect(ptoi(a[1]), a[2]);
1696 for (x = a[0]; x != NULL; x = x->nnext) {
1697 y = execute(x);
1698 fputs(getpssval(y), fp);
1699 tempfree(y);
1700 if (x->nnext == NULL)
1701 fputs(*ORS, fp);
1702 else
1703 fputs(*OFS, fp);
1705 if (a[1] != 0)
1706 fflush(fp);
1707 if (ferror(fp))
1708 FATAL("write error on %s", filename(fp));
1709 return(True);
1712 Cell *nullproc(Node **a, int n)
1714 n = n;
1715 a = a;
1716 return 0;
1720 FILE *redirect(int a, Node *b) /* set up all i/o redirections */
1722 FILE *fp;
1723 Cell *x;
1724 char *fname;
1726 x = execute(b);
1727 fname = getsval(x);
1728 fp = openfile(a, fname, NULL);
1729 if (fp == NULL)
1730 FATAL("can't open file %s", fname);
1731 tempfree(x);
1732 return fp;
/* one entry per stream the interpreter has opened */
struct files {
	FILE		*fp;	/* the stream; NULL marks a free slot */
	const char	*fname;	/* name the stream was opened under */
	int		 mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* table of streams, allocated by stdinit() */

size_t nfiles;		/* number of entries in the files table */
1743 void stdinit(void) /* in case stdin, etc., are not constants */
1745 nfiles = FOPEN_MAX;
1746 files = calloc(nfiles, sizeof(*files));
1747 if (files == NULL)
1748 FATAL("can't allocate file memory for %zu files", nfiles);
1749 files[0].fp = stdin;
1750 files[0].fname = "/dev/stdin";
1751 files[0].mode = LT;
1752 files[1].fp = stdout;
1753 files[1].fname = "/dev/stdout";
1754 files[1].mode = GT;
1755 files[2].fp = stderr;
1756 files[2].fname = "/dev/stderr";
1757 files[2].mode = GT;
1760 FILE *openfile(int a, const char *us, int *pnewflag)
1762 const char *s = us;
1763 size_t i;
1764 int m;
1765 FILE *fp = 0;
1767 if (*s == '\0')
1768 FATAL("null file name in print or getline");
1769 for (i = 0; i < nfiles; i++)
1770 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1771 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1772 a == FFLUSH)) {
1773 if (pnewflag)
1774 *pnewflag = 0;
1775 return files[i].fp;
1777 if (a == FFLUSH) /* didn't find it, so don't create it! */
1778 return NULL;
1780 for (i = 0; i < nfiles; i++)
1781 if (files[i].fp == NULL)
1782 break;
1783 if (i >= nfiles) {
1784 struct files *nf;
1785 size_t nnf = nfiles + FOPEN_MAX;
1786 nf = realloc(files, nnf * sizeof(*nf));
1787 if (nf == NULL)
1788 FATAL("cannot grow files for %s and %zu files", s, nnf);
1789 (void)memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1790 nfiles = nnf;
1791 files = nf;
1793 fflush(stdout); /* force a semblance of order */
1794 m = a;
1795 if (a == GT) {
1796 fp = fopen(s, "w");
1797 } else if (a == APPEND) {
1798 fp = fopen(s, "a");
1799 m = GT; /* so can mix > and >> */
1800 } else if (a == '|') { /* output pipe */
1801 fp = popen(s, "w");
1802 } else if (a == LE) { /* input pipe */
1803 fp = popen(s, "r");
1804 } else if (a == LT) { /* getline <file */
1805 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
1806 } else /* can't happen */
1807 FATAL("illegal redirection %d", a);
1808 if (fp != NULL) {
1809 files[i].fname = tostring(s);
1810 files[i].fp = fp;
1811 files[i].mode = m;
1812 if (pnewflag)
1813 *pnewflag = 1;
1815 return fp;
1818 const char *filename(FILE *fp)
1820 size_t i;
1822 for (i = 0; i < nfiles; i++)
1823 if (fp == files[i].fp)
1824 return files[i].fname;
1825 return "???";
1828 Cell *closefile(Node **a, int n)
1830 Cell *x;
1831 size_t i;
1832 int stat;
1834 n = n;
1835 x = execute(a[0]);
1836 getsval(x);
1837 stat = -1;
1838 for (i = 0; i < nfiles; i++) {
1839 if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
1840 if (ferror(files[i].fp))
1841 WARNING( "i/o error occurred on %s", files[i].fname );
1842 if (files[i].mode == '|' || files[i].mode == LE)
1843 stat = pclose(files[i].fp) == -1;
1844 else
1845 stat = fclose(files[i].fp) == EOF;
1846 if (stat) {
1847 stat = -1;
1848 WARNING( "i/o error occurred closing %s",
1849 files[i].fname );
1851 if (i > 2) /* don't do /dev/std... */
1852 free(__UNCONST(files[i].fname));
1853 files[i].fname = NULL; /* watch out for ref thru this */
1854 files[i].fp = NULL;
1857 tempfree(x);
1858 x = gettemp();
1859 setfval(x, (Awkfloat) stat);
1860 return(x);
1863 void closeall(void)
1865 size_t i;
1866 int stat;
1868 for (i = 0; i < nfiles; i++) {
1869 if (files[i].fp) {
1870 if (ferror(files[i].fp))
1871 WARNING( "i/o error occurred on %s", files[i].fname );
1872 if (i == 0)
1873 stat = fpurge(files[i].fp) == EOF;
1874 else if (i <= 2)
1875 stat = fflush(files[i].fp) == EOF;
1876 else if (files[i].mode == '|' || files[i].mode == LE)
1877 stat = pclose(files[i].fp) == -1;
1878 else
1879 stat = fclose(files[i].fp) == EOF;
1880 if (stat)
1881 WARNING( "i/o error occurred while closing %s", files[i].fname );
1886 void flush_all(void)
1888 size_t i;
1890 for (i = 0; i < nfiles; i++)
1891 if (files[i].fp)
1892 fflush(files[i].fp);
1895 void backsub(uschar **pb_ptr, const uschar **sptr_ptr);
1897 Cell *sub(Node **a, int nnn) /* substitute command */
1899 const uschar *sptr;
1900 uschar *q;
1901 Cell *x, *y, *result;
1902 uschar *t, *buf, *pb;
1903 fa *pfa;
1904 int bufsz = recsize;
1906 if ((buf = malloc(bufsz)) == NULL)
1907 FATAL("out of memory in sub");
1908 x = execute(a[3]); /* target string */
1909 t = getsval(x);
1910 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1911 pfa = (fa *) a[1]; /* regular expression */
1912 else {
1913 y = execute(a[1]);
1914 pfa = makedfa(getsval(y), 1);
1915 tempfree(y);
1917 y = execute(a[2]); /* replacement string */
1918 result = False;
1919 if (pmatch(pfa, t)) {
1920 sptr = t;
1921 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1922 pb = buf;
1923 while (sptr < patbeg)
1924 *pb++ = *sptr++;
1925 sptr = getsval(y);
1926 while (*sptr != 0) {
1927 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1928 if (*sptr == '\\') {
1929 backsub(&pb, &sptr);
1930 } else if (*sptr == '&') {
1931 sptr++;
1932 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1933 for (q = patbeg; q < patbeg+patlen; )
1934 *pb++ = *q++;
1935 } else
1936 *pb++ = *sptr++;
1938 *pb = '\0';
1939 if (pb > buf + bufsz)
1940 FATAL("sub result1 %.30s too big; can't happen", buf);
1941 sptr = patbeg + patlen;
1942 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1943 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1944 while ((*pb++ = *sptr++) != 0)
1947 if (pb > buf + bufsz)
1948 FATAL("sub result2 %.30s too big; can't happen", buf);
1949 setsval(x, buf); /* BUG: should be able to avoid copy */
1950 result = True;;
1952 tempfree(x);
1953 tempfree(y);
1954 free(buf);
1955 return result;
1958 Cell *gsub(Node **a, int nnn) /* global substitute */
1960 Cell *x, *y;
1961 const char *rptr;
1962 const uschar *sptr;
1963 uschar *t, *q;
1964 uschar *pb, *buf;
1965 fa *pfa;
1966 int mflag, tempstat, num;
1967 int bufsz = recsize;
1969 if ((buf = malloc(bufsz)) == NULL)
1970 FATAL("out of memory in gsub");
1971 mflag = 0; /* if mflag == 0, can replace empty string */
1972 num = 0;
1973 x = execute(a[3]); /* target string */
1974 t = getsval(x);
1975 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1976 pfa = (fa *) a[1]; /* regular expression */
1977 else {
1978 y = execute(a[1]);
1979 pfa = makedfa(getsval(y), 1);
1980 tempfree(y);
1982 y = execute(a[2]); /* replacement string */
1983 if (pmatch(pfa, t)) {
1984 tempstat = pfa->initstat;
1985 pfa->initstat = 2;
1986 pb = buf;
1987 rptr = getsval(y);
1988 do {
1989 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
1990 if (mflag == 0) { /* can replace empty */
1991 num++;
1992 sptr = rptr;
1993 while (*sptr != 0) {
1994 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1995 if (*sptr == '\\') {
1996 backsub(&pb, &sptr);
1997 } else if (*sptr == '&') {
1998 sptr++;
1999 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2000 for (q = patbeg; q < patbeg+patlen; )
2001 *pb++ = *q++;
2002 } else
2003 *pb++ = *sptr++;
2006 if (*t == 0) /* at end */
2007 goto done;
2008 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2009 *pb++ = *t++;
2010 if (pb > buf + bufsz) /* BUG: not sure of this test */
2011 FATAL("gsub result0 %.30s too big; can't happen", buf);
2012 mflag = 0;
2014 else { /* matched nonempty string */
2015 num++;
2016 sptr = t;
2017 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2018 while (sptr < patbeg)
2019 *pb++ = *sptr++;
2020 sptr = rptr;
2021 while (*sptr != 0) {
2022 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2023 if (*sptr == '\\') {
2024 backsub(&pb, &sptr);
2025 } else if (*sptr == '&') {
2026 sptr++;
2027 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2028 for (q = patbeg; q < patbeg+patlen; )
2029 *pb++ = *q++;
2030 } else
2031 *pb++ = *sptr++;
2033 t = patbeg + patlen;
2034 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2035 goto done;
2036 if (pb > buf + bufsz)
2037 FATAL("gsub result1 %.30s too big; can't happen", buf);
2038 mflag = 1;
2040 } while (pmatch(pfa,t));
2041 sptr = t;
2042 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2043 while ((*pb++ = *sptr++) != 0)
2045 done: if (pb < buf + bufsz)
2046 *pb = '\0';
2047 else if (*(pb-1) != '\0')
2048 FATAL("gsub result2 %.30s truncated; can't happen", buf);
2049 setsval(x, buf); /* BUG: should be able to avoid copy + free */
2050 pfa->initstat = tempstat;
2052 tempfree(x);
2053 tempfree(y);
2054 x = gettemp();
2055 x->tval = NUM;
2056 x->fval = num;
2057 free(buf);
2058 return(x);
2061 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2062 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2064 Cell *x, *y, *res, *h;
2065 char *rptr;
2066 const uschar *sptr;
2067 uschar *q, *pb, *t, *buf;
2068 fa *pfa;
2069 int mflag, tempstat, num, whichm;
2070 int bufsz = recsize;
2072 if ((buf = malloc(bufsz)) == NULL)
2073 FATAL("out of memory in gensub");
2074 mflag = 0; /* if mflag == 0, can replace empty string */
2075 num = 0;
2076 x = execute(a[4]); /* source string */
2077 t = getsval(x);
2078 res = copycell(x); /* target string - initially copy of source */
2079 res->csub = CTEMP; /* result values are temporary */
2080 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2081 pfa = (fa *) a[1]; /* regular expression */
2082 else {
2083 y = execute(a[1]);
2084 pfa = makedfa(getsval(y), 1);
2085 tempfree(y);
2087 y = execute(a[2]); /* replacement string */
2088 h = execute(a[3]); /* which matches should be replaced */
2089 sptr = getsval(h);
2090 if (sptr[0] == 'g' || sptr[0] == 'G')
2091 whichm = -1;
2092 else {
2094 * The specified number is index of replacement, starting
2095 * from 1. GNU awk treats index lower than 0 same as
2096 * 1, we do same for compatibility.
2098 whichm = (int) getfval(h) - 1;
2099 if (whichm < 0)
2100 whichm = 0;
2102 tempfree(h);
2104 if (pmatch(pfa, t)) {
2105 char *sl;
2107 tempstat = pfa->initstat;
2108 pfa->initstat = 2;
2109 pb = buf;
2110 rptr = getsval(y);
2112 * XXX if there are any backreferences in subst string,
2113 * complain now.
2115 for(sl=rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2116 if (strchr("0123456789", sl[1])) {
2117 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2121 do {
2122 if (whichm >= 0 && whichm != num) {
2123 num++;
2124 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2126 /* copy the part of string up to and including
2127 * match to output buffer */
2128 while (t < patbeg + patlen)
2129 *pb++ = *t++;
2130 continue;
2133 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2134 if (mflag == 0) { /* can replace empty */
2135 num++;
2136 sptr = rptr;
2137 while (*sptr != 0) {
2138 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2139 if (*sptr == '\\') {
2140 backsub(&pb, &sptr);
2141 } else if (*sptr == '&') {
2142 sptr++;
2143 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2144 for (q = patbeg; q < patbeg+patlen; )
2145 *pb++ = *q++;
2146 } else
2147 *pb++ = *sptr++;
2150 if (*t == 0) /* at end */
2151 goto done;
2152 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2153 *pb++ = *t++;
2154 if (pb > buf + bufsz) /* BUG: not sure of this test */
2155 FATAL("gensub result0 %.30s too big; can't happen", buf);
2156 mflag = 0;
2158 else { /* matched nonempty string */
2159 num++;
2160 sptr = t;
2161 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2162 while (sptr < patbeg)
2163 *pb++ = *sptr++;
2164 sptr = rptr;
2165 while (*sptr != 0) {
2166 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2167 if (*sptr == '\\') {
2168 backsub(&pb, &sptr);
2169 } else if (*sptr == '&') {
2170 sptr++;
2171 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2172 for (q = patbeg; q < patbeg+patlen; )
2173 *pb++ = *q++;
2174 } else
2175 *pb++ = *sptr++;
2177 t = patbeg + patlen;
2178 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2179 goto done;
2180 if (pb > buf + bufsz)
2181 FATAL("gensub result1 %.30s too big; can't happen", buf);
2182 mflag = 1;
2184 } while (pmatch(pfa,t));
2185 sptr = t;
2186 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2187 while ((*pb++ = *sptr++) != 0)
2189 done: if (pb > buf + bufsz)
2190 FATAL("gensub result2 %.30s too big; can't happen", buf);
2191 *pb = '\0';
2192 setsval(res, buf);
2193 pfa->initstat = tempstat;
2195 tempfree(x);
2196 tempfree(y);
2197 free(buf);
2198 return(res);
2201 void backsub(uschar **pb_ptr, const uschar **sptr_ptr)/* handle \\& variations */
2202 { /* sptr[0] == '\\' */
2203 uschar *pb = *pb_ptr;
2204 const uschar *sptr = *sptr_ptr;
2206 if (sptr[1] == '\\') {
2207 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2208 *pb++ = '\\';
2209 *pb++ = '&';
2210 sptr += 4;
2211 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2212 *pb++ = '\\';
2213 sptr += 2;
2214 } else { /* \\x -> \\x */
2215 *pb++ = *sptr++;
2216 *pb++ = *sptr++;
2218 } else if (sptr[1] == '&') { /* literal & */
2219 sptr++;
2220 *pb++ = *sptr++;
2221 } else /* literal \ */
2222 *pb++ = *sptr++;
2224 *pb_ptr = pb;
2225 *sptr_ptr = sptr;