modified: makefile
[GalaxyCodeBases.git] / tools / bioawk / run.c
blobfa82dfd387f8a16fe83f473e2b25681c4ece773c
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <setjmp.h>
29 #include <limits.h>
30 #include <math.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <time.h>
34 #include "awk.h"
35 #include "ytab.h"
37 #define tempfree(x) if (istemp(x)) tfree(x); else
40 #undef tempfree
42 void tempfree(Cell *p) {
43 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
44 WARNING("bad csub %d in Cell %d %s",
45 p->csub, p->ctype, p->sval);
47 if (istemp(p))
48 tfree(p);
52 /* do we really need these? */
53 /* #ifdef _NFILE */
54 /* #ifndef FOPEN_MAX */
55 /* #define FOPEN_MAX _NFILE */
56 /* #endif */
57 /* #endif */
58 /* */
59 /* #ifndef FOPEN_MAX */
60 /* #define FOPEN_MAX 40 */ /* max number of open files */
61 /* #endif */
62 /* */
63 /* #ifndef RAND_MAX */
64 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
65 /* #endif */
67 jmp_buf env;
68 extern int pairstack[];
69 extern Awkfloat srand_seed;
71 Node *winner = NULL; /* root of parse tree */
72 Cell *tmps; /* free temporary cells for execution */
74 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
75 Cell *True = &truecell;
76 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
77 Cell *False = &falsecell;
78 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
79 Cell *jbreak = &breakcell;
80 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
81 Cell *jcont = &contcell;
82 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
83 Cell *jnext = &nextcell;
84 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
85 Cell *jnextfile = &nextfilecell;
86 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
87 Cell *jexit = &exitcell;
88 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
89 Cell *jret = &retcell;
90 static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
92 Node *curnode = NULL; /* the node being executed, for debugging */
94 /* buffer memory management */
95 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
96 const char *whatrtn)
97 /* pbuf: address of pointer to buffer being managed
98 * psiz: address of buffer size variable
99 * minlen: minimum length of buffer needed
100 * quantum: buffer size quantum
101 * pbptr: address of movable pointer into buffer, or 0 if none
102 * whatrtn: name of the calling routine if failure should cause fatal error
104 * return 0 for realloc failure, !=0 for success
107 if (minlen > *psiz) {
108 char *tbuf;
109 int rminlen = quantum ? minlen % quantum : 0;
110 int boff = pbptr ? *pbptr - *pbuf : 0;
111 /* round up to next multiple of quantum */
112 if (rminlen)
113 minlen += quantum - rminlen;
114 tbuf = (char *) realloc(*pbuf, minlen);
115 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
116 if (tbuf == NULL) {
117 if (whatrtn)
118 FATAL("out of memory in %s", whatrtn);
119 return 0;
121 *pbuf = tbuf;
122 *psiz = minlen;
123 if (pbptr)
124 *pbptr = tbuf + boff;
126 return 1;
129 void run(Node *a) /* execution of parse tree starts here */
131 extern void stdinit(void);
133 stdinit();
134 execute(a);
135 closeall();
138 Cell *execute(Node *u) /* execute a node of the parse tree */
140 Cell *(*proc)(Node **, int);
141 Cell *x;
142 Node *a;
144 if (u == NULL)
145 return(True);
146 for (a = u; ; a = a->nnext) {
147 curnode = a;
148 if (isvalue(a)) {
149 x = (Cell *) (a->narg[0]);
150 if (isfld(x) && !donefld)
151 fldbld();
152 else if (isrec(x) && !donerec)
153 recbld();
154 return(x);
156 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
157 FATAL("illegal statement");
158 proc = proctab[a->nobj-FIRSTTOKEN];
159 x = (*proc)(a->narg, a->nobj);
160 if (isfld(x) && !donefld)
161 fldbld();
162 else if (isrec(x) && !donerec)
163 recbld();
164 if (isexpr(a))
165 return(x);
166 if (isjump(x))
167 return(x);
168 if (a->nnext == NULL)
169 return(x);
170 tempfree(x);
175 Cell *program(Node **a, int n) /* execute an awk program */
176 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
177 Cell *x;
179 if (setjmp(env) != 0)
180 goto ex;
181 if (a[0]) { /* BEGIN */
182 x = execute(a[0]);
183 if (isexit(x))
184 return(True);
185 if (isjump(x))
186 FATAL("illegal break, continue, next or nextfile from BEGIN");
187 tempfree(x);
189 if (a[1] || a[2]) {
190 if (bio_fmt > BIO_HDR) bio_set_colnm();
191 while (getrec(&record, &recsize, 1) > 0) {
192 if (bio_skip_hdr(record)) continue;
193 if (bio_fmt == BIO_HDR && (int)(*NR + .499) == 1) bio_set_colnm();
194 x = execute(a[1]);
195 if (isexit(x))
196 break;
197 tempfree(x);
201 if (setjmp(env) != 0) /* handles exit within END */
202 goto ex1;
203 if (a[2]) { /* END */
204 x = execute(a[2]);
205 if (isbreak(x) || isnext(x) || iscont(x))
206 FATAL("illegal break, continue, next or nextfile from END");
207 tempfree(x);
209 ex1:
210 return(True);
213 struct Frame { /* stack frame for awk function calls */
214 int nargs; /* number of arguments in this call */
215 Cell *fcncell; /* pointer to Cell for function */
216 Cell **args; /* pointer to array of arguments after execute */
217 Cell *retval; /* return value */
220 #define NARGS 50 /* max args in a call */
222 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
223 int nframe = 0; /* number of frames allocated */
224 struct Frame *fp = NULL; /* frame pointer. bottom level unused */
226 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
228 static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
229 int i, ncall, ndef;
230 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
231 Node *x;
232 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
233 Cell *y, *z, *fcn;
234 char *s;
236 fcn = execute(a[0]); /* the function itself */
237 s = fcn->nval;
238 if (!isfcn(fcn))
239 FATAL("calling undefined function %s", s);
240 if (frame == NULL) {
241 fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
242 if (frame == NULL)
243 FATAL("out of space for stack frames calling %s", s);
245 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
246 ncall++;
247 ndef = (int) fcn->fval; /* args in defn */
248 dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
249 if (ncall > ndef)
250 WARNING("function %s called with %d args, uses only %d",
251 s, ncall, ndef);
252 if (ncall + ndef > NARGS)
253 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
254 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
255 dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
256 y = execute(x);
257 oargs[i] = y;
258 dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
259 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
260 if (isfcn(y))
261 FATAL("can't use function %s as argument in %s", y->nval, s);
262 if (isarr(y))
263 args[i] = y; /* arrays by ref */
264 else
265 args[i] = copycell(y);
266 tempfree(y);
268 for ( ; i < ndef; i++) { /* add null args for ones not provided */
269 args[i] = gettemp();
270 *args[i] = newcopycell;
272 fp++; /* now ok to up frame */
273 if (fp >= frame + nframe) {
274 int dfp = fp - frame; /* old index */
275 frame = (struct Frame *)
276 realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
277 if (frame == NULL)
278 FATAL("out of space for stack frames in %s", s);
279 fp = frame + dfp;
281 fp->fcncell = fcn;
282 fp->args = args;
283 fp->nargs = ndef; /* number defined with (excess are locals) */
284 fp->retval = gettemp();
286 dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
287 y = execute((Node *)(fcn->sval)); /* execute body */
288 dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
290 for (i = 0; i < ndef; i++) {
291 Cell *t = fp->args[i];
292 if (isarr(t)) {
293 if (t->csub == CCOPY) {
294 if (i >= ncall) {
295 freesymtab(t);
296 t->csub = CTEMP;
297 tempfree(t);
298 } else {
299 oargs[i]->tval = t->tval;
300 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
301 oargs[i]->sval = t->sval;
302 tempfree(t);
305 } else if (t != y) { /* kludge to prevent freeing twice */
306 t->csub = CTEMP;
307 tempfree(t);
308 } else if (t == y && t->csub == CCOPY) {
309 t->csub = CTEMP;
310 tempfree(t);
311 freed = 1;
314 tempfree(fcn);
315 if (isexit(y) || isnext(y))
316 return y;
317 if (freed == 0) {
318 tempfree(y); /* don't free twice! */
320 z = fp->retval; /* return value */
321 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
322 fp--;
323 return(z);
326 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
328 Cell *y;
330 y = gettemp();
331 y->csub = CCOPY; /* prevents freeing until call is over */
332 y->nval = x->nval; /* BUG? */
333 if (isstr(x))
334 y->sval = tostring(x->sval);
335 y->fval = x->fval;
336 y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */
337 /* is DONTFREE right? */
338 return y;
341 Cell *arg(Node **a, int n) /* nth argument of a function */
344 n = ptoi(a[0]); /* argument number, counting from 0 */
345 dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
346 if (n+1 > fp->nargs)
347 FATAL("argument #%d of function %s was not supplied",
348 n+1, fp->fcncell->nval);
349 return fp->args[n];
352 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
354 Cell *y;
356 switch (n) {
357 case EXIT:
358 if (a[0] != NULL) {
359 y = execute(a[0]);
360 errorflag = (int) getfval(y);
361 tempfree(y);
363 longjmp(env, 1);
364 case RETURN:
365 if (a[0] != NULL) {
366 y = execute(a[0]);
367 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
368 setsval(fp->retval, getsval(y));
369 fp->retval->fval = getfval(y);
370 fp->retval->tval |= NUM;
372 else if (y->tval & STR)
373 setsval(fp->retval, getsval(y));
374 else if (y->tval & NUM)
375 setfval(fp->retval, getfval(y));
376 else /* can't happen */
377 FATAL("bad type variable %d", y->tval);
378 tempfree(y);
380 return(jret);
381 case NEXT:
382 return(jnext);
383 case NEXTFILE:
384 nextfile();
385 return(jnextfile);
386 case BREAK:
387 return(jbreak);
388 case CONTINUE:
389 return(jcont);
390 default: /* can't happen */
391 FATAL("illegal jump type %d", n);
393 return 0; /* not reached */
396 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
397 { /* a[0] is variable, a[1] is operator, a[2] is filename */
398 Cell *r, *x;
399 extern Cell **fldtab;
400 FILE *fp;
401 char *buf;
402 int bufsize = recsize;
403 int mode;
405 if ((buf = (char *) malloc(bufsize)) == NULL)
406 FATAL("out of memory in getline");
408 fflush(stdout); /* in case someone is waiting for a prompt */
409 r = gettemp();
410 if (a[1] != NULL) { /* getline < file */
411 x = execute(a[2]); /* filename */
412 mode = ptoi(a[1]);
413 if (mode == '|') /* input pipe */
414 mode = LE; /* arbitrary flag */
415 fp = openfile(mode, getsval(x));
416 tempfree(x);
417 if (fp == NULL)
418 n = -1;
419 else
420 n = readrec(&buf, &bufsize, fp);
421 if (n <= 0) {
423 } else if (a[0] != NULL) { /* getline var <file */
424 x = execute(a[0]);
425 setsval(x, buf);
426 tempfree(x);
427 } else { /* getline <file */
428 setsval(fldtab[0], buf);
429 if (is_number(fldtab[0]->sval)) {
430 fldtab[0]->fval = atof(fldtab[0]->sval);
431 fldtab[0]->tval |= NUM;
434 } else { /* bare getline; use current input */
435 if (a[0] == NULL) /* getline */
436 n = getrec(&record, &recsize, 1);
437 else { /* getline var */
438 n = getrec(&buf, &bufsize, 0);
439 x = execute(a[0]);
440 setsval(x, buf);
441 tempfree(x);
444 setfval(r, (Awkfloat) n);
445 free(buf);
446 return r;
449 Cell *getnf(Node **a, int n) /* get NF */
451 if (donefld == 0)
452 fldbld();
453 return (Cell *) a[0];
456 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
458 Cell *x, *y, *z;
459 char *s;
460 Node *np;
461 char *buf;
462 int bufsz = recsize;
463 int nsub = strlen(*SUBSEP);
465 if ((buf = (char *) malloc(bufsz)) == NULL)
466 FATAL("out of memory in array");
468 x = execute(a[0]); /* Cell* for symbol table */
469 buf[0] = 0;
470 for (np = a[1]; np; np = np->nnext) {
471 y = execute(np); /* subscript */
472 s = getsval(y);
473 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
474 FATAL("out of memory for %s[%s...]", x->nval, buf);
475 strcat(buf, s);
476 if (np->nnext)
477 strcat(buf, *SUBSEP);
478 tempfree(y);
480 if (!isarr(x)) {
481 dprintf( ("making %s into an array\n", NN(x->nval)) );
482 if (freeable(x))
483 xfree(x->sval);
484 x->tval &= ~(STR|NUM|DONTFREE);
485 x->tval |= ARR;
486 x->sval = (char *) makesymtab(NSYMTAB);
488 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
489 z->ctype = OCELL;
490 z->csub = CVAR;
491 tempfree(x);
492 free(buf);
493 return(z);
496 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
498 Cell *x, *y;
499 Node *np;
500 char *s;
501 int nsub = strlen(*SUBSEP);
503 x = execute(a[0]); /* Cell* for symbol table */
504 if (!isarr(x))
505 return True;
506 if (a[1] == 0) { /* delete the elements, not the table */
507 freesymtab(x);
508 x->tval &= ~STR;
509 x->tval |= ARR;
510 x->sval = (char *) makesymtab(NSYMTAB);
511 } else {
512 int bufsz = recsize;
513 char *buf;
514 if ((buf = (char *) malloc(bufsz)) == NULL)
515 FATAL("out of memory in adelete");
516 buf[0] = 0;
517 for (np = a[1]; np; np = np->nnext) {
518 y = execute(np); /* subscript */
519 s = getsval(y);
520 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
521 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
522 strcat(buf, s);
523 if (np->nnext)
524 strcat(buf, *SUBSEP);
525 tempfree(y);
527 freeelem(x, buf);
528 free(buf);
530 tempfree(x);
531 return True;
534 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
536 Cell *x, *ap, *k;
537 Node *p;
538 char *buf;
539 char *s;
540 int bufsz = recsize;
541 int nsub = strlen(*SUBSEP);
543 ap = execute(a[1]); /* array name */
544 if (!isarr(ap)) {
545 dprintf( ("making %s into an array\n", ap->nval) );
546 if (freeable(ap))
547 xfree(ap->sval);
548 ap->tval &= ~(STR|NUM|DONTFREE);
549 ap->tval |= ARR;
550 ap->sval = (char *) makesymtab(NSYMTAB);
552 if ((buf = (char *) malloc(bufsz)) == NULL) {
553 FATAL("out of memory in intest");
555 buf[0] = 0;
556 for (p = a[0]; p; p = p->nnext) {
557 x = execute(p); /* expr */
558 s = getsval(x);
559 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
560 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
561 strcat(buf, s);
562 tempfree(x);
563 if (p->nnext)
564 strcat(buf, *SUBSEP);
566 k = lookup(buf, (Array *) ap->sval);
567 tempfree(ap);
568 free(buf);
569 if (k == NULL)
570 return(False);
571 else
572 return(True);
576 Cell *matchop(Node **a, int n) /* ~ and match() */
578 Cell *x, *y;
579 char *s, *t;
580 int i;
581 fa *pfa;
582 int (*mf)(fa *, const char *) = match, mode = 0;
584 if (n == MATCHFCN) {
585 mf = pmatch;
586 mode = 1;
588 x = execute(a[1]); /* a[1] = target text */
589 s = getsval(x);
590 if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */
591 i = (*mf)((fa *) a[2], s);
592 else {
593 y = execute(a[2]); /* a[2] = regular expr */
594 t = getsval(y);
595 pfa = makedfa(t, mode);
596 i = (*mf)(pfa, s);
597 tempfree(y);
599 tempfree(x);
600 if (n == MATCHFCN) {
601 int start = patbeg - s + 1;
602 if (patlen < 0)
603 start = 0;
604 setfval(rstartloc, (Awkfloat) start);
605 setfval(rlengthloc, (Awkfloat) patlen);
606 x = gettemp();
607 x->tval = NUM;
608 x->fval = start;
609 return x;
610 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
611 return(True);
612 else
613 return(False);
617 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
619 Cell *x, *y;
620 int i;
622 x = execute(a[0]);
623 i = istrue(x);
624 tempfree(x);
625 switch (n) {
626 case BOR:
627 if (i) return(True);
628 y = execute(a[1]);
629 i = istrue(y);
630 tempfree(y);
631 if (i) return(True);
632 else return(False);
633 case AND:
634 if ( !i ) return(False);
635 y = execute(a[1]);
636 i = istrue(y);
637 tempfree(y);
638 if (i) return(True);
639 else return(False);
640 case NOT:
641 if (i) return(False);
642 else return(True);
643 default: /* can't happen */
644 FATAL("unknown boolean operator %d", n);
646 return 0; /*NOTREACHED*/
649 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
651 int i;
652 Cell *x, *y;
653 Awkfloat j;
655 x = execute(a[0]);
656 y = execute(a[1]);
657 if (x->tval&NUM && y->tval&NUM) {
658 j = x->fval - y->fval;
659 i = j<0? -1: (j>0? 1: 0);
660 } else {
661 i = strcmp(getsval(x), getsval(y));
663 tempfree(x);
664 tempfree(y);
665 switch (n) {
666 case LT: if (i<0) return(True);
667 else return(False);
668 case LE: if (i<=0) return(True);
669 else return(False);
670 case NE: if (i!=0) return(True);
671 else return(False);
672 case EQ: if (i == 0) return(True);
673 else return(False);
674 case GE: if (i>=0) return(True);
675 else return(False);
676 case GT: if (i>0) return(True);
677 else return(False);
678 default: /* can't happen */
679 FATAL("unknown relational operator %d", n);
681 return 0; /*NOTREACHED*/
684 void tfree(Cell *a) /* free a tempcell */
686 if (freeable(a)) {
687 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
688 xfree(a->sval);
690 if (a == tmps)
691 FATAL("tempcell list is curdled");
692 a->cnext = tmps;
693 tmps = a;
696 Cell *gettemp(void) /* get a tempcell */
697 { int i;
698 Cell *x;
700 if (!tmps) {
701 tmps = (Cell *) calloc(100, sizeof(Cell));
702 if (!tmps)
703 FATAL("out of space for temporaries");
704 for(i = 1; i < 100; i++)
705 tmps[i-1].cnext = &tmps[i];
706 tmps[i-1].cnext = 0;
708 x = tmps;
709 tmps = x->cnext;
710 *x = tempcell;
711 return(x);
714 Cell *indirect(Node **a, int n) /* $( a[0] ) */
716 Awkfloat val;
717 Cell *x;
718 int m;
719 char *s;
721 x = execute(a[0]);
722 val = getfval(x); /* freebsd: defend against super large field numbers */
723 if ((Awkfloat)INT_MAX < val)
724 FATAL("trying to access out of range field %s", x->nval);
725 m = (int) val;
726 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
727 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
728 /* BUG: can x->nval ever be null??? */
729 tempfree(x);
730 x = fieldadr(m);
731 x->ctype = OCELL; /* BUG? why are these needed? */
732 x->csub = CFLD;
733 return(x);
736 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
738 int k, m, n;
739 char *s;
740 int temp;
741 Cell *x, *y, *z = 0;
743 x = execute(a[0]);
744 y = execute(a[1]);
745 if (a[2] != 0)
746 z = execute(a[2]);
747 s = getsval(x);
748 k = strlen(s) + 1;
749 if (k <= 1) {
750 tempfree(x);
751 tempfree(y);
752 if (a[2] != 0) {
753 tempfree(z);
755 x = gettemp();
756 setsval(x, "");
757 return(x);
759 m = (int) getfval(y);
760 if (m <= 0)
761 m = 1;
762 else if (m > k)
763 m = k;
764 tempfree(y);
765 if (a[2] != 0) {
766 n = (int) getfval(z);
767 tempfree(z);
768 } else
769 n = k - 1;
770 if (n < 0)
771 n = 0;
772 else if (n > k - m)
773 n = k - m;
774 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
775 y = gettemp();
776 temp = s[n+m-1]; /* with thanks to John Linderman */
777 s[n+m-1] = '\0';
778 setsval(y, s + m - 1);
779 s[n+m-1] = temp;
780 tempfree(x);
781 return(y);
784 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
786 Cell *x, *y, *z;
787 char *s1, *s2, *p1, *p2, *q;
788 Awkfloat v = 0.0;
790 x = execute(a[0]);
791 s1 = getsval(x);
792 y = execute(a[1]);
793 s2 = getsval(y);
795 z = gettemp();
796 for (p1 = s1; *p1 != '\0'; p1++) {
797 for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
799 if (*p2 == '\0') {
800 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
801 break;
804 tempfree(x);
805 tempfree(y);
806 setfval(z, v);
807 return(z);
810 #define MAXNUMSIZE 50
812 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
814 char *fmt;
815 char *p, *t;
816 const char *os;
817 Cell *x;
818 int flag = 0, n;
819 int fmtwd; /* format width */
820 int fmtsz = recsize;
821 char *buf = *pbuf;
822 int bufsize = *pbufsize;
824 os = s;
825 p = buf;
826 if ((fmt = (char *) malloc(fmtsz)) == NULL)
827 FATAL("out of memory in format()");
828 while (*s) {
829 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
830 if (*s != '%') {
831 *p++ = *s++;
832 continue;
834 if (*(s+1) == '%') {
835 *p++ = '%';
836 s += 2;
837 continue;
839 /* have to be real careful in case this is a huge number, eg, %100000d */
840 fmtwd = atoi(s+1);
841 if (fmtwd < 0)
842 fmtwd = -fmtwd;
843 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
844 for (t = fmt; (*t++ = *s) != '\0'; s++) {
845 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
846 FATAL("format item %.30s... ran format() out of memory", os);
847 if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
848 break; /* the ansi panoply */
849 if (*s == '*') {
850 x = execute(a);
851 a = a->nnext;
852 sprintf(t-1, "%d", fmtwd=(int) getfval(x));
853 if (fmtwd < 0)
854 fmtwd = -fmtwd;
855 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
856 t = fmt + strlen(fmt);
857 tempfree(x);
860 *t = '\0';
861 if (fmtwd < 0)
862 fmtwd = -fmtwd;
863 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
865 switch (*s) {
866 case 'f': case 'e': case 'g': case 'E': case 'G':
867 flag = 'f';
868 break;
869 case 'd': case 'i':
870 flag = 'd';
871 if(*(s-1) == 'l') break;
872 *(t-1) = 'l';
873 *t = 'd';
874 *++t = '\0';
875 break;
876 case 'o': case 'x': case 'X': case 'u':
877 flag = *(s-1) == 'l' ? 'd' : 'u';
878 break;
879 case 's':
880 flag = 's';
881 break;
882 case 'c':
883 flag = 'c';
884 break;
885 default:
886 WARNING("weird printf conversion %s", fmt);
887 flag = '?';
888 break;
890 if (a == NULL)
891 FATAL("not enough args in printf(%s)", os);
892 x = execute(a);
893 a = a->nnext;
894 n = MAXNUMSIZE;
895 if (fmtwd > n)
896 n = fmtwd;
897 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
898 switch (flag) {
899 case '?': sprintf(p, "%s", fmt); /* unknown, so dump it too */
900 t = getsval(x);
901 n = strlen(t);
902 if (fmtwd > n)
903 n = fmtwd;
904 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
905 p += strlen(p);
906 sprintf(p, "%s", t);
907 break;
908 case 'f': sprintf(p, fmt, getfval(x)); break;
909 case 'd': sprintf(p, fmt, (long) getfval(x)); break;
910 case 'u': sprintf(p, fmt, (int) getfval(x)); break;
911 case 's':
912 t = getsval(x);
913 n = strlen(t);
914 if (fmtwd > n)
915 n = fmtwd;
916 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
917 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
918 sprintf(p, fmt, t);
919 break;
920 case 'c':
921 if (isnum(x)) {
922 if (getfval(x))
923 sprintf(p, fmt, (int) getfval(x));
924 else {
925 *p++ = '\0'; /* explicit null byte */
926 *p = '\0'; /* next output will start here */
928 } else
929 sprintf(p, fmt, getsval(x)[0]);
930 break;
931 default:
932 FATAL("can't happen: bad conversion %c in format()", flag);
934 tempfree(x);
935 p += strlen(p);
936 s++;
938 *p = '\0';
939 free(fmt);
940 for ( ; a; a = a->nnext) /* evaluate any remaining args */
941 execute(a);
942 *pbuf = buf;
943 *pbufsize = bufsize;
944 return p - buf;
947 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
949 Cell *x;
950 Node *y;
951 char *buf;
952 int bufsz=3*recsize;
954 if ((buf = (char *) malloc(bufsz)) == NULL)
955 FATAL("out of memory in awksprintf");
956 y = a[0]->nnext;
957 x = execute(a[0]);
958 if (format(&buf, &bufsz, getsval(x), y) == -1)
959 FATAL("sprintf string %.30s... too long. can't happen.", buf);
960 tempfree(x);
961 x = gettemp();
962 x->sval = buf;
963 x->tval = STR;
964 return(x);
967 Cell *awkprintf(Node **a, int n) /* printf */
968 { /* a[0] is list of args, starting with format string */
969 /* a[1] is redirection operator, a[2] is redirection file */
970 FILE *fp;
971 Cell *x;
972 Node *y;
973 char *buf;
974 int len;
975 int bufsz=3*recsize;
977 if ((buf = (char *) malloc(bufsz)) == NULL)
978 FATAL("out of memory in awkprintf");
979 y = a[0]->nnext;
980 x = execute(a[0]);
981 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
982 FATAL("printf string %.30s... too long. can't happen.", buf);
983 tempfree(x);
984 if (a[1] == NULL) {
985 /* fputs(buf, stdout); */
986 fwrite(buf, len, 1, stdout);
987 if (ferror(stdout))
988 FATAL("write error on stdout");
989 } else {
990 fp = redirect(ptoi(a[1]), a[2]);
991 /* fputs(buf, fp); */
992 fwrite(buf, len, 1, fp);
993 fflush(fp);
994 if (ferror(fp))
995 FATAL("write error on %s", filename(fp));
997 free(buf);
998 return(True);
1001 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1003 Awkfloat i, j = 0;
1004 double v;
1005 Cell *x, *y, *z;
1007 x = execute(a[0]);
1008 i = getfval(x);
1009 tempfree(x);
1010 if (n != UMINUS) {
1011 y = execute(a[1]);
1012 j = getfval(y);
1013 tempfree(y);
1015 z = gettemp();
1016 switch (n) {
1017 case ADD:
1018 i += j;
1019 break;
1020 case MINUS:
1021 i -= j;
1022 break;
1023 case MULT:
1024 i *= j;
1025 break;
1026 case DIVIDE:
1027 if (j == 0)
1028 FATAL("division by zero");
1029 i /= j;
1030 break;
1031 case MOD:
1032 if (j == 0)
1033 FATAL("division by zero in mod");
1034 modf(i/j, &v);
1035 i = i - j * v;
1036 break;
1037 case UMINUS:
1038 i = -i;
1039 break;
1040 case POWER:
1041 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1042 i = ipow(i, (int) j);
1043 else
1044 i = errcheck(pow(i, j), "pow");
1045 break;
1046 default: /* can't happen */
1047 FATAL("illegal arithmetic operator %d", n);
1049 setfval(z, i);
1050 return(z);
1053 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */
1055 double v;
1057 if (n <= 0)
1058 return 1;
1059 v = ipow(x, n/2);
1060 if (n % 2 == 0)
1061 return v * v;
1062 else
1063 return x * v * v;
1066 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1068 Cell *x, *z;
1069 int k;
1070 Awkfloat xf;
1072 x = execute(a[0]);
1073 xf = getfval(x);
1074 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1075 if (n == PREINCR || n == PREDECR) {
1076 setfval(x, xf + k);
1077 return(x);
1079 z = gettemp();
1080 setfval(z, xf);
1081 setfval(x, xf + k);
1082 tempfree(x);
1083 return(z);
1086 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1087 { /* this is subtle; don't muck with it. */
1088 Cell *x, *y;
1089 Awkfloat xf, yf;
1090 double v;
1092 y = execute(a[1]);
1093 x = execute(a[0]);
1094 if (n == ASSIGN) { /* ordinary assignment */
1095 if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
1096 ; /* leave alone unless it's a field */
1097 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1098 setsval(x, getsval(y));
1099 x->fval = getfval(y);
1100 x->tval |= NUM;
1102 else if (isstr(y))
1103 setsval(x, getsval(y));
1104 else if (isnum(y))
1105 setfval(x, getfval(y));
1106 else
1107 funnyvar(y, "read value of");
1108 tempfree(y);
1109 return(x);
1111 xf = getfval(x);
1112 yf = getfval(y);
1113 switch (n) {
1114 case ADDEQ:
1115 xf += yf;
1116 break;
1117 case SUBEQ:
1118 xf -= yf;
1119 break;
1120 case MULTEQ:
1121 xf *= yf;
1122 break;
1123 case DIVEQ:
1124 if (yf == 0)
1125 FATAL("division by zero in /=");
1126 xf /= yf;
1127 break;
1128 case MODEQ:
1129 if (yf == 0)
1130 FATAL("division by zero in %%=");
1131 modf(xf/yf, &v);
1132 xf = xf - yf * v;
1133 break;
1134 case POWEQ:
1135 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1136 xf = ipow(xf, (int) yf);
1137 else
1138 xf = errcheck(pow(xf, yf), "pow");
1139 break;
1140 default:
1141 FATAL("illegal assignment operator %d", n);
1142 break;
1144 tempfree(y);
1145 setfval(x, xf);
1146 return(x);
1149 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1151 Cell *x, *y, *z;
1152 int n1, n2;
1153 char *s;
1155 x = execute(a[0]);
1156 y = execute(a[1]);
1157 getsval(x);
1158 getsval(y);
1159 n1 = strlen(x->sval);
1160 n2 = strlen(y->sval);
1161 s = (char *) malloc(n1 + n2 + 1);
1162 if (s == NULL)
1163 FATAL("out of space concatenating %.15s... and %.15s...",
1164 x->sval, y->sval);
1165 strcpy(s, x->sval);
1166 strcpy(s+n1, y->sval);
1167 tempfree(x);
1168 tempfree(y);
1169 z = gettemp();
1170 z->sval = s;
1171 z->tval = STR;
1172 return(z);
1175 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1177 Cell *x;
1179 if (a[0] == 0)
1180 x = execute(a[1]);
1181 else {
1182 x = execute(a[0]);
1183 if (istrue(x)) {
1184 tempfree(x);
1185 x = execute(a[1]);
1188 return x;
1191 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1193 Cell *x;
1194 int pair;
1196 pair = ptoi(a[3]);
1197 if (pairstack[pair] == 0) {
1198 x = execute(a[0]);
1199 if (istrue(x))
1200 pairstack[pair] = 1;
1201 tempfree(x);
1203 if (pairstack[pair] == 1) {
1204 x = execute(a[1]);
1205 if (istrue(x))
1206 pairstack[pair] = 0;
1207 tempfree(x);
1208 x = execute(a[2]);
1209 return(x);
1211 return(False);
1214 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1216 Cell *x = 0, *y, *ap;
1217 char *s;
1218 int sep;
1219 char *t, temp, num[50], *fs = 0;
1220 int n, tempstat, arg3type;
1222 y = execute(a[0]); /* source string */
1223 s = getsval(y);
1224 arg3type = ptoi(a[3]);
1225 if (a[2] == 0) /* fs string */
1226 fs = *FS;
1227 else if (arg3type == STRING) { /* split(str,arr,"string") */
1228 x = execute(a[2]);
1229 fs = getsval(x);
1230 } else if (arg3type == REGEXPR)
1231 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1232 else
1233 FATAL("illegal type of split");
1234 sep = *fs;
1235 ap = execute(a[1]); /* array name */
1236 freesymtab(ap);
1237 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1238 ap->tval &= ~STR;
1239 ap->tval |= ARR;
1240 ap->sval = (char *) makesymtab(NSYMTAB);
1242 n = 0;
1243 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1244 /* split(s, a, //); have to arrange that it looks like empty sep */
1245 arg3type = 0;
1246 fs = "";
1247 sep = 0;
1249 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1250 fa *pfa;
1251 if (arg3type == REGEXPR) { /* it's ready already */
1252 pfa = (fa *) a[2];
1253 } else {
1254 pfa = makedfa(fs, 1);
1256 if (nematch(pfa,s)) {
1257 tempstat = pfa->initstat;
1258 pfa->initstat = 2;
1259 do {
1260 n++;
1261 sprintf(num, "%d", n);
1262 temp = *patbeg;
1263 *patbeg = '\0';
1264 if (is_number(s))
1265 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1266 else
1267 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1268 *patbeg = temp;
1269 s = patbeg + patlen;
1270 if (*(patbeg+patlen-1) == 0 || *s == 0) {
1271 n++;
1272 sprintf(num, "%d", n);
1273 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1274 pfa->initstat = tempstat;
1275 goto spdone;
1277 } while (nematch(pfa,s));
1278 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1279 /* cf gsub and refldbld */
1281 n++;
1282 sprintf(num, "%d", n);
1283 if (is_number(s))
1284 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1285 else
1286 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1287 spdone:
1288 pfa = NULL;
1289 } else if (sep == ' ') {
1290 for (n = 0; ; ) {
1291 while (*s == ' ' || *s == '\t' || *s == '\n')
1292 s++;
1293 if (*s == 0)
1294 break;
1295 n++;
1296 t = s;
1298 s++;
1299 while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
1300 temp = *s;
1301 *s = '\0';
1302 sprintf(num, "%d", n);
1303 if (is_number(t))
1304 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1305 else
1306 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1307 *s = temp;
1308 if (*s != 0)
1309 s++;
1311 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1312 for (n = 0; *s != 0; s++) {
1313 char buf[2];
1314 n++;
1315 sprintf(num, "%d", n);
1316 buf[0] = *s;
1317 buf[1] = 0;
1318 if (isdigit((uschar)buf[0]))
1319 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1320 else
1321 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1323 } else if (*s != 0) {
1324 for (;;) {
1325 n++;
1326 t = s;
1327 while (*s != sep && *s != '\n' && *s != '\0')
1328 s++;
1329 temp = *s;
1330 *s = '\0';
1331 sprintf(num, "%d", n);
1332 if (is_number(t))
1333 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1334 else
1335 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1336 *s = temp;
1337 if (*s++ == 0)
1338 break;
1341 tempfree(ap);
1342 tempfree(y);
1343 if (a[2] != 0 && arg3type == STRING) {
1344 tempfree(x);
1346 x = gettemp();
1347 x->tval = NUM;
1348 x->fval = n;
1349 return(x);
1352 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1354 Cell *x;
1356 x = execute(a[0]);
1357 if (istrue(x)) {
1358 tempfree(x);
1359 x = execute(a[1]);
1360 } else {
1361 tempfree(x);
1362 x = execute(a[2]);
1364 return(x);
1367 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1369 Cell *x;
1371 x = execute(a[0]);
1372 if (istrue(x)) {
1373 tempfree(x);
1374 x = execute(a[1]);
1375 } else if (a[2] != 0) {
1376 tempfree(x);
1377 x = execute(a[2]);
1379 return(x);
1382 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1384 Cell *x;
1386 for (;;) {
1387 x = execute(a[0]);
1388 if (!istrue(x))
1389 return(x);
1390 tempfree(x);
1391 x = execute(a[1]);
1392 if (isbreak(x)) {
1393 x = True;
1394 return(x);
1396 if (isnext(x) || isexit(x) || isret(x))
1397 return(x);
1398 tempfree(x);
1402 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1404 Cell *x;
1406 for (;;) {
1407 x = execute(a[0]);
1408 if (isbreak(x))
1409 return True;
1410 if (isnext(x) || isexit(x) || isret(x))
1411 return(x);
1412 tempfree(x);
1413 x = execute(a[1]);
1414 if (!istrue(x))
1415 return(x);
1416 tempfree(x);
1420 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1422 Cell *x;
1424 x = execute(a[0]);
1425 tempfree(x);
1426 for (;;) {
1427 if (a[1]!=0) {
1428 x = execute(a[1]);
1429 if (!istrue(x)) return(x);
1430 else tempfree(x);
1432 x = execute(a[3]);
1433 if (isbreak(x)) /* turn off break */
1434 return True;
1435 if (isnext(x) || isexit(x) || isret(x))
1436 return(x);
1437 tempfree(x);
1438 x = execute(a[2]);
1439 tempfree(x);
1443 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1445 Cell *x, *vp, *arrayp, *cp, *ncp;
1446 Array *tp;
1447 int i;
1449 vp = execute(a[0]);
1450 arrayp = execute(a[1]);
1451 if (!isarr(arrayp)) {
1452 return True;
1454 tp = (Array *) arrayp->sval;
1455 tempfree(arrayp);
1456 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1457 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1458 setsval(vp, cp->nval);
1459 ncp = cp->cnext;
1460 x = execute(a[2]);
1461 if (isbreak(x)) {
1462 tempfree(vp);
1463 return True;
1465 if (isnext(x) || isexit(x) || isret(x)) {
1466 tempfree(vp);
1467 return(x);
1469 tempfree(x);
1472 return True;
1475 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
1477 Cell *x, *y;
1478 Awkfloat u;
1479 int t;
1480 Awkfloat tmp;
1481 char *p, *buf;
1482 Node *nextarg;
1483 FILE *fp;
1484 void flush_all(void);
1486 t = ptoi(a[0]);
1487 x = execute(a[1]);
1488 nextarg = a[1]->nnext;
1489 switch (t) {
1490 case FLENGTH:
1491 if (isarr(x))
1492 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
1493 else
1494 u = strlen(getsval(x));
1495 break;
1496 case FLOG:
1497 u = errcheck(log(getfval(x)), "log"); break;
1498 case FINT:
1499 modf(getfval(x), &u); break;
1500 case FEXP:
1501 u = errcheck(exp(getfval(x)), "exp"); break;
1502 case FSQRT:
1503 u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1504 case FSIN:
1505 u = sin(getfval(x)); break;
1506 case FCOS:
1507 u = cos(getfval(x)); break;
1508 case FATAN:
1509 if (nextarg == 0) {
1510 WARNING("atan2 requires two arguments; returning 1.0");
1511 u = 1.0;
1512 } else {
1513 y = execute(a[1]->nnext);
1514 u = atan2(getfval(x), getfval(y));
1515 tempfree(y);
1516 nextarg = nextarg->nnext;
1518 break;
1519 case FSYSTEM:
1520 fflush(stdout); /* in case something is buffered already */
1521 u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
1522 break;
1523 case FRAND:
1524 /* in principle, rand() returns something in 0..RAND_MAX */
1525 u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
1526 break;
1527 case FSRAND:
1528 if (isrec(x)) /* no argument provided */
1529 u = time((time_t *)0);
1530 else
1531 u = getfval(x);
1532 tmp = u;
1533 srand((unsigned int) u);
1534 u = srand_seed;
1535 srand_seed = tmp;
1536 break;
1537 case FTOUPPER:
1538 case FTOLOWER:
1539 buf = tostring(getsval(x));
1540 if (t == FTOUPPER) {
1541 for (p = buf; *p; p++)
1542 if (islower((uschar) *p))
1543 *p = toupper((uschar)*p);
1544 } else {
1545 for (p = buf; *p; p++)
1546 if (isupper((uschar) *p))
1547 *p = tolower((uschar)*p);
1549 tempfree(x);
1550 x = gettemp();
1551 setsval(x, buf);
1552 free(buf);
1553 return x;
1554 case FFLUSH:
1555 if (isrec(x) || strlen(getsval(x)) == 0) {
1556 flush_all(); /* fflush() or fflush("") -> all */
1557 u = 0;
1558 } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
1559 u = EOF;
1560 else
1561 u = fflush(fp);
1562 break;
1563 default: /* can't happen */
1564 if ((y = bio_func(t, x, a)) != 0) {
1565 tempfree(x);
1566 return y;
1568 FATAL("illegal function type %d", t);
1569 break;
1571 tempfree(x);
1572 x = gettemp();
1573 setfval(x, u);
1574 if (nextarg != 0) {
1575 WARNING("warning: function has too many arguments");
1576 for ( ; nextarg; nextarg = nextarg->nnext)
1577 execute(nextarg);
1579 return(x);
1582 Cell *printstat(Node **a, int n) /* print a[0] */
1584 Node *x;
1585 Cell *y;
1586 FILE *fp;
1588 if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
1589 fp = stdout;
1590 else
1591 fp = redirect(ptoi(a[1]), a[2]);
1592 for (x = a[0]; x != NULL; x = x->nnext) {
1593 y = execute(x);
1594 fputs(getpssval(y), fp);
1595 tempfree(y);
1596 if (x->nnext == NULL)
1597 fputs(*ORS, fp);
1598 else
1599 fputs(*OFS, fp);
1601 if (a[1] != 0)
1602 fflush(fp);
1603 if (ferror(fp))
1604 FATAL("write error on %s", filename(fp));
1605 return(True);
1608 Cell *nullproc(Node **a, int n)
1610 n = n;
1611 a = a;
1612 return 0;
1616 FILE *redirect(int a, Node *b) /* set up all i/o redirections */
1618 FILE *fp;
1619 Cell *x;
1620 char *fname;
1622 x = execute(b);
1623 fname = getsval(x);
1624 fp = openfile(a, fname);
1625 if (fp == NULL)
1626 FATAL("can't open file %s", fname);
1627 tempfree(x);
1628 return fp;
1631 struct files {
1632 FILE *fp;
1633 const char *fname;
1634 int mode; /* '|', 'a', 'w' => LE/LT, GT */
1635 } *files;
1637 int nfiles;
1639 void stdinit(void) /* in case stdin, etc., are not constants */
1641 nfiles = FOPEN_MAX;
1642 files = calloc(nfiles, sizeof(*files));
1643 if (files == NULL)
1644 FATAL("can't allocate file memory for %u files", nfiles);
1645 files[0].fp = stdin;
1646 files[0].fname = "/dev/stdin";
1647 files[0].mode = LT;
1648 files[1].fp = stdout;
1649 files[1].fname = "/dev/stdout";
1650 files[1].mode = GT;
1651 files[2].fp = stderr;
1652 files[2].fname = "/dev/stderr";
1653 files[2].mode = GT;
1656 FILE *openfile(int a, const char *us)
1658 const char *s = us;
1659 int i, m;
1660 FILE *fp = 0;
1662 if (*s == '\0')
1663 FATAL("null file name in print or getline");
1664 for (i=0; i < nfiles; i++)
1665 if (files[i].fname && strcmp(s, files[i].fname) == 0) {
1666 if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
1667 return files[i].fp;
1668 if (a == FFLUSH)
1669 return files[i].fp;
1671 if (a == FFLUSH) /* didn't find it, so don't create it! */
1672 return NULL;
1674 for (i=0; i < nfiles; i++)
1675 if (files[i].fp == 0)
1676 break;
1677 if (i >= nfiles) {
1678 struct files *nf;
1679 int nnf = nfiles + FOPEN_MAX;
1680 nf = realloc(files, nnf * sizeof(*nf));
1681 if (nf == NULL)
1682 FATAL("cannot grow files for %s and %d files", s, nnf);
1683 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1684 nfiles = nnf;
1685 files = nf;
1687 fflush(stdout); /* force a semblance of order */
1688 m = a;
1689 if (a == GT) {
1690 fp = fopen(s, "w");
1691 } else if (a == APPEND) {
1692 fp = fopen(s, "a");
1693 m = GT; /* so can mix > and >> */
1694 } else if (a == '|') { /* output pipe */
1695 fp = popen(s, "w");
1696 } else if (a == LE) { /* input pipe */
1697 fp = popen(s, "r");
1698 } else if (a == LT) { /* getline <file */
1699 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
1700 } else /* can't happen */
1701 FATAL("illegal redirection %d", a);
1702 if (fp != NULL) {
1703 files[i].fname = tostring(s);
1704 files[i].fp = fp;
1705 files[i].mode = m;
1707 return fp;
1710 const char *filename(FILE *fp)
1712 int i;
1714 for (i = 0; i < nfiles; i++)
1715 if (fp == files[i].fp)
1716 return files[i].fname;
1717 return "???";
1720 Cell *closefile(Node **a, int n)
1722 Cell *x;
1723 int i, stat;
1725 n = n;
1726 x = execute(a[0]);
1727 getsval(x);
1728 stat = -1;
1729 for (i = 0; i < nfiles; i++) {
1730 if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
1731 if (ferror(files[i].fp))
1732 WARNING( "i/o error occurred on %s", files[i].fname );
1733 if (files[i].mode == '|' || files[i].mode == LE)
1734 stat = pclose(files[i].fp);
1735 else
1736 stat = fclose(files[i].fp);
1737 if (stat == EOF)
1738 WARNING( "i/o error occurred closing %s", files[i].fname );
1739 if (i > 2) /* don't do /dev/std... */
1740 xfree(files[i].fname);
1741 files[i].fname = NULL; /* watch out for ref thru this */
1742 files[i].fp = NULL;
1745 tempfree(x);
1746 x = gettemp();
1747 setfval(x, (Awkfloat) stat);
1748 return(x);
1751 void closeall(void)
1753 int i, stat;
1755 for (i = 0; i < FOPEN_MAX; i++) {
1756 if (files[i].fp) {
1757 if (ferror(files[i].fp))
1758 WARNING( "i/o error occurred on %s", files[i].fname );
1759 if (files[i].mode == '|' || files[i].mode == LE)
1760 stat = pclose(files[i].fp);
1761 else
1762 stat = fclose(files[i].fp);
1763 if (stat == EOF)
1764 WARNING( "i/o error occurred while closing %s", files[i].fname );
1769 void flush_all(void)
1771 int i;
1773 for (i = 0; i < nfiles; i++)
1774 if (files[i].fp)
1775 fflush(files[i].fp);
1778 void backsub(char **pb_ptr, char **sptr_ptr);
1780 Cell *sub(Node **a, int nnn) /* substitute command */
1782 char *sptr, *pb, *q;
1783 Cell *x, *y, *result;
1784 char *t, *buf;
1785 fa *pfa;
1786 int bufsz = recsize;
1788 if ((buf = (char *) malloc(bufsz)) == NULL)
1789 FATAL("out of memory in sub");
1790 x = execute(a[3]); /* target string */
1791 t = getsval(x);
1792 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1793 pfa = (fa *) a[1]; /* regular expression */
1794 else {
1795 y = execute(a[1]);
1796 pfa = makedfa(getsval(y), 1);
1797 tempfree(y);
1799 y = execute(a[2]); /* replacement string */
1800 result = False;
1801 if (pmatch(pfa, t)) {
1802 sptr = t;
1803 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1804 pb = buf;
1805 while (sptr < patbeg)
1806 *pb++ = *sptr++;
1807 sptr = getsval(y);
1808 while (*sptr != 0) {
1809 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1810 if (*sptr == '\\') {
1811 backsub(&pb, &sptr);
1812 } else if (*sptr == '&') {
1813 sptr++;
1814 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1815 for (q = patbeg; q < patbeg+patlen; )
1816 *pb++ = *q++;
1817 } else
1818 *pb++ = *sptr++;
1820 *pb = '\0';
1821 if (pb > buf + bufsz)
1822 FATAL("sub result1 %.30s too big; can't happen", buf);
1823 sptr = patbeg + patlen;
1824 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1825 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1826 while ((*pb++ = *sptr++) != 0)
1829 if (pb > buf + bufsz)
1830 FATAL("sub result2 %.30s too big; can't happen", buf);
1831 setsval(x, buf); /* BUG: should be able to avoid copy */
1832 result = True;;
1834 tempfree(x);
1835 tempfree(y);
1836 free(buf);
1837 return result;
1840 Cell *gsub(Node **a, int nnn) /* global substitute */
1842 Cell *x, *y;
1843 char *rptr, *sptr, *t, *pb, *q;
1844 char *buf;
1845 fa *pfa;
1846 int mflag, tempstat, num;
1847 int bufsz = recsize;
1849 if ((buf = (char *) malloc(bufsz)) == NULL)
1850 FATAL("out of memory in gsub");
1851 mflag = 0; /* if mflag == 0, can replace empty string */
1852 num = 0;
1853 x = execute(a[3]); /* target string */
1854 t = getsval(x);
1855 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1856 pfa = (fa *) a[1]; /* regular expression */
1857 else {
1858 y = execute(a[1]);
1859 pfa = makedfa(getsval(y), 1);
1860 tempfree(y);
1862 y = execute(a[2]); /* replacement string */
1863 if (pmatch(pfa, t)) {
1864 tempstat = pfa->initstat;
1865 pfa->initstat = 2;
1866 pb = buf;
1867 rptr = getsval(y);
1868 do {
1869 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
1870 if (mflag == 0) { /* can replace empty */
1871 num++;
1872 sptr = rptr;
1873 while (*sptr != 0) {
1874 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1875 if (*sptr == '\\') {
1876 backsub(&pb, &sptr);
1877 } else if (*sptr == '&') {
1878 sptr++;
1879 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1880 for (q = patbeg; q < patbeg+patlen; )
1881 *pb++ = *q++;
1882 } else
1883 *pb++ = *sptr++;
1886 if (*t == 0) /* at end */
1887 goto done;
1888 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
1889 *pb++ = *t++;
1890 if (pb > buf + bufsz) /* BUG: not sure of this test */
1891 FATAL("gsub result0 %.30s too big; can't happen", buf);
1892 mflag = 0;
1894 else { /* matched nonempty string */
1895 num++;
1896 sptr = t;
1897 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
1898 while (sptr < patbeg)
1899 *pb++ = *sptr++;
1900 sptr = rptr;
1901 while (*sptr != 0) {
1902 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1903 if (*sptr == '\\') {
1904 backsub(&pb, &sptr);
1905 } else if (*sptr == '&') {
1906 sptr++;
1907 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1908 for (q = patbeg; q < patbeg+patlen; )
1909 *pb++ = *q++;
1910 } else
1911 *pb++ = *sptr++;
1913 t = patbeg + patlen;
1914 if (patlen == 0 || *t == 0 || *(t-1) == 0)
1915 goto done;
1916 if (pb > buf + bufsz)
1917 FATAL("gsub result1 %.30s too big; can't happen", buf);
1918 mflag = 1;
1920 } while (pmatch(pfa,t));
1921 sptr = t;
1922 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
1923 while ((*pb++ = *sptr++) != 0)
1925 done: if (pb < buf + bufsz)
1926 *pb = '\0';
1927 else if (*(pb-1) != '\0')
1928 FATAL("gsub result2 %.30s truncated; can't happen", buf);
1929 setsval(x, buf); /* BUG: should be able to avoid copy + free */
1930 pfa->initstat = tempstat;
1932 tempfree(x);
1933 tempfree(y);
1934 x = gettemp();
1935 x->tval = NUM;
1936 x->fval = num;
1937 free(buf);
1938 return(x);
1941 void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */
1942 { /* sptr[0] == '\\' */
1943 char *pb = *pb_ptr, *sptr = *sptr_ptr;
1945 if (sptr[1] == '\\') {
1946 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
1947 *pb++ = '\\';
1948 *pb++ = '&';
1949 sptr += 4;
1950 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
1951 *pb++ = '\\';
1952 sptr += 2;
1953 } else { /* \\x -> \\x */
1954 *pb++ = *sptr++;
1955 *pb++ = *sptr++;
1957 } else if (sptr[1] == '&') { /* literal & */
1958 sptr++;
1959 *pb++ = *sptr++;
1960 } else /* literal \ */
1961 *pb++ = *sptr++;
1963 *pb_ptr = pb;
1964 *sptr_ptr = sptr;