modified: src1/input.c
[GalaxyCodeBases.git] / tools / bioawk / lib.c
blob85356391e393f9b5cf2e0e02879be5fb4672b56c
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include "awk.h"
33 #include "ytab.h"
35 FILE *infile = NULL;
36 char *file = "";
37 char *record;
38 int recsize = RECSIZE;
39 char *fields;
40 int fieldssize = RECSIZE;
42 Cell **fldtab; /* pointers to Cells */
43 char inputFS[100] = " ";
45 #define MAXFLD 2
46 int nfields = MAXFLD; /* last allocated slot for $i */
48 int donefld; /* 1 = implies rec broken into fields */
49 int donerec; /* 1 = record is valid (no flds have changed) */
51 int lastfld = 0; /* last used field */
52 int argno = 1; /* current input argument number */
53 extern Awkfloat *ARGC;
55 static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
56 static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
58 void recinit(unsigned int n)
60 if ( (record = (char *) malloc(n)) == NULL
61 || (fields = (char *) malloc(n+1)) == NULL
62 || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
63 || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
64 FATAL("out of space for $0 and fields");
65 *fldtab[0] = dollar0;
66 fldtab[0]->sval = record;
67 fldtab[0]->nval = tostring("0");
68 makefields(1, nfields);
71 void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
73 char temp[50];
74 int i;
76 for (i = n1; i <= n2; i++) {
77 fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
78 if (fldtab[i] == NULL)
79 FATAL("out of space in makefields %d", i);
80 *fldtab[i] = dollar1;
81 sprintf(temp, "%d", i);
82 fldtab[i]->nval = tostring(temp);
86 void initgetrec(void)
88 int i;
89 char *p;
91 for (i = 1; i < *ARGC; i++) {
92 p = getargv(i); /* find 1st real filename */
93 if (p == NULL || *p == '\0') { /* deleted or zapped */
94 argno++;
95 continue;
97 if (!isclvar(p)) {
98 setsval(lookup("FILENAME", symtab), p);
99 return;
101 setclvar(p); /* a commandline assignment before filename */
102 argno++;
104 infile = stdin; /* no filenames, so use stdin */
107 static int firsttime = 1;
109 int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
110 { /* note: cares whether buf == record */
111 int c;
112 char *buf = *pbuf;
113 uschar saveb0;
114 int bufsize = *pbufsize, savebufsize = bufsize;
116 if (bio_fmt != BIO_NULL) return bio_getrec(pbuf, pbufsize, isrecord);
118 if (firsttime) {
119 firsttime = 0;
120 initgetrec();
122 dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
123 *RS, *FS, *ARGC, *FILENAME) );
124 if (isrecord) {
125 donefld = 0;
126 donerec = 1;
128 saveb0 = buf[0];
129 buf[0] = 0;
130 while (argno < *ARGC || infile == stdin) {
131 dprintf( ("argno=%d, file=|%s|\n", argno, file) );
132 if (infile == NULL) { /* have to open a new file */
133 file = getargv(argno);
134 if (file == NULL || *file == '\0') { /* deleted or zapped */
135 argno++;
136 continue;
138 if (isclvar(file)) { /* a var=value arg */
139 setclvar(file);
140 argno++;
141 continue;
143 *FILENAME = file;
144 dprintf( ("opening file %s\n", file) );
145 if (*file == '-' && *(file+1) == '\0')
146 infile = stdin;
147 else if ((infile = fopen(file, "r")) == NULL)
148 FATAL("can't open file %s", file);
149 setfval(fnrloc, 0.0);
151 c = readrec(&buf, &bufsize, infile);
152 if (c != 0 || buf[0] != '\0') { /* normal record */
153 if (isrecord) {
154 if (freeable(fldtab[0]))
155 xfree(fldtab[0]->sval);
156 fldtab[0]->sval = buf; /* buf == record */
157 fldtab[0]->tval = REC | STR | DONTFREE;
158 if (is_number(fldtab[0]->sval)) {
159 fldtab[0]->fval = atof(fldtab[0]->sval);
160 fldtab[0]->tval |= NUM;
163 setfval(nrloc, nrloc->fval+1);
164 setfval(fnrloc, fnrloc->fval+1);
165 *pbuf = buf;
166 *pbufsize = bufsize;
167 return 1;
169 /* EOF arrived on this file; set up next */
170 if (infile != stdin)
171 fclose(infile);
172 infile = NULL;
173 argno++;
175 buf[0] = saveb0;
176 *pbuf = buf;
177 *pbufsize = savebufsize;
178 return 0; /* true end of file */
181 void nextfile(void)
183 if (infile != NULL && infile != stdin)
184 fclose(infile);
185 infile = NULL;
186 argno++;
189 int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
191 int sep, c;
192 char *rr, *buf = *pbuf;
193 int bufsize = *pbufsize;
195 if (strlen(*FS) >= sizeof(inputFS))
196 FATAL("field separator %.10s... is too long", *FS);
197 /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
198 strcpy(inputFS, *FS); /* for subsequent field splitting */
199 if ((sep = **RS) == 0) {
200 sep = '\n';
201 while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
203 if (c != EOF)
204 ungetc(c, inf);
206 for (rr = buf; ; ) {
207 for (; (c=getc(inf)) != sep && c != EOF; ) {
208 if (rr-buf+1 > bufsize)
209 if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
210 FATAL("input record `%.30s...' too long", buf);
211 *rr++ = c;
213 if (**RS == sep || c == EOF)
214 break;
215 if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
216 break;
217 if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
218 FATAL("input record `%.30s...' too long", buf);
219 *rr++ = '\n';
220 *rr++ = c;
222 if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
223 FATAL("input record `%.30s...' too long", buf);
224 *rr = 0;
225 dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
226 *pbuf = buf;
227 *pbufsize = bufsize;
228 return c == EOF && rr == buf ? 0 : 1;
231 char *getargv(int n) /* get ARGV[n] */
233 Cell *x;
234 char *s, temp[50];
235 extern Array *ARGVtab;
237 sprintf(temp, "%d", n);
238 if (lookup(temp, ARGVtab) == NULL)
239 return NULL;
240 x = setsymtab(temp, "", 0.0, STR, ARGVtab);
241 s = getsval(x);
242 dprintf( ("getargv(%d) returns |%s|\n", n, s) );
243 return s;
246 void setclvar(char *s) /* set var=value from s */
248 char *p;
249 Cell *q;
251 for (p=s; *p != '='; p++)
253 *p++ = 0;
254 p = qstring(p, '\0');
255 q = setsymtab(s, p, 0.0, STR, symtab);
256 setsval(q, p);
257 if (is_number(q->sval)) {
258 q->fval = atof(q->sval);
259 q->tval |= NUM;
261 dprintf( ("command line set %s to |%s|\n", s, p) );
265 void fldbld(void) /* create fields from current record */
267 /* this relies on having fields[] the same length as $0 */
268 /* the fields are all stored in this one array with \0's */
269 /* possibly with a final trailing \0 not associated with any field */
270 char *r, *fr, sep;
271 Cell *p;
272 int i, j, n;
274 if (donefld)
275 return;
276 if (!isstr(fldtab[0]))
277 getsval(fldtab[0]);
278 r = fldtab[0]->sval;
279 n = strlen(r);
280 if (n > fieldssize) {
281 xfree(fields);
282 if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
283 FATAL("out of space for fields in fldbld %d", n);
284 fieldssize = n;
286 fr = fields;
287 i = 0; /* number of fields accumulated here */
288 strcpy(inputFS, *FS);
289 if (strlen(inputFS) > 1) { /* it's a regular expression */
290 i = refldbld(r, inputFS);
291 } else if ((sep = *inputFS) == ' ') { /* default whitespace */
292 for (i = 0; ; ) {
293 while (*r == ' ' || *r == '\t' || *r == '\n')
294 r++;
295 if (*r == 0)
296 break;
297 i++;
298 if (i > nfields)
299 growfldtab(i);
300 if (freeable(fldtab[i]))
301 xfree(fldtab[i]->sval);
302 fldtab[i]->sval = fr;
303 fldtab[i]->tval = FLD | STR | DONTFREE;
305 *fr++ = *r++;
306 while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
307 *fr++ = 0;
309 *fr = 0;
310 } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
311 for (i = 0; *r != 0; r++) {
312 char buf[2];
313 i++;
314 if (i > nfields)
315 growfldtab(i);
316 if (freeable(fldtab[i]))
317 xfree(fldtab[i]->sval);
318 buf[0] = *r;
319 buf[1] = 0;
320 fldtab[i]->sval = tostring(buf);
321 fldtab[i]->tval = FLD | STR;
323 *fr = 0;
324 } else if (*r != 0) { /* if 0, it's a null field */
325 /* subtlecase : if length(FS) == 1 && length(RS > 0)
326 * \n is NOT a field separator (cf awk book 61,84).
327 * this variable is tested in the inner while loop.
329 int rtest = '\n'; /* normal case */
330 if (strlen(*RS) > 0)
331 rtest = '\0';
332 for (;;) {
333 i++;
334 if (i > nfields)
335 growfldtab(i);
336 if (freeable(fldtab[i]))
337 xfree(fldtab[i]->sval);
338 fldtab[i]->sval = fr;
339 fldtab[i]->tval = FLD | STR | DONTFREE;
340 while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
341 *fr++ = *r++;
342 *fr++ = 0;
343 if (*r++ == 0)
344 break;
346 *fr = 0;
348 if (i > nfields)
349 FATAL("record `%.30s...' has too many fields; can't happen", r);
350 cleanfld(i+1, lastfld); /* clean out junk from previous record */
351 lastfld = i;
352 donefld = 1;
353 for (j = 1; j <= lastfld; j++) {
354 p = fldtab[j];
355 if(is_number(p->sval)) {
356 p->fval = atof(p->sval);
357 p->tval |= NUM;
360 setfval(nfloc, (Awkfloat) lastfld);
361 if (dbg) {
362 for (j = 0; j <= lastfld; j++) {
363 p = fldtab[j];
364 printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
369 void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
370 { /* nvals remain intact */
371 Cell *p;
372 int i;
374 for (i = n1; i <= n2; i++) {
375 p = fldtab[i];
376 if (freeable(p))
377 xfree(p->sval);
378 p->sval = "";
379 p->tval = FLD | STR | DONTFREE;
383 void newfld(int n) /* add field n after end of existing lastfld */
385 if (n > nfields)
386 growfldtab(n);
387 cleanfld(lastfld+1, n);
388 lastfld = n;
389 setfval(nfloc, (Awkfloat) n);
392 Cell *fieldadr(int n) /* get nth field */
394 if (n < 0)
395 FATAL("trying to access out of range field %d", n);
396 if (n > nfields) /* fields after NF are empty */
397 growfldtab(n); /* but does not increase NF */
398 return(fldtab[n]);
401 void growfldtab(int n) /* make new fields up to at least $n */
403 int nf = 2 * nfields;
404 size_t s;
406 if (n > nf)
407 nf = n;
408 s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
409 if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */
410 fldtab = (Cell **) realloc(fldtab, s);
411 else /* overflow sizeof int */
412 xfree(fldtab); /* make it null */
413 if (fldtab == NULL)
414 FATAL("out of space creating %d fields", nf);
415 makefields(nfields+1, nf);
416 nfields = nf;
419 int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
421 /* this relies on having fields[] the same length as $0 */
422 /* the fields are all stored in this one array with \0's */
423 char *fr;
424 int i, tempstat, n;
425 fa *pfa;
427 n = strlen(rec);
428 if (n > fieldssize) {
429 xfree(fields);
430 if ((fields = (char *) malloc(n+1)) == NULL)
431 FATAL("out of space for fields in refldbld %d", n);
432 fieldssize = n;
434 fr = fields;
435 *fr = '\0';
436 if (*rec == '\0')
437 return 0;
438 pfa = makedfa(fs, 1);
439 dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
440 tempstat = pfa->initstat;
441 for (i = 1; ; i++) {
442 if (i > nfields)
443 growfldtab(i);
444 if (freeable(fldtab[i]))
445 xfree(fldtab[i]->sval);
446 fldtab[i]->tval = FLD | STR | DONTFREE;
447 fldtab[i]->sval = fr;
448 dprintf( ("refldbld: i=%d\n", i) );
449 if (nematch(pfa, rec)) {
450 pfa->initstat = 2; /* horrible coupling to b.c */
451 dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
452 strncpy(fr, rec, patbeg-rec);
453 fr += patbeg - rec + 1;
454 *(fr-1) = '\0';
455 rec = patbeg + patlen;
456 } else {
457 dprintf( ("no match %s\n", rec) );
458 strcpy(fr, rec);
459 pfa->initstat = tempstat;
460 break;
463 return i;
466 void recbld(void) /* create $0 from $1..$NF if necessary */
468 int i;
469 char *r, *p;
471 if (donerec == 1)
472 return;
473 r = record;
474 for (i = 1; i <= *NF; i++) {
475 p = getsval(fldtab[i]);
476 if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
477 FATAL("created $0 `%.30s...' too long", record);
478 while ((*r = *p++) != 0)
479 r++;
480 if (i < *NF) {
481 if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
482 FATAL("created $0 `%.30s...' too long", record);
483 for (p = *OFS; (*r = *p++) != 0; )
484 r++;
487 if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
488 FATAL("built giant record `%.30s...'", record);
489 *r = '\0';
490 dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) );
492 if (freeable(fldtab[0]))
493 xfree(fldtab[0]->sval);
494 fldtab[0]->tval = REC | STR | DONTFREE;
495 fldtab[0]->sval = record;
497 dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) );
498 dprintf( ("recbld = |%s|\n", record) );
499 donerec = 1;
int	errorflag = 0;	/* set to 2 by SYNTAX() */

/*
 * yyerror: report the message s as a syntax error.
 * The text is passed as an argument, never as the format.
 */
void yyerror(const char *s)
{
	const char *msg = s;

	SYNTAX("%s", msg);
}
509 void SYNTAX(const char *fmt, ...)
511 extern char *cmdname, *curfname;
512 static int been_here = 0;
513 va_list varg;
515 if (been_here++ > 2)
516 return;
517 fprintf(stderr, "%s: ", cmdname);
518 va_start(varg, fmt);
519 vfprintf(stderr, fmt, varg);
520 va_end(varg);
521 fprintf(stderr, " at source line %d", lineno);
522 if (curfname != NULL)
523 fprintf(stderr, " in function %s", curfname);
524 if (compile_time == 1 && cursource() != NULL)
525 fprintf(stderr, " source file %s", cursource());
526 fprintf(stderr, "\n");
527 errorflag = 2;
528 eprint();
/*
 * fpecatch: report a floating point exception as a fatal error.
 * n is included in the message.
 */
void fpecatch(int n)
{
	int code = n;

	FATAL("floating point exception %d", code);
}
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: read the rest of the input, counting brackets with
 * bclass(), then report any imbalance of {}, [] and () through
 * bcheck2().  Only the first call does anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int c;

	if (beenhere++)
		return;
	for (;;) {
		c = input();
		if (c == EOF || c == '\0')
			break;
		bclass(c);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
/*
 * bcheck2: report an unbalanced bracket count on stderr.
 * n > 0 means that many closing characters c2 are missing, n < 0 that
 * many are extra; nothing is printed for n == 0.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	(void) c1;

	switch (n) {
	case 0:
		break;
	case 1:
		fprintf(stderr, "\tmissing %c\n", c2);
		break;
	case -1:
		fprintf(stderr, "\textra %c\n", c2);
		break;
	default:
		if (n > 0)
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
		break;
	}
}
564 void FATAL(const char *fmt, ...)
566 extern char *cmdname;
567 va_list varg;
569 fflush(stdout);
570 fprintf(stderr, "%s: ", cmdname);
571 va_start(varg, fmt);
572 vfprintf(stderr, fmt, varg);
573 va_end(varg);
574 error();
575 if (dbg > 1) /* core dump if serious debugging on */
576 abort();
577 exit(2);
/*
 * WARNING: print a printf-style message on stderr, prefixed with the
 * command name and followed by the context from error().  stdout is
 * flushed first.  Unlike FATAL() it returns to the caller.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	error();
}
593 void error()
595 extern Node *curnode;
597 fprintf(stderr, "\n");
598 if (compile_time != 2 && NR && *NR > 0) {
599 fprintf(stderr, " input record number %d", (int) (*FNR));
600 if (strcmp(*FILENAME, "-") != 0)
601 fprintf(stderr, ", file %s", *FILENAME);
602 fprintf(stderr, "\n");
604 if (compile_time != 2 && curnode)
605 fprintf(stderr, " source line number %d", curnode->lineno);
606 else if (compile_time != 2 && lineno)
607 fprintf(stderr, " source line number %d", lineno);
608 if (compile_time == 1 && cursource() != NULL)
609 fprintf(stderr, " source file %s", cursource());
610 fprintf(stderr, "\n");
611 eprint();
614 void eprint(void) /* try to print context around error */
616 char *p, *q;
617 int c;
618 static int been_here = 0;
619 extern char ebuf[], *ep;
621 if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
622 return;
623 p = ep - 1;
624 if (p > ebuf && *p == '\n')
625 p--;
626 for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
628 while (*p == '\n')
629 p++;
630 fprintf(stderr, " context is\n\t");
631 for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
633 for ( ; p < q; p++)
634 if (*p)
635 putc(*p, stderr);
636 fprintf(stderr, " >>> ");
637 for ( ; p < ep; p++)
638 if (*p)
639 putc(*p, stderr);
640 fprintf(stderr, " <<< ");
641 if (*ep)
642 while ((c = input()) != '\n' && c != '\0' && c != EOF) {
643 putc(c, stderr);
644 bclass(c);
646 putc('\n', stderr);
647 ep = ebuf;
650 void bclass(int c)
652 switch (c) {
653 case '{': bracecnt++; break;
654 case '}': bracecnt--; break;
655 case '[': brackcnt++; break;
656 case ']': brackcnt--; break;
657 case '(': parencnt++; break;
658 case ')': parencnt--; break;
/*
 * errcheck: check errno after a math call.
 * x is the result of the call and s the name used in the warning.
 * On EDOM or ERANGE errno is cleared, a warning is issued and 1 is
 * returned; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
/*
 * isclvar: is s of form var=something ?
 * var must start with a letter or underscore and continue with
 * letters, digits or underscores.  The name must be followed by a
 * single '=': "var==x" does not qualify.  Returns 1 or 0.
 */
int isclvar(const char *s)
{
	const char *p = s;
	unsigned char ch = (unsigned char) *p;

	if (!isalpha(ch) && ch != '_')
		return 0;
	/* step over the identifier */
	while (*p != '\0' && (isalnum((unsigned char) *p) || *p == '_'))
		p++;
	if (*p != '=' || p <= s)
		return 0;
	return p[1] != '=';
}
/* strtod is supposed to be a proper test of what's a valid number */
/* but a C99 strtod also converts hexadecimal input such as 0x123, */
/* which must not count as a number here, so that form is rejected */
/* before strtod is consulted. */

#include <math.h>
/*
 * is_number: does the string s look like a number?
 *
 * Returns 1 when strtod() converts s and nothing but blanks, tabs and
 * newlines follows the converted text.  Returns 0 when nothing is
 * converted, when the value is out of range, when other characters
 * trail it, or when the digits start with a 0x / 0X prefix.
 */
int is_number(const char *s)
{
	const char *t = s;
	double r;
	char *ep;

	/* strtod skips white space and a sign before the digits; do the same */
	while (isspace((unsigned char) *t))
		t++;
	if (*t == '+' || *t == '-')
		t++;
	if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
		return 0;	/* no hexadecimal */

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || r == HUGE_VAL || errno == ERANGE)
		return 0;
	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
		ep++;
	return *ep == '\0';
}