4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved.
30 #pragma ident "%Z%%M% %I% %E% SMI"
33 * awk -- process input files, field extraction, output
35 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
41 static FILE *awkinfp
; /* Input file pointer */
42 static int reclen
; /* Length of last record */
43 static int exstat
; /* Exit status */
45 static FILE *openfile(NODE
*np
, int flag
, int fatal
);
46 static FILE *newfile(void);
47 static NODE
*nextarg(NODE
**npp
);
48 static void adjust_buf(wchar_t **, int *, wchar_t **, char *, size_t);
49 static void awk_putwc(wchar_t, FILE *);
52 * mainline for awk execution
59 while (nextrecord(linebuf
, awkinfp
) > 0)
65 * "cp" is the buffer to fill. There is a special case if this buffer is
67 * Return 1 if OK, zero on EOF, -1 on error.
70 nextrecord(wchar_t *cp
, FILE *fp
)
75 if (fp
== FNULL
&& (fp
= newfile()) == FNULL
)
77 if ((*awkrecord
)(ep
, NLINE
, fp
) == NULL
) {
80 (void) fclose(awkinfp
);
89 if (varNR
->n_flags
& FINT
)
92 (void) exprreduce(incNR
);
93 if (varFNR
->n_flags
& FINT
)
96 (void) exprreduce(incFNR
);
104 /* if record length is too long then bail out */
105 if (reclen
> NLINE
- 2) {
106 awkerr(gettext("Record too long (LIMIT: %d bytes)"),
116 * Returns 1 if the input string, arg, is a variable assignment,
117 * otherwise returns 0.
119 * An argument to awk can be either a pathname of a file, or a variable
120 * assignment. An operand that begins with an underscore or alphabetic
121 * character from the portable character set, followed by a sequence of
122 * underscores, digits, and alphabetics from the portable character set,
123 * followed by the '=' character, shall specify a variable assignment
124 * rather than a pathname.
127 isclvar(wchar_t *arg
)
129 wchar_t *tmpptr
= arg
;
131 if (tmpptr
!= NULL
) {
133 /* Begins with an underscore or alphabetic character */
134 if (iswalpha(*tmpptr
) || *tmpptr
== '_') {
137 * followed by a sequence of underscores, digits,
140 for (tmpptr
++; *tmpptr
; tmpptr
++) {
141 if (!(iswalnum(*tmpptr
) || (*tmpptr
== '_'))) {
145 return (*tmpptr
== '=');
153 * Return the next file from the command line.
154 * Return FNULL when no more files.
155 * Sets awkinfp variable to the new current input file.
160 static int argindex
= 1;
165 extern void strescape(wchar_t *);
167 argc
= (int)exprint(varARGC
);
169 if (argindex
>= argc
) {
177 constant
->n_int
= argindex
++;
178 arg
= (wchar_t *)exprstring(ARGVsubi
);
180 * If the argument contains a '=', determine if the
181 * argument needs to be treated as a variable assignment
182 * or as the pathname of a file.
184 if (((ap
= wcschr(arg
, '=')) != NULL
) && isclvar(arg
)) {
187 strassign(vlook(arg
), linebuf
, FALLOC
|FSENSE
,
195 if (arg
[0] == '-' && arg
[1] == '\0') {
199 if ((awkinfp
= fopen(mbunconvert(arg
), r
)) == FNULL
) {
200 (void) fprintf(stderr
, gettext("input file \"%s\""),
207 strassign(varFILENAME
, arg
, FALLOC
, wcslen(arg
));
208 if (varFNR
->n_flags
& FINT
)
211 (void) exprreduce(clrFNR
);
216 * Default record reading code
217 * Uses fgets for potential speedups found in some (e.g. MKS)
221 defrecord(wchar_t *bp
, int lim
, FILE *fp
)
225 if (fgetws(bp
, lim
, fp
) == NULL
) {
231 * switch (fgetws(bp, lim, fp)) {
235 * case M_FGETS_BINARY:
236 * awkerr(gettext("file is binary"));
238 * awkerr(gettext("line too long: limit %d"),
240 * case M_FGETS_ERROR:
241 * awkperr(gettext("error reading file"));
245 if (*(endp
= (bp
+ (reclen
= wcslen(bp
))-1)) == '\n') {
253 * Read a record separated by one character in the RS.
254 * Compatible calling sequence with fgets, but don't include
255 * record separator character in string.
258 charrecord(wchar_t *abp
, int alim
, FILE *fp
)
266 endc
= *(wchar_t *)varRS
->n_string
;
267 while (--limit
> 0 && (c
= getwc(fp
)) != endc
&& c
!= WEOF
)
271 return (c
== WEOF
&& bp
== abp
? NULL
: abp
);
275 * Special routine for multiple line records.
278 multirecord(wchar_t *abp
, int limit
, FILE *fp
)
283 while ((c
= getwc(fp
)) == '\n')
289 if (c
== '\n' && bp
[-1] == '\n')
293 } while ((c
= getwc(fp
)) != WEOF
);
298 return (c
== WEOF
&& bp
== abp
? NULL
: abp
);
302 * Look for fields separated by spaces, tabs or newlines.
303 * Extract the next field, given pointer to start address.
304 * Return pointer to beginning of field or NULL.
305 * Reset end of field reference, which is the beginning of the
309 whitefield(wchar_t **endp
)
315 while (*sp
== ' ' || *sp
== '\t' || *sp
== '\n')
319 for (ep
= sp
; *ep
!= ' ' && *ep
!= '\0' && *ep
!= '\t' &&
327 * Look for fields separated by non-whitespace characters.
328 * Same calling sequence as whitefield().
331 blackfield(wchar_t **endp
)
336 endc
= *(wchar_t *)varFS
->n_string
;
340 if (*cp
== endc
&& fcount
!= 0)
342 if ((*endp
= wcschr(cp
, endc
)) == NULL
)
343 *endp
= wcschr(cp
, '\0');
348 * This field separation routine uses the same logic as
349 * blackfield but uses a regular expression to separate
353 refield(wchar_t **endpp
)
357 static REGWMATCH_T match
[10];
362 match
[0].rm_ep
= NULL
;
365 if (match
[0].rm_ep
!= NULL
) {
367 cp
= (wchar_t *)match
[0].rm_ep
;
372 switch ((result
= REGWEXEC(resep
, cp
, 10, match
, flags
))) {
375 * Check to see if a null string was matched. If this is the
376 * case, then move the current pointer beyond this position.
378 if (match
[0].rm_sp
== match
[0].rm_ep
) {
379 cp
= (wchar_t *)match
[0].rm_sp
;
384 *endpp
= (wchar_t *)match
[0].rm_sp
;
387 match
[0].rm_ep
= NULL
;
388 *endpp
= wcschr(cp
, '\0');
391 (void) REGWERROR(result
, resep
, (char *)linebuf
,
393 awkerr(gettext("error splitting record: %s"),
400 * do begin processing
406 * Free all keyword nodes to save space.
415 while ((knp
= symwalk(&nbuck
, &np
)) != NNULL
)
416 if (knp
->n_type
== KEYWORD
)
420 * Copy ENVIRON array only if needed.
421 * Note the convoluted work to assign to an array
422 * and that the temporary nodes will be freed by
423 * freetemps() because we are "running".
427 wchar_t *name
, *value
;
428 NODE
*namep
= stringnode(_null
, FSTATIC
, 0);
429 NODE
*valuep
= stringnode(_null
, FSTATIC
, 0);
430 NODE
*ENVsubname
= node(INDEX
, varENVIRON
, namep
);
431 extern char **environ
;
433 /* (void) m_setenv(); XXX what's this do? */
434 for (app
= environ
; *app
!= NULL
; /* empty */) {
435 name
= mbstowcsdup(*app
++);
437 if ((value
= wcschr(name
, '=')) != NULL
) {
439 valuep
->n_strlen
= wcslen(value
);
440 valuep
->n_string
= value
;
442 valuep
->n_strlen
= 0;
443 valuep
->n_string
= _null
;
445 namep
->n_strlen
= wcslen(namep
->n_string
= name
);
446 (void) assign(ENVsubname
, valuep
);
457 * Delete all pattern/action rules that are BEGIN at this
458 * point to save space.
459 * NOTE: this is not yet implemented.
477 for (op
= &ofiles
[0]; op
< &ofiles
[NIOSTREAM
]; op
++)
478 if (op
->f_fp
!= FNULL
)
480 if (awkinfp
== stdin
)
481 (void) fflush(awkinfp
);
496 fp
= openfile(np
->n_right
, 1, 1);
497 if (np
->n_left
== NNULL
)
498 (void) fputs(mbunconvert(linebuf
), fp
);
500 ofs
= wcstombsdup((isstring(varOFS
->n_flags
)) ?
501 (wchar_t *)varOFS
->n_string
:
502 (wchar_t *)exprstring(varOFS
));
504 while ((np
= getlist(&listp
)) != NNULL
) {
506 (void) fputs(ofs
, fp
);
508 if (np
->n_flags
& FINT
)
509 (void) fprintf(fp
, "%lld", (INT
)np
->n_int
);
510 else if (isstring(np
->n_flags
))
511 (void) fprintf(fp
, "%S", np
->n_string
);
514 mbunconvert((wchar_t *)exprstring(varOFMT
)),
519 (void) fputs(mbunconvert(isstring(varORS
->n_flags
) ?
520 (wchar_t *)varORS
->n_string
: (wchar_t *)exprstring(varORS
)),
523 awkperr("error on print");
534 fp
= openfile(np
->n_right
, 1, 1);
535 (void) xprintf(np
->n_left
, fp
, (wchar_t **)NULL
);
537 awkperr("error on printf");
541 * Get next input line.
542 * Read into variable on left of node (or $0 if NULL).
543 * Read from pipe or file on right of node (or from regular
545 * This is an oddball inasmuch as it is a function
546 * but parses more like the keywords print, etc.
556 if (np
->n_right
== NULL
&& phase
== END
) {
557 /* Pretend we've reached end of (the non-existent) file. */
561 if ((fp
= openfile(np
->n_right
, 0, 0)) != FNULL
) {
562 if (np
->n_left
== NNULL
) {
563 ret
= nextrecord(linebuf
, fp
);
565 cp
= emalloc(NLINE
* sizeof (wchar_t));
566 ret
= nextrecord(cp
, fp
);
569 cp
= erealloc(cp
, (len
+1)*sizeof (wchar_t));
570 if (isleaf(np
->n_flags
)) {
571 if (np
->n_type
== PARM
)
573 strassign(np
, cp
, FNOALLOC
, len
);
575 (void) assign(np
, stringnode(cp
,
580 return (intnode(ret
));
584 * Open a file. Flag is non-zero for output.
587 openfile(NODE
*np
, int flag
, int fatal
)
598 if (awkinfp
== FNULL
)
602 if ((type
= np
->n_type
) == APPEND
)
604 cp
= mbunconvert(exprstring(np
->n_left
));
606 for (op
= &ofiles
[0]; op
< &ofiles
[NIOSTREAM
]; op
++) {
607 if (op
->f_fp
== FNULL
) {
608 if (fop
== (OFILE
*)NULL
)
612 if (op
->f_mode
== type
&& strcmp(op
->f_name
, cp
) == 0)
615 if (fop
== (OFILE
*)NULL
)
616 awkerr(gettext("too many open streams to %s onto \"%s\""),
617 flag
? "print/printf" : "getline", cp
);
618 (void) fflush(stdout
);
620 if (cp
[0] == '-' && cp
[1] == '\0') {
621 fp
= flag
? stdout
: stdin
;
623 switch (np
->n_type
) {
625 if ((fp
= fopen(cp
, w
)) != FNULL
) {
626 if (isatty(fileno(fp
)))
627 (void) setvbuf(fp
, 0, _IONBF
, 0);
637 (void) setvbuf(fp
, (char *)0, _IOLBF
, 0);
649 awkerr(interr
, "openfile");
653 op
->f_name
= strdup(cp
);
657 awkperr(flag
? gettext("output file \"%s\"") :
658 gettext("input file \"%s\""), cp
);
669 if (op
->f_mode
== PIPE
|| op
->f_mode
== PIPESYM
)
670 (void) pclose(op
->f_fp
);
671 else if (fclose(op
->f_fp
) == EOF
)
672 awkperr("error on stream \"%s\"", op
->f_name
);
679 * Internal routine common to printf, sprintf.
680 * The node is that describing the arguments.
681 * Returns the number of characters written to file
682 * pointer `fp' or the length of the string returned
683 * in cp. If cp is NULL then the file pointer is used. If
684 * cp points to a string pointer, a pointer to an allocated
685 * buffer will be returned in it.
688 xprintf(NODE
*np
, FILE *fp
, wchar_t **cp
)
692 wchar_t *bptr
= (wchar_t *)NULL
;
702 if (isleaf(fnp
->n_flags
) && fnp
->n_type
== PARM
)
704 if (isstring(fnp
->n_flags
)) {
708 fmtsave
= fmt
= (wchar_t *)strsave(exprstring(fnp
));
711 * if a char * pointer has been passed in then allocate an initial
712 * buffer for the string. Make it LINE_MAX plus the length of
713 * the format string but do reallocs only based on LINE_MAX.
715 if (cp
!= (wchar_t **)NULL
) {
717 bptr
= *cp
= emalloc(sizeof (wchar_t) * (cplen
+ wcslen(fmt
)));
720 while ((c
= *fmt
++) != '\0') {
722 if (bptr
== (wchar_t *)NULL
)
732 switch (c
= *fmt
++) {
734 if (bptr
== (wchar_t *)NULL
)
745 fnp
= exprreduce(nextarg(&np
));
746 if (isnumber(fnp
->n_flags
))
749 c
= *(wchar_t *)exprstring(fnp
);
750 if (bptr
== (wchar_t *)NULL
)
751 length
+= fprintf(fp
, fmtbuf
, c
);
754 * Make sure that the buffer is long
755 * enough to hold the formatted string.
757 adjust_buf(cp
, &cplen
, &bptr
, fmtbuf
, 0);
759 * Since the call to adjust_buf() has already
760 * guaranteed that the buffer will be long
761 * enough, just pass in INT_MAX as
764 (void) wsprintf(bptr
, (const char *) fmtbuf
, c
);
765 bptr
+= (slen
= wcslen(bptr
));
769 /* XXXX Is this bogus? Figure out what s & S mean - look at original code */
775 if (bptr
== (wchar_t *)NULL
)
776 length
+= fprintf(fp
, fmtbuf
,
777 (wchar_t *)exprstring(nextarg(&np
)));
779 wchar_t *ts
= exprstring(nextarg(&np
));
781 adjust_buf(cp
, &cplen
, &bptr
, fmtbuf
,
783 (void) wsprintf(bptr
, (const char *) fmtbuf
,
785 bptr
+= (slen
= wcslen(bptr
));
800 *ofmtp
++ = 'l'; /* now dealing with long longs */
803 if (bptr
== (wchar_t *)NULL
)
804 length
+= fprintf(fp
, fmtbuf
,
805 exprint(nextarg(&np
)));
807 adjust_buf(cp
, &cplen
, &bptr
, fmtbuf
, 0);
808 (void) wsprintf(bptr
, (const char *) fmtbuf
,
809 exprint(nextarg(&np
)));
810 bptr
+= (slen
= wcslen(bptr
));
823 if (bptr
== (wchar_t *)NULL
)
824 length
+= fprintf(fp
, fmtbuf
,
825 exprreal(nextarg(&np
)));
827 adjust_buf(cp
, &cplen
, &bptr
, fmtbuf
, 0);
828 (void) wsprintf(bptr
, (const char *) fmtbuf
,
829 exprreal(nextarg(&np
)));
830 bptr
+= (slen
= wcslen(bptr
));
841 sprintf(ofmtp
, "%lld", (INT
)exprint(nextarg(&np
)));
842 ofmtp
+= strlen(ofmtp
);
844 ofmtp
+= sprintf(ofmtp
, "%lld",
845 (INT
)exprint(nextarg(&np
)));
851 *ofmtp
= (wchar_t)NULL
;
852 (void) fprintf(fp
, "%s", fmtbuf
);
855 *ofmtp
++ = (wchar_t)c
;
864 * If printing to a character buffer then make sure it is
865 * null-terminated and only uses as much space as required.
867 if (bptr
!= (wchar_t *)NULL
) {
869 *cp
= erealloc(*cp
, (length
+1) * sizeof (wchar_t));
875 * Return the next argument from the list.
882 if ((np
= getlist(npp
)) == NNULL
)
883 awkerr(gettext("insufficient arguments to printf or sprintf"));
884 if (isleaf(np
->n_flags
) && np
->n_type
== PARM
)
891 * Check and adjust the length of the buffer that has been passed in
892 * to make sure that it has space to accommodate the sequence string
893 * described in fmtstr. This routine is used by xprintf() to allow
894 * for arbitrarily long sprintf() strings.
896 * bp = start of current buffer
897 * len = length of current buffer
898 * offset = offset in current buffer
899 * fmtstr = format string to check
900 * slen = size of string for %s formats
903 adjust_buf(wchar_t **bp
, int *len
, wchar_t **offset
, char *fmtstr
, size_t slen
)
911 } while (strchr("-+ 0", *fmtstr
) != (char *)0 || *fmtstr
== ('#'));
912 if (*fmtstr
!= '*') {
913 if (isdigit(*fmtstr
)) {
915 while (isdigit(*++fmtstr
))
916 width
= width
* 10 + *fmtstr
- '0';
920 if (*fmtstr
== '.') {
921 if (*++fmtstr
!= '*') {
923 while (isdigit(*++fmtstr
))
924 prec
= prec
* 10 + *fmtstr
- '0';
928 if (strchr("Llh", *fmtstr
) != (char *)0)
930 if (*fmtstr
== 'S') {
931 if (width
&& slen
< width
)
933 if (prec
&& slen
> prec
)
940 if (*offset
+ width
> *bp
+ *len
) {
943 *bp
= erealloc(*bp
, *len
* sizeof (wchar_t));
949 awk_putwc(wchar_t c
, FILE *fp
)
954 if ((mbl
= wctomb(mb
, c
)) > 0) {
956 (void) fputs(mb
, fp
);
958 awkerr(gettext("invalid wide character %x"), c
);