4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved.
31 * awk -- mainline, yylex, etc.
33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
43 static char *progfiles
[NPFILE
]; /* Programmes files for yylex */
44 static char **progfilep
= &progfiles
[0]; /* Pointer to last file */
45 static wchar_t *progptr
; /* In-memory programme */
46 static int proglen
; /* Length of progptr */
47 static wchar_t context
[NCONTEXT
]; /* Circular buffer of context */
48 static wchar_t *conptr
= &context
[0]; /* context ptr */
49 static FILE *progfp
; /* Stdio stream for programme */
50 static char *filename
;
55 #define AWK_EXEC_MAGIC "<MKS AWKC>"
56 #define LEN_EXEC_MAGIC 10
58 static char unbal
[] = "unbalanced E char";
60 static void awkarginit(int c
, char **av
);
61 static int lexid(wint_t c
);
62 static int lexnumber(wint_t c
);
63 static int lexstring(wint_t endc
);
64 static int lexregexp(wint_t endc
);
66 static void awkvarinit(void);
67 static wint_t lexgetc(void);
68 static void lexungetc(wint_t c
);
69 static size_t lexescape(wint_t endc
, int regx
, int cmd_line_operand
);
70 static void awkierr(int perr
, char *fmt
, va_list ap
);
71 static int usage(void);
72 void strescape(wchar_t *str
);
73 static const char *toprint(wint_t);
75 static wchar_t *mbconvert(char *str
);
77 extern int isclvar(wchar_t *arg
);
83 main(int argc
, char *argv
[])
91 linebuf
= emalloc(NLINE
* sizeof (wchar_t));
94 * At this point only messaging should be internationalized.
95 * numbers are still scanned as in the Posix locale.
97 (void) setlocale(LC_ALL
, "");
98 (void) setlocale(LC_NUMERIC
, "C");
99 #if !defined(TEXT_DOMAIN)
100 #define TEXT_DOMAIN "SYS_TEST"
102 (void) textdomain(TEXT_DOMAIN
);
106 while (argc
> 1 && *argv
[1] == '-') {
107 void *save_ptr
= NULL
;
108 ap
= mbstowcsdup(&argv
[1][1]);
115 save_ptr
= (void *) ap
;
118 if (*ap
== '-' && ap
[1] == '\0')
120 for (; *ap
!= '\0'; ++ap
) {
130 (void) fprintf(stderr
,
131 gettext("Missing script file\n"));
134 *progfilep
++ = argv
[1];
142 (void) fprintf(stderr
,
143 gettext("Missing field separator\n"));
146 ap
= mbstowcsdup(argv
[1]);
152 strassign(varFS
, linebuf
, FALLOC
,
161 (void) fprintf(stderr
,
162 gettext("Missing variable assignment\n"));
165 arg
= mbconvert(argv
[1]);
167 * Ensure the variable expression
168 * is valid (correct form).
170 if (((vp
= wcschr(arg
, '=')) != NULL
) &&
174 strassign(vlook(arg
), linebuf
,
179 (void) fprintf(stderr
, gettext(
180 "Invalid form for variable "
181 "assignment: %S\n"), arg
);
190 (void) fprintf(stderr
,
191 gettext("Unknown option \"-%S\"\n"), ap
);
199 if (progfilep
== &progfiles
[0]) {
202 filename
= "[command line]"; /* BUG: NEEDS TRANSLATION */
203 progptr
= mbstowcsdup(argv
[1]);
204 proglen
= wcslen(progptr
);
211 awkarginit(argc
, argv
);
218 * Ok, done parsing, so now activate the rest of the nls stuff, set
219 * the radix character.
221 (void) setlocale(LC_ALL
, "");
222 radixpoint
= *localeconv()->decimal_point
;
229 * Do initial setup of buffers, etc.
230 * This must be called before most processing
231 * and especially before lexical analysis.
232 * Variables initialised here will be overruled by command
233 * line parameter initialisation.
240 (void) setvbuf(stderr
, NULL
, _IONBF
, 0);
242 if ((NIOSTREAM
= sysconf(_SC_OPEN_MAX
) - 4) <= 0) {
243 (void) fprintf(stderr
,
244 gettext("not enough available file descriptors"));
247 ofiles
= (OFILE
*)emalloc(sizeof (OFILE
)*NIOSTREAM
);
248 #ifdef A_ZERO_POINTERS
249 (void) memset((wchar_t *)ofiles
, 0, sizeof (OFILE
) * NIOSTREAM
);
252 /* initialize file descriptor table */
254 for (fp
= ofiles
; fp
< &ofiles
[NIOSTREAM
]; fp
+= 1) {
257 fp
->f_name
= (char *)0;
261 constant
= intnode((INT
)0);
263 const0
= intnode((INT
)0);
264 const1
= intnode((INT
)1);
265 constundef
= emptynode(CONSTANT
, 0);
266 constundef
->n_flags
= FSTRING
|FVINT
;
267 constundef
->n_string
= _null
;
268 constundef
->n_strlen
= 0;
269 inc_oper
= emptynode(ADD
, 0);
270 inc_oper
->n_right
= const1
;
271 asn_oper
= emptynode(ADD
, 0);
272 field0
= node(FIELD
, const0
, NNULL
);
277 for (rp
= &resfuncs
[0]; rp
->rf_name
!= (LOCCHARP
)NULL
; ++rp
) {
278 np
= finstall(rp
->rf_name
, rp
->rf_func
, rp
->rf_type
);
284 for (rp
= &reserved
[0]; rp
->r_name
!= (LOCCHARP
)NULL
; ++rp
) {
285 switch (rp
->r_type
) {
289 np
= vlook(rp
->r_name
);
290 if (rp
->r_type
== SVAR
)
291 np
->n_flags
|= FSPECIAL
;
292 if (rp
->r_svalue
!= NULL
)
293 strassign(np
, rp
->r_svalue
, FSTATIC
,
294 (size_t)rp
->r_ivalue
);
296 constant
->n_int
= rp
->r_ivalue
;
297 (void) assign(np
, constant
);
303 kinstall(rp
->r_name
, (int)rp
->r_ivalue
);
310 varFNR
= vlook(s_FNR
);
312 varOFMT
= vlook(s_OFMT
);
313 varCONVFMT
= vlook(s_CONVFMT
);
314 varOFS
= vlook(s_OFS
);
315 varORS
= vlook(s_ORS
);
318 varARGC
= vlook(s_ARGC
);
319 varSUBSEP
= vlook(s_SUBSEP
);
320 varENVIRON
= vlook(s_ENVIRON
);
321 varFILENAME
= vlook(s_FILENAME
);
322 varSYMTAB
= vlook(s_SYMTAB
);
323 incNR
= node(ASG
, varNR
, node(ADD
, varNR
, const1
));
324 incFNR
= node(ASG
, varFNR
, node(ADD
, varFNR
, const1
));
325 clrFNR
= node(ASG
, varFNR
, const0
);
329 * Initialise awk ARGC, ARGV variables.
332 awkarginit(int ac
, char **av
)
337 ARGVsubi
= node(INDEX
, vlook(s_ARGV
), constant
);
339 constant
->n_int
= ac
;
340 (void) assign(varARGC
, constant
);
341 for (i
= 0; i
< ac
; ++i
) {
342 cp
= mbstowcsdup(av
[i
]);
344 strassign(exprreduce(ARGVsubi
), cp
,
345 FSTATIC
|FSENSE
, wcslen(cp
));
351 * Clean up when done parsing a function.
352 * All formal parameters, because of a deal (funparm) in
353 * yylex, get put into the symbol table in front of any
354 * global variable of the same name. When the entire
355 * function is parsed, remove these formal dummy nodes
356 * from the symbol table but retain the nodes because
357 * the generated tree points at them.
364 while ((formal
= getlist(&np
)) != NNULL
)
365 delsymtab(formal
, 0);
369 * The lexical analyzer.
376 static int savetoken
= 0;
378 static int isfuncdef
;
379 static int nbrace
, nparen
, nbracket
;
380 static struct ctosymstruct
{
383 { '|', BAR
}, { '^', CARAT
},
384 { '~', TILDE
}, { '<', LANGLE
},
385 { '>', RANGLE
}, { '+', PLUSC
},
386 { '-', HYPHEN
}, { '*', STAR
},
387 { '/', SLASH
}, { '%', PERCENT
},
388 { '!', EXCLAMATION
}, { '$', DOLLAR
},
389 { '[', LSQUARE
}, { ']', RSQUARE
},
390 { '(', LPAREN
}, { ')', RPAREN
},
391 { ';', SEMI
}, { '{', LBRACE
},
392 { '}', RBRACE
}, { 0, 0 }
398 } else if (redelim
!= '\0') {
403 c
= lexlast
= lexregexp(c
);
405 } else while ((c
= lexgetc()) != WEOF
) {
406 if (iswalpha(c
) || c
== '_') {
408 } else if (iswdigit(c
) || c
== '.') {
410 } else if (isWblank(c
)) {
423 while ((c
= lexgetc()) != '\n' && c
!= WEOF
)
429 if ((c1
= lexgetc()) == '+')
438 if ((c1
= lexgetc()) == '-')
447 if ((c1
= lexgetc()) == '=')
449 else if (c1
== '*') {
450 if ((c1
= lexgetc()) == '=')
461 if ((c1
= lexgetc()) == '=') {
470 if ((c1
= lexgetc()) == '=' &&
471 lexlast
!= RE
&& lexlast
!= NRE
&&
472 lexlast
!= ';' && lexlast
!= '\n' &&
473 lexlast
!= ',' && lexlast
!= '(')
480 if ((c1
= lexgetc()) == '=')
487 if ((c1
= lexgetc()) == '&')
494 if ((c1
= lexgetc()) == '|')
504 if ((c1
= lexgetc()) == '=')
510 if (nparen
== 0 && inprint
)
516 if ((c1
= lexgetc()) == '=')
523 if ((c1
= lexgetc()) == '=')
532 if ((c1
= lexgetc()) == '=')
543 if (catterm
|| inprint
) {
568 if (lexlast
!= ';') {
596 if (lexlast
!= ';') {
607 if (--nbracket
< 0) {
618 if ((c1
= lexgetc()) == '\n')
638 gettext("invalid character \"%s\""),
655 } else if (!isfuncdef
) {
656 if ((c1
= lexgetc()) != '(')
678 if (!catterm
|| lexlast
!= CONSTANT
|| wasfield
)
711 * Map character constants to symbolic names.
713 for (i
= 0; ctosym
[i
].c
!= 0; i
++)
714 if (c
== ctosym
[i
].c
) {
721 (void) printf("%d\n", (int)c
);
727 * Read a number for the lexical analyzer.
728 * Input is the first character of the number.
729 * Return value is the lexical type.
746 } else if (c
== 'e' || c
== 'E') {
747 if ((c
= lexgetc()) != '-' && c
!= '+') {
757 } while ((c
= lexgetc()) != WEOF
);
759 if (dotfound
&& cp
== linebuf
+1)
763 if (!dotfound
&& !efound
&&
764 ((number
= wcstol(linebuf
, (wchar_t **)0, 10)), errno
!= ERANGE
))
765 yylval
.node
= intnode(number
);
767 yylval
.node
= realnode((REAL
)wcstod(linebuf
, (wchar_t **)0));
772 * Read an identifier.
773 * Input is first character of identifier.
787 } while (iswalpha(c
) || iswdigit(c
) || c
== '_');
790 yylval
.node
= np
= vlook(linebuf
);
792 switch (np
->n_type
) {
794 switch (np
->n_keywtype
) {
799 return ((int)np
->n_keywtype
);
806 * If reading the argument list, create a dummy node
807 * for the duration of that function. These variables
808 * can be removed from the symbol table at function end
809 * but they must still exist because the execution tree
814 np
= emptynode(PARM
, i
= (cp
-linebuf
));
815 np
->n_flags
= FSTRING
;
816 np
->n_string
= _null
;
818 (void) memcpy(np
->n_name
, linebuf
,
819 (i
+1) * sizeof (wchar_t));
822 } else if (np
== varNF
|| (np
== varFS
&&
823 (!doing_begin
|| begin_getline
))) {
825 * If the user program references NF or sets
826 * FS either outside of a begin block or
827 * in a begin block after a getline then the
828 * input line will be split immediately upon read
829 * rather than when a field is first referenced.
832 } else if (np
== varENVIRON
)
840 * It is ok to redefine functions as parameters
842 if (funparm
) goto do_funparm
;
847 * When a getline is encountered, clear the 'doing_begin' flag.
848 * This will force the 'needsplit' flag to be set, even inside
849 * a begin block, if FS is altered. (See VAR case above)
860 * Read a string for the lexical analyzer.
861 * `endc' terminates the string.
864 lexstring(wint_t endc
)
866 size_t length
= lexescape(endc
, 0, 0);
868 yylval
.node
= stringnode(linebuf
, FALLOC
, length
);
873 * Read a regular expression.
876 lexregexp(wint_t endc
)
878 (void) lexescape(endc
, 1, 0);
879 yylval
.node
= renode(linebuf
);
884 * Process a string, converting the escape characters as required by
885 * 1003.2. The processed string ends up in the global linebuf[]. This
886 * routine also changes the value of 'progfd' - the program file
887 * descriptor, so it should be used with some care. It is presently used to
888 * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
891 strescape(wchar_t *str
)
894 proglen
= wcslen(str
) + 1; /* Include \0 */
895 (void) lexescape('\0', 0, 1);
900 * Read a string or regular expression, terminated by ``endc'',
901 * for lexical analyzer, processing escape sequences.
902 * Return string length.
905 lexescape(wint_t endc
, int regx
, int cmd_line_operand
)
907 static char nlre
[256];
908 static char nlstr
[256];
909 static char eofre
[256];
910 static char eofstr
[256];
916 if (first_time
== 1) {
917 (void) strcpy(nlre
, gettext("Newline in regular expression\n"));
918 (void) strcpy(nlstr
, gettext("Newline in string\n"));
919 (void) strcpy(eofre
, gettext("EOF in regular expression\n"));
920 (void) strcpy(eofstr
, gettext("EOF in string\n"));
925 while ((c
= lexgetc()) != endc
) {
927 awkerr(regx
? nlre
: nlstr
);
929 switch (c
= lexgetc(), c
) {
969 while (iswxdigit(c
= lexgetc())) {
972 else if (iswupper(c
))
992 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
993 * requires processing of the octal escapes both in strings and
994 * regular expressions. The following code is disabled instead of
995 * removed as back-referencing may be reintroduced in a future draft
999 * For regular expressions, we disallow
1000 * \ooo to mean octal character, in favour
1001 * of back referencing.
1012 if ((c
= lexgetc()) > '7' || c
< '0')
1017 * an octal escape sequence must have at least
1018 * 2 digits after the backslash, otherwise
1019 * it gets passed straight thru for possible
1020 * use in backreferencing.
1033 if (c
!= endc
|| cmd_line_operand
) {
1041 awkerr(regx
? eofre
: eofstr
);
1045 return (cp
- linebuf
);
1049 * Build a regular expression NODE.
1050 * Argument is the string holding the expression.
1058 np
= emptynode(RE
, 0);
1059 np
->n_left
= np
->n_right
= NNULL
;
1060 if ((n
= REGWCOMP(&np
->n_regexp
, s
)) != REG_OK
) {
1064 m
= REGWERROR(n
, np
->n_regexp
, NULL
, 0);
1065 p
= (char *)emalloc(m
);
1066 REGWERROR(n
, np
->n_regexp
, p
, m
);
1067 awkerr("/%S/: %s", s
, p
);
1072 * Get a character for the lexical analyser routine.
1078 static char **files
= &progfiles
[0];
1080 if (progfp
!= FNULL
&& (c
= fgetwc(progfp
)) != WEOF
)
1083 if (progptr
!= NULL
) {
1089 if (progfp
!= FNULL
) {
1090 if (progfp
!= stdin
)
1091 (void) fclose(progfp
);
1096 if (files
< progfilep
) {
1097 filename
= *files
++;
1099 if (filename
[0] == '-' && filename
[1] == '\0')
1101 else if ((progfp
= fopen(filename
, r
))
1103 (void) fprintf(stderr
,
1104 gettext("script file \"%s\""), filename
);
1113 if (conptr
>= &context
[NCONTEXT
])
1114 conptr
= &context
[0];
1121 * Return a character for lexical analyser.
1122 * Only one returned character is (not enforced) legitimate.
1130 if (conptr
== &context
[0])
1131 conptr
= &context
[NCONTEXT
];
1134 if (progfp
!= FNULL
) {
1135 (void) ungetwc(c
, progfp
);
1145 * Syntax errors during parsing.
1148 yyerror(char *s
, ...)
1150 if (lexlast
== FUNC
|| lexlast
== GETLINE
|| lexlast
== KEYWORD
)
1151 if (lexlast
== KEYWORD
)
1152 awkerr(gettext("inadmissible use of reserved keyword"));
1154 awkerr(gettext("attempt to redefine builtin function"));
1159 * Error routine for all awk errors.
1163 awkerr(char *fmt
, ...)
1167 va_start(args
, fmt
);
1168 awkierr(0, fmt
, args
);
1173 * Error routine like "awkerr" except that it prints out
1174 * a message that includes an errno-specific indication.
1178 awkperr(char *fmt
, ...)
1182 va_start(args
, fmt
);
1183 awkierr(1, fmt
, args
);
1188 * Common internal routine for awkerr, awkperr
1191 awkierr(int perr
, char *fmt
, va_list ap
)
1193 static char sep1
[] = "\n>>>\t";
1194 static char sep2
[] = "\t<<<";
1195 int saveerr
= errno
;
1197 (void) fprintf(stderr
, "%s: ", _cmdname
);
1199 (void) fprintf(stderr
, gettext("line %u ("),
1200 curnode
== NNULL
? 0 : curnode
->n_lineno
);
1202 (void) fprintf(stderr
, "NR=%lld): ",
1203 (INT
)exprint(varNR
));
1205 (void) fprintf(stderr
, "%s): ",
1206 phase
== BEGIN
? s_BEGIN
: s_END
);
1207 } else if (lineno
!= 0) {
1208 (void) fprintf(stderr
, gettext("file \"%s\": "), filename
);
1209 (void) fprintf(stderr
, gettext("line %u: "), lineno
);
1211 (void) vfprintf(stderr
, gettext(fmt
), ap
);
1213 (void) fprintf(stderr
, ": %s", strerror(saveerr
));
1214 if (perr
!= 2 && !running
) {
1219 (void) fprintf(stderr
, gettext(" Context is:%s"), sep1
);
1223 if (cp
>= &context
[NCONTEXT
])
1225 if ((c
= *cp
++) != '\0')
1226 (void) fputs(c
== '\n' ? sep1
: toprint(c
),
1229 (void) fputs(sep2
, stderr
);
1231 (void) fprintf(stderr
, "\n");
1240 if ((cp
= malloc(n
)) == NULL
)
1246 erealloc(wchar_t *p
, unsigned n
)
1250 if ((cp
= realloc(p
, n
)) == NULL
)
1257 * usage message for awk
1262 (void) fprintf(stderr
, gettext(
1263 "Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
1264 " awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n"));
1270 mbconvert(char *str
)
1272 static wchar_t *op
= 0;
1276 return (op
= mbstowcsdup(str
));
1280 mbunconvert(wchar_t *str
)
1282 static char *op
= 0;
1286 return (op
= wcstombsdup(str
));
1290 * Solaris port - following functions are typical MKS functions written
1291 * to work for Solaris.
1295 mbstowcsdup(char *s
)
1301 if ((w
= (wchar_t *)malloc(n
* sizeof (wchar_t))) == NULL
)
1304 if (mbstowcs(w
, s
, n
) == (size_t)-1)
1311 wcstombsdup(wchar_t *w
)
1316 /* Fetch memory for worst case string length */
1319 if ((mb
= (char *)malloc(n
)) == NULL
) {
1323 /* Convert the string */
1324 if ((n
= wcstombs(mb
, w
, n
)) == -1) {
1332 /* Shrink the string down */
1333 if ((mb
= (char *)realloc(mb
, strlen(mb
)+1)) == NULL
) {
1340 * The upe_ctrls[] table contains the printable 'control-sequences' for the
1341 * character values 0..31 and 127. The first entry is for value 127, thus the
1342 * entries for the remaining character values are from 1..32.
1344 static const char *const upe_ctrls
[] =
1347 "^@", "^A", "^B", "^C", "^D", "^E", "^F", "^G",
1348 "^H", "^I", "^J", "^K", "^L", "^M", "^N", "^O",
1349 "^P", "^Q", "^R", "^S", "^T", "^U", "^V", "^W",
1350 "^X", "^Y", "^Z", "^[", "^\\", "^]", "^^", "^_"
1355 * Return a printable string corresponding to the given character value. If
1356 * the character is printable, simply return it as the string. If it is in
1357 * the range specified by table 5-101 in the UPE, return the corresponding
1358 * string. Otherwise, return an octal escape sequence.
1365 static char mbch
[MB_LEN_MAX
+1];
1366 static char buf
[5 * MB_LEN_MAX
+ 1];
1368 if ((n
= wctomb(mbch
, c
)) == -1) {
1369 /* Should never happen */
1370 (void) sprintf(buf
, "\\%x", c
);
1376 } else if (c
== 127) {
1377 return (upe_ctrls
[0]);
1378 } else if (c
< 32) {
1379 /* Print as in Table 5-101 in the UPE */
1380 return (upe_ctrls
[c
+1]);
1382 /* Print as an octal escape sequence */
1383 for (len
= 0, ptr
= (unsigned char *) mbch
; 0 < n
; --n
, ++ptr
)
1384 len
+= sprintf(buf
+len
, "\\%03o", *ptr
);
1390 wcoff(const wchar_t *astring
, const int off
)
1392 const wchar_t *s
= astring
;
1394 char mb
[MB_LEN_MAX
];
1398 if ((n
= wctomb(mb
, *s
)) == 0)
1406 return (s
- astring
);
1410 #define NREGHOLD 1024 /* max number unused entries */
1412 static int nregunref
;
1416 struct regcache
*regcachep
;
1424 struct reghashq hash
;
1427 static struct qelem reghash
[NREGHASH
], reglink
;
1430 * Generate a hash value of the given wchar string.
1431 * The hashing method is similar to what Java does for strings.
1434 regtxthash(const wchar_t *str
)
1438 while (*str
!= L
'\0')
1439 k
= (31 * k
) + *str
++;
1446 return (k
% NREGHASH
);
1450 int_regwcomp(REGEXP
*r
, const wchar_t *pattern
)
1457 struct regcache
*rcp
;
1459 key
= regtxthash(pattern
);
1460 for (qp
= reghash
[key
].q_forw
; qp
!= NULL
; qp
= qp
->q_forw
) {
1461 rcp
= ((struct reghashq
*)qp
)->regcachep
;
1462 if (*rcp
->pattern
== *pattern
&&
1463 wcscmp(rcp
->pattern
, pattern
) == 0)
1467 /* update link. put this one at the beginning */
1468 if (rcp
!= (struct regcache
*)reglink
.q_forw
) {
1470 insque(&rcp
->lq
&reglink
);
1472 if (rcp
->refcnt
== 0)
1473 nregunref
--; /* no longer unref'ed */
1475 *(struct regcache
**)r
= rcp
;
1479 if ((mbpattern
= wcstombsdup((wchar_t *)pattern
)) == NULL
)
1480 return (REG_ESPACE
);
1482 ret
= regcomp(&re
, mbpattern
, REG_EXTENDED
);
1489 if ((rcp
= malloc(sizeof (struct regcache
))) == NULL
)
1490 return (REG_ESPACE
);
1492 if ((rcp
->pattern
= wsdup(pattern
)) == NULL
) {
1495 return (REG_ESPACE
);
1498 insque(&rcp
->lq
&reglink
);
1499 insque(&rcp
->hash
.hq
&reghash
[key
]);
1500 rcp
->hash
.regcachep
= rcp
;
1502 *(struct regcache
**)r
= rcp
;
1507 int_regwfree(REGEXP r
)
1510 struct qelem
*qp
, *nqp
;
1511 struct regcache
*rcp
;
1513 rcp
= (struct regcache
*)r
;
1515 if (--rcp
->refcnt
!= 0)
1518 /* this cache has no reference */
1519 if (++nregunref
< NREGHOLD
)
1523 * We've got too much unref'ed regex. Free half of least
1527 for (qp
= reglink
.q_forw
; qp
!= NULL
; qp
= nqp
) {
1529 rcp
= (struct regcache
*)qp
;
1530 if (rcp
->refcnt
!= 0)
1533 /* free half of them */
1534 if (++cnt
< (NREGHOLD
/ 2))
1537 /* detach and free */
1539 remque(&rcp
->hash
.hq
);
1551 int_regwerror(int errcode
, REGEXP r
, char *errbuf
, size_t bufsiz
)
1553 struct regcache
*rcp
;
1555 rcp
= (struct regcache
*)r
;
1556 return (regerror(errcode
, &rcp
->re
, errbuf
, bufsiz
));
1560 int_regwexec(REGEXP r
, /* compiled RE */
1561 const wchar_t *astring
, /* subject string */
1562 size_t nsub
, /* number of subexpressions */
1563 int_regwmatch_t
*sub
, /* subexpression pointers */
1567 regmatch_t
*mbsub
= NULL
;
1569 struct regcache
*rcp
;
1571 if ((mbs
= wcstombsdup((wchar_t *)astring
)) == NULL
)
1572 return (REG_ESPACE
);
1574 if (nsub
> 0 && sub
) {
1575 if ((mbsub
= malloc(nsub
* sizeof (regmatch_t
))) == NULL
)
1576 return (REG_ESPACE
);
1579 rcp
= (struct regcache
*)r
;
1581 i
= regexec(&rcp
->re
, mbs
, nsub
, mbsub
, flags
);
1583 /* Now, adjust the pointers/counts in sub */
1584 if (i
== REG_OK
&& nsub
> 0 && mbsub
) {
1587 for (j
= 0; j
< nsub
; j
++) {
1588 regmatch_t
*ms
= &mbsub
[j
];
1589 int_regwmatch_t
*ws
= &sub
[j
];
1591 if ((k
= ms
->rm_so
) >= 0) {
1592 ws
->rm_so
= wcoff(astring
, k
);
1593 ws
->rm_sp
= astring
+ ws
->rm_so
;
1595 if ((k
= ms
->rm_eo
) >= 0) {
1596 ws
->rm_eo
= wcoff(astring
, k
);
1597 ws
->rm_ep
= astring
+ ws
->rm_eo
;
1609 int_regwdosuba(REGEXP rp
, /* compiled RE: Pattern */
1610 const wchar_t *rpl
, /* replacement string: /rpl/ */
1611 const wchar_t *src
, /* source string */
1612 wchar_t **dstp
, /* destination string */
1613 int len
, /* destination length */
1614 int *globp
) /* IN: occurrence, 0 for all; OUT: substitutions */
1616 wchar_t *dst
, *odst
;
1617 const wchar_t *ip
, *xp
;
1621 int glob
, iglob
= *globp
, oglob
= 0;
1623 int_regwmatch_t rm
[NSUB
], *rmp
;
1628 /* handle overflow of dst. we need "i" more bytes */
1631 #define OVERFLOW(i) { \
1632 int pos = op - dst; \
1633 dst = (wchar_t *)realloc(odst = dst, \
1634 (len += len + i) * sizeof (wchar_t)); \
1642 *dstp
= dst
= (wchar_t *)malloc(len
* sizeof (wchar_t));
1644 return (REG_ESPACE
);
1646 if (rp
== NULL
|| rpl
== NULL
|| src
== NULL
|| dst
== NULL
)
1647 return (REG_EFATAL
);
1649 glob
= 0; /* match count */
1650 ip
= src
; /* source position */
1651 op
= dst
; /* destination position */
1655 while ((regerr
= int_regwexec(rp
, ip
, NSUB
, rm
, flags
)) == REG_OK
) {
1656 /* Copy text preceding match */
1657 if (op
+ (i
= rm
[0].rm_sp
- ip
) >= end
)
1662 if (iglob
== 0 || ++glob
== iglob
) {
1664 xp
= rpl
; /* do substitute */
1666 xp
= L
"&"; /* preserve text */
1668 /* Perform replacement of matched substring */
1669 while ((c
= *xp
++) != '\0') {
1673 else if (c
== '\\') {
1674 if ('0' <= *xp
&& *xp
<= '9')
1675 rmp
= &rm
[*xp
++ - '0'];
1676 else if (*xp
!= '\0')
1680 if (rmp
== NULL
) { /* Ordinary character. */
1684 } else if (rmp
->rm_sp
!= NULL
&& rmp
->rm_ep
!= NULL
) {
1686 if (op
+ (i
= rmp
->rm_ep
- rmp
->rm_sp
) >= end
)
1694 if (*ip
== '\0') /* If at end break */
1696 else if (rm
[0].rm_sp
== rm
[0].rm_ep
) {
1697 /* If empty match copy next char */
1705 if (regerr
!= REG_OK
&& regerr
!= REG_NOMATCH
)
1708 /* Copy rest of text */
1709 if (op
+ (i
= wcslen(ip
)) >= end
)
1715 if ((*dstp
= dst
= (wchar_t *)realloc(odst
= dst
,
1716 sizeof (wchar_t) * (size_t)(op
- dst
))) == NULL
) {
1719 return (REG_ESPACE
);
1724 return ((oglob
== 0) ? REG_NOMATCH
: REG_OK
);