4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
29 * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
30 * using regcomp(3c), regexec(3c) interfaces. This is an XCU4
31 * porting aid. switches out to libgen compile/step if collation
34 * Goal is to work with vi and sed/ed.
35 * Returns expbuf in dhl format (encoding of first two bytes).
36 * Note also that this is profoundly single threaded. You
37 * cannot call compile twice with two separate search strings
38 * because the second call will wipe out the earlier stored string.
39 * This must be fixed, plus a general cleanup should be performed
40 * if this is to be integrated into libc.
44 #pragma ident "%Z%%M% %I% %E% SMI"
48 #include <sys/types.h>
58 * pseudo compile/step/advance global variables
extern char *locs;		/* for stopping excess recursion */
extern char *loc1;		/* 1st character which matched RE */
extern char *loc2;		/* char after last char in matched RE */
extern char *braslist[];	/* start of nbra subexp */
extern char *braelist[];	/* end of nbra subexp */
int regcomp_flags;		/* interface to specify cflags for regcomp */
/* Public cleanup hook: releases the regex_t members of a struct regex_comp. */
void regex_comp_free(void *a);

/* File-private helpers behind compile()/step()/advance(). */
static int dhl_step(const char *str, const char *ep);
static int dhl_advance(const char *str, const char *ep);
static int map_errnos(int);		/* Convert regcomp error */
static int dhl_doit(const char *, const regex_t *, const int flags);
static char *dhl_compile(const char *instr, char *ep, char *endbuf);
79 * # of sub re's: NOTE: For now limit on bra list defined here
80 * but fix is to add maxbra define to regex.h
81 * One problem is that a bigger number is a performance hit since
82 * regexec() has a slow initialization loop that goes around SEPSIZE times
85 static regmatch_t rm
[SEPSIZE
]; /* ptr to list of RE matches */
88 * Structure to contain dl encoded first two bytes for vi, plus hold two
89 * regex structures, one for advance and one for step.
91 static struct regex_comp
{
92 char r_head
[2]; /* Header for DL encoding for vi */
93 regex_t r_stp
; /* For use by step */
94 regex_t r_adv
; /* For use by advance */
98 * global value for the size of a regex_comp structure:
100 size_t regexc_size
= sizeof (reg_comp
);
/*
 * compile - public entry point with the libgen compile() calling
 * convention.  Forwards its arguments unchanged to dhl_compile() and
 * returns whatever that routine returns.
 *
 *	instr	the regular expression text
 *	expbuf	buffer to receive the compiled form (handed to dhl_compile)
 *	endbuf	end address of expbuf
 */
char *
compile(const char *instr, char *expbuf, char *endbuf)
{
	return (dhl_compile(instr, expbuf, endbuf));
}
/*
 * step - public entry point with the libgen step() calling convention.
 * Forwards its arguments unchanged to dhl_step() and returns its result.
 *
 *	instr	string to be searched
 *	expbuf	compiled expression produced by compile()
 */
int
step(const char *instr, const char *expbuf)
{
	return (dhl_step(instr, expbuf));
}
/*
 * advance - public entry point with the libgen advance() calling
 * convention.  Forwards its arguments unchanged to dhl_advance() and
 * returns its result.
 *
 *	instr	string to be matched
 *	expbuf	compiled expression produced by compile()
 */
int
advance(const char *instr, const char *expbuf)
{
	return (dhl_advance(instr, expbuf));
}
123 * the compile and step routines here simulate the old libgen routines of
124 * compile/step Re: regexpr(3G). in order to do this, we must assume
125 * that expbuf[] consists of the following format:
126 * 1) the first two bytes consist of a special encoding - see below.
127 * 2) the next part is a regex_t used by regexec()/regcomp() for step
128 * 3) the final part is a regex_t used by regexec()/regcomp() for advance
130 * the special encoding of the first two bytes is referenced throughout
131 * vi. apparently expbuf[0] is set to:
132 * = 0 upon initialization
133 * = 1 if the first char of the RE is a ^
134 * = 0 if the first char of the RE isn't a ^
135 * and expbuf[1-35+] = bitmap of the type of RE chars in the expression.
136 * this is apparently 0 if there's no RE.
137 * Here, we use expbuf[0] in a similar fashion; and expbuf[1] is non-zero
138 * if there's at least 1 RE in the string.
139 * I say "apparently" as the code to compile()/step() is poorly written.
142 dhl_compile(instr
, expbuf
, endbuf
)
143 const char *instr
; /* the regular expression */
144 char *expbuf
; /* where the compiled RE gets placed */
145 char *endbuf
; /* ending addr of expbuf */
149 char adv_instr
[4096]; /* PLENTY big temp buffer */
150 char *instrp
; /* PLENTY big temp buffer */
152 if (*instr
== (char) NULL
) {
158 * Check values of expbuf and endbuf
160 if (expbuf
== NULL
) {
161 if ((expbuf
= malloc(regexc_size
)) == NULL
) {
165 memset(®_comp
, 0, regexc_size
);
167 endbuf
= expbuf
+ regexc_size
;
168 } else { /* Check if enough memory was allocated */
169 if (expbuf
+ regexc_size
> endbuf
) {
173 memcpy(®_comp
, expbuf
, regexc_size
);
183 * Free any data being held for previous search strings
185 regex_comp_free(®_comp
);
188 * We call regcomp twice, once to get a regex_t for use by step()
189 * and then again for use by advance()
191 if ((rv
= regcomp(®_comp
.r_stp
, instr
, regcomp_flags
)) != 0) {
192 regerrno
= map_errnos(rv
); /* Convert regcomp error */
196 * To support advance, which assumes an implicit ^ to match at start
197 * of line we prepend a ^ to the pattern by copying to a temp buffer
201 instrp
= (char *) instr
; /* String already has leading ^ */
204 strncpy(&adv_instr
[1], instr
, 2048);
208 if ((rv
= regcomp(®_comp
.r_adv
, instrp
, regcomp_flags
)) != 0) {
209 regerrno
= map_errnos(rv
); /* Convert regcomp error */
214 * update global variables
216 nbra
= (int) reg_comp
.r_adv
.re_nsub
> 0 ?
217 (int) reg_comp
.r_adv
.re_nsub
: 0;
221 * Set the header flags for use by vi
223 if (instr
[0] == '^') /* if beginning of string, */
224 reg_comp
.r_head
[0] = 1; /* set special flag */
226 reg_comp
.r_head
[0] = 0; /* clear special flag */
228 * note that for a single BRE, nbra will be 0 here.
229 * we're guaranteed that, at this point, a RE has been found.
231 reg_comp
.r_head
[1] = 1; /* set special flag */
233 * Copy our reg_comp structure to expbuf
235 (void) memcpy(expbuf
, (char *) ®_comp
, regexc_size
);
239 * Return code from libgen regcomp with mods. Note weird return
240 * value - if space is malloc'd return pointer to start of space,
241 * if user provided their own space, return pointer to 1+last byte
249 reglength
= regexc_size
;
254 return (expbuf
+ regexc_size
);
259 * dhl_step: step through a string until a RE match is found, or end of str
263 const char *str
; /* characters to be checked for a match */
264 const char *ep
; /* compiled RE from dhl_compile() */
267 * Check if we're passed a null ep
270 regerrno
= 41; /* No remembered search string error */
274 * Call common routine with r_stp (step) structure
276 return (dhl_doit(str
, &(((struct regex_comp
*) ep
)->r_stp
),
277 ((locs
!= NULL
) ? REG_NOTBOL
: 0)));
281 * dhl_advance: implement advance
285 const char *str
; /* characters to be checked for a match */
286 const char *ep
; /* compiled RE from dhl_compile() */
290 * Check if we're passed a null ep
293 regerrno
= 41; /* No remembered search string error */
297 * Call common routine with r_adv (advance) structure
299 rv
= dhl_doit(str
, &(((struct regex_comp
*) ep
)->r_adv
), 0);
300 loc1
= NULL
; /* Clear it per the compile man page */
305 * dhl_doit - common code for step and advance
308 dhl_doit(str
, rep
, flags
)
309 const char *str
; /* characters to be checked for a match */
311 const int flags
; /* flags to be passed to regexec directly */
315 regmatch_t
*prm
; /* ptr to current regmatch_t */
318 * Check if we're passed a null regex_t
321 regerrno
= 41; /* No remembered search string error */
328 if ((rv
= regexec(rep
, str
, SEPSIZE
, prm
, flags
)) != REG_OK
) {
329 if (rv
== REG_NOMATCH
)
331 regerrno
= map_errnos(rv
);
335 loc1
= (char *)str
+ prm
->rm_so
;
336 loc2
= (char *)str
+ prm
->rm_eo
;
339 * Now we need to fill up the bra lists with all of the sub re's
340 * Note we subtract nsub -1, and preincrement prm.
342 for (i
= 0; i
<= rep
->re_nsub
; i
++) {
343 prm
++; /* XXX inc past first subexp */
344 braslist
[i
] = (char *)str
+ prm
->rm_so
;
345 braelist
[i
] = (char *)str
+ prm
->rm_eo
;
347 regerrno
= 50; /* regex overflow */
353 * Inverse logic, a zero from regexec - success, is a 1
362 * regerrno to compile/step error mapping:
363 * This is really a big compromise. Some errors don't map at all
364 * like regcomp error 15 is generated by both compile() error types
365 * 44 & 46. So which one should we map to?
366 * Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
367 * To do your errors right use xregerr() to get the regcomp error
368 * string and print that.
370 * | regcomp/regexec | Compile/step/advance |
371 * +---------------------------------+--------------------------------------+
372 * 0 REG_OK Pattern matched 1 - Pattern matched
373 * 1 REG_NOMATCH No match 0 - Pattern didn't match
374 * 2 REG_ECOLLATE Bad collation elmnt. 67 - Returned by compile on mbtowc err
375 * 3 REG_EESCAPE trailing \ in patrn 45 - } expected after \.
376 * 4 REG_ENEWLINE \n before end pattrn 36 - Illegal or missing delimiter.
377 * 5 REG_ENSUB Over 9 \( \) pairs 43 - Too many \(
378 * 6 REG_ESUBREG Bad number in \[0-9] 25 - ``\digit'' out of range.
379 * 7 REG_EBRACK [ ] imbalance 49 - [ ] imbalance.
380 * 8 REG_EPAREN ( ) imbalance 42 - \(~\) imbalance.
381 * 9 REG_EBRACE \{ \} imbalance 45 - } expected after \.
382 * 10 REG_ERANGE bad range endpoint 11 - Range endpoint too large.
383 * 11 REG_ESPACE no memory for pattern 50 - Regular expression overflow.
384 * 12 REG_BADRPT invalid repetition 36 - Illegal or missing delimiter.
385 * 13 REG_ECTYPE invalid char-class 67 - illegal byte sequence
386 * 14 REG_BADPAT syntax error 50 - Regular expression overflow.
387 * 15 REG_BADBR \{ \} contents bad 46 - First number exceeds 2nd in \{~\}
388 * 16 REG_EFATAL internal error 50 - Regular expression overflow.
389 * 17 REG_ECHAR bad multibyte char 67 - illegal byte sequence
390 * 18 REG_STACK stack overflow 50 - Regular expression overflow.
391 * 19 REG_ENOSYS function not supported 50 - Regular expression overflow.
393 * For reference here's the compile/step errno's. We don't generate
394 * 41 here - it's done earlier, nor 44 since we can't tell it from 46.
396 * 11 - Range endpoint too large.
398 * 25 - ``\digit'' out of range.
399 * 36 - Illegal or missing delimiter.
400 * 41 - No remembered search string.
401 * 42 - \(~\) imbalance.
403 * 44 - More than 2 numbers given in "\{~\}"
404 * 45 - } expected after \.
405 * 46 - First number exceeds 2nd in "\{~\}"
406 * 49 - [ ] imbalance.
407 * 50 - Regular expression overflow.
411 map_errnos(int Errno
)
476 * This is a routine to clean up the subtle substructure of the struct
477 * regex_comp type for use by clients of this module. Since the struct
478 * type is private, we use a generic interface, and trust the
479 * application to be damn sure that this operation is valid for the
484 regex_comp_free(void * a
)
487 * Free any data being held for previous search strings
490 if (((struct regex_comp
*) a
) == NULL
) {
494 regfree(&((struct regex_comp
*)a
)->r_stp
);
495 regfree(&((struct regex_comp
*)a
)->r_adv
);