2 * re_*comp and friends - compile REs
3 * This file #includes several others (see the bottom).
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include "regex/regguts.h"
38 * forward declarations, up here so forward datatypes etc. are defined early
40 /* === regcomp.c === */
41 static void moresubs(struct vars
*, int);
42 static int freev(struct vars
*, int);
43 static void makesearch(struct vars
*, struct nfa
*);
44 static struct subre
*parse(struct vars
*, int, int, struct state
*, struct state
*);
45 static struct subre
*parsebranch(struct vars
*, int, int, struct state
*, struct state
*, int);
46 static void parseqatom(struct vars
*, int, int, struct state
*, struct state
*, struct subre
*);
47 static void nonword(struct vars
*, int, struct state
*, struct state
*);
48 static void word(struct vars
*, int, struct state
*, struct state
*);
49 static int scannum(struct vars
*);
50 static void repeat(struct vars
*, struct state
*, struct state
*, int, int);
51 static void bracket(struct vars
*, struct state
*, struct state
*);
52 static void cbracket(struct vars
*, struct state
*, struct state
*);
53 static void brackpart(struct vars
*, struct state
*, struct state
*);
54 static const chr
*scanplain(struct vars
*);
55 static void onechr(struct vars
*, chr
, struct state
*, struct state
*);
56 static void dovec(struct vars
*, struct cvec
*, struct state
*, struct state
*);
57 static void wordchrs(struct vars
*);
58 static struct subre
*subre(struct vars
*, int, int, struct state
*, struct state
*);
59 static void freesubre(struct vars
*, struct subre
*);
60 static void freesrnode(struct vars
*, struct subre
*);
61 static void optst(struct vars
*, struct subre
*);
62 static int numst(struct subre
*, int);
63 static void markst(struct subre
*);
64 static void cleanst(struct vars
*);
65 static long nfatree(struct vars
*, struct subre
*, FILE *);
66 static long nfanode(struct vars
*, struct subre
*, FILE *);
67 static int newlacon(struct vars
*, struct state
*, struct state
*, int);
68 static void freelacons(struct subre
*, int);
69 static void rfree(regex_t
*);
72 static void dump(regex_t
*, FILE *);
73 static void dumpst(struct subre
*, FILE *, int);
74 static void stdump(struct subre
*, FILE *, int);
75 static const char *stid(struct subre
*, char *, size_t);
77 /* === regc_lex.c === */
78 static void lexstart(struct vars
*);
79 static void prefixes(struct vars
*);
80 static void lexnest(struct vars
*, const chr
*, const chr
*);
81 static void lexword(struct vars
*);
82 static int next(struct vars
*);
83 static int lexescape(struct vars
*);
84 static chr
lexdigits(struct vars
*, int, int, int);
85 static int brenext(struct vars
*, chr
);
86 static void skip(struct vars
*);
87 static chr
newline(void);
88 static chr
chrnamed(struct vars
*, const chr
*, const chr
*, chr
);
90 /* === regc_color.c === */
91 static void initcm(struct vars
*, struct colormap
*);
92 static void freecm(struct colormap
*);
93 static void cmtreefree(struct colormap
*, union tree
*, int);
94 static color
setcolor(struct colormap
*, chr
, pcolor
);
95 static color
maxcolor(struct colormap
*);
96 static color
newcolor(struct colormap
*);
97 static void freecolor(struct colormap
*, pcolor
);
98 static color
pseudocolor(struct colormap
*);
99 static color
subcolor(struct colormap
*, chr c
);
100 static color
newsub(struct colormap
*, pcolor
);
101 static void subrange(struct vars
*, chr
, chr
, struct state
*, struct state
*);
102 static void subblock(struct vars
*, chr
, struct state
*, struct state
*);
103 static void okcolors(struct nfa
*, struct colormap
*);
104 static void colorchain(struct colormap
*, struct arc
*);
105 static void uncolorchain(struct colormap
*, struct arc
*);
106 static void rainbow(struct nfa
*, struct colormap
*, int, pcolor
, struct state
*, struct state
*);
107 static void colorcomplement(struct nfa
*, struct colormap
*, int, struct state
*, struct state
*, struct state
*);
110 static void dumpcolors(struct colormap
*, FILE *);
111 static void fillcheck(struct colormap
*, union tree
*, int, FILE *);
112 static void dumpchr(chr
, FILE *);
114 /* === regc_nfa.c === */
115 static struct nfa
*newnfa(struct vars
*, struct colormap
*, struct nfa
*);
116 static void freenfa(struct nfa
*);
117 static struct state
*newstate(struct nfa
*);
118 static struct state
*newfstate(struct nfa
*, int flag
);
119 static void dropstate(struct nfa
*, struct state
*);
120 static void freestate(struct nfa
*, struct state
*);
121 static void destroystate(struct nfa
*, struct state
*);
122 static void newarc(struct nfa
*, int, pcolor
, struct state
*, struct state
*);
123 static struct arc
*allocarc(struct nfa
*, struct state
*);
124 static void freearc(struct nfa
*, struct arc
*);
125 static struct arc
*findarc(struct state
*, int, pcolor
);
126 static void cparc(struct nfa
*, struct arc
*, struct state
*, struct state
*);
127 static void moveins(struct nfa
*, struct state
*, struct state
*);
128 static void copyins(struct nfa
*, struct state
*, struct state
*);
129 static void moveouts(struct nfa
*, struct state
*, struct state
*);
130 static void copyouts(struct nfa
*, struct state
*, struct state
*);
131 static void cloneouts(struct nfa
*, struct state
*, struct state
*, struct state
*, int);
132 static void delsub(struct nfa
*, struct state
*, struct state
*);
133 static void deltraverse(struct nfa
*, struct state
*, struct state
*);
134 static void dupnfa(struct nfa
*, struct state
*, struct state
*, struct state
*, struct state
*);
135 static void duptraverse(struct nfa
*, struct state
*, struct state
*);
136 static void cleartraverse(struct nfa
*, struct state
*);
137 static void specialcolors(struct nfa
*);
138 static long optimize(struct nfa
*, FILE *);
139 static void pullback(struct nfa
*, FILE *);
140 static int pull(struct nfa
*, struct arc
*);
141 static void pushfwd(struct nfa
*, FILE *);
142 static int push(struct nfa
*, struct arc
*);
144 #define INCOMPATIBLE 1 /* destroys arc */
145 #define SATISFIED 2 /* constraint satisfied */
146 #define COMPATIBLE 3 /* compatible but not satisfied yet */
147 static int combine(struct arc
*, struct arc
*);
148 static void fixempties(struct nfa
*, FILE *);
149 static int unempty(struct nfa
*, struct arc
*);
150 static void cleanup(struct nfa
*);
151 static void markreachable(struct nfa
*, struct state
*, struct state
*, struct state
*);
152 static void markcanreach(struct nfa
*, struct state
*, struct state
*, struct state
*);
153 static long analyze(struct nfa
*);
154 static void compact(struct nfa
*, struct cnfa
*);
155 static void carcsort(struct carc
*, struct carc
*);
156 static void freecnfa(struct cnfa
*);
157 static void dumpnfa(struct nfa
*, FILE *);
160 static void dumpstate(struct state
*, FILE *);
161 static void dumparcs(struct state
*, FILE *);
162 static int dumprarcs(struct arc
*, struct state
*, FILE *, int);
163 static void dumparc(struct arc
*, struct state
*, FILE *);
164 static void dumpcnfa(struct cnfa
*, FILE *);
165 static void dumpcstate(int, struct carc
*, struct cnfa
*, FILE *);
167 /* === regc_cvec.c === */
168 static struct cvec
*newcvec(int, int);
169 static struct cvec
*clearcvec(struct cvec
*);
170 static void addchr(struct cvec
*, chr
);
171 static void addrange(struct cvec
*, chr
, chr
);
172 static struct cvec
*getcvec(struct vars
*, int, int);
173 static void freecvec(struct cvec
*);
175 /* === regc_locale.c === */
176 static int pg_wc_isdigit(pg_wchar c
);
177 static int pg_wc_isalpha(pg_wchar c
);
178 static int pg_wc_isalnum(pg_wchar c
);
179 static int pg_wc_isupper(pg_wchar c
);
180 static int pg_wc_islower(pg_wchar c
);
181 static int pg_wc_isgraph(pg_wchar c
);
182 static int pg_wc_isprint(pg_wchar c
);
183 static int pg_wc_ispunct(pg_wchar c
);
184 static int pg_wc_isspace(pg_wchar c
);
185 static pg_wchar
pg_wc_toupper(pg_wchar c
);
186 static pg_wchar
pg_wc_tolower(pg_wchar c
);
187 static celt
element(struct vars
*, const chr
*, const chr
*);
188 static struct cvec
*range(struct vars
*, celt
, celt
, int);
189 static int before(celt
, celt
);
190 static struct cvec
*eclass(struct vars
*, celt
, int);
191 static struct cvec
*cclass(struct vars
*, const chr
*, const chr
*, int);
192 static struct cvec
*allcases(struct vars
*, chr
);
193 static int cmp(const chr
*, const chr
*, size_t);
194 static int casecmp(const chr
*, const chr
*, size_t);
197 /* internal variables, bundled for easy passing around */
201 const chr
*now
; /* scan pointer into string */
202 const chr
*stop
; /* end of string */
203 const chr
*savenow
; /* saved now and stop for "subroutine call" */
205 int err
; /* error code (0 if none) */
206 int cflags
; /* copy of compile flags */
207 int lasttype
; /* type of previous token */
208 int nexttype
; /* type of next token */
209 chr nextvalue
; /* value (if any) of next token */
210 int lexcon
; /* lexical context type (see lex.c) */
211 int nsubexp
; /* subexpression count */
212 struct subre
**subs
; /* subRE pointer vector */
213 size_t nsubs
; /* length of vector */
214 struct subre
*sub10
[10]; /* initial vector, enough for most */
215 struct nfa
*nfa
; /* the NFA */
216 struct colormap
*cm
; /* character color map */
217 color nlcolor
; /* color of newline */
218 struct state
*wordchrs
; /* state in nfa holding word-char outarcs */
219 struct subre
*tree
; /* subexpression tree */
220 struct subre
*treechain
; /* all tree nodes allocated */
221 struct subre
*treefree
; /* any free tree nodes */
222 int ntree
; /* number of tree nodes */
223 struct cvec
*cv
; /* interface cvec */
224 struct cvec
*cv2
; /* utility cvec */
225 struct subre
*lacons
; /* lookahead-constraint vector */
226 int nlacons
; /* size of lacons */
229 /* parsing macros; most know that `v' is the struct vars pointer */
230 #define NEXT() (next(v)) /* advance by one token */
231 #define SEE(t) (v->nexttype == (t)) /* is next token this? */
232 #define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */
233 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
234 #define ISERR() VISERR(v)
235 #define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\
237 #define ERR(e) VERR(v, e) /* record an error */
238 #define NOERR() {if (ISERR()) return;} /* if error seen, return */
239 #define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
240 #define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
241 #define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
242 #define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
243 #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
245 /* token type codes, some also used as NFA arc types */
246 #define EMPTY 'n' /* no token present */
247 #define EOS 'e' /* end of string */
248 #define PLAIN 'p' /* ordinary character */
249 #define DIGIT 'd' /* digit (in bound) */
250 #define BACKREF 'b' /* back reference */
251 #define COLLEL 'I' /* start of [. */
252 #define ECLASS 'E' /* start of [= */
253 #define CCLASS 'C' /* start of [: */
254 #define END 'X' /* end of [. [= [: */
255 #define RANGE 'R' /* - within [] which might be range delim. */
256 #define LACON 'L' /* lookahead constraint subRE */
257 #define AHEAD 'a' /* color-lookahead arc */
258 #define BEHIND 'r' /* color-lookbehind arc */
259 #define WBDRY 'w' /* word boundary constraint */
260 #define NWBDRY 'W' /* non-word-boundary constraint */
261 #define SBEGIN 'A' /* beginning of string (even if not BOL) */
262 #define SEND 'Z' /* end of string (even if not EOL) */
263 #define PREFER 'P' /* length preference */
265 /* is an arc colored, and hence on a color chain? */
267 ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
270 /* static function list */
271 static struct fns functions
= {
272 rfree
, /* regfree insides */
278 * pg_regcomp - compile regular expression
281 pg_regcomp(regex_t
*re
,
287 struct vars
*v
= &var
;
293 FILE *debug
= (flags
& REG_PROGRESS
) ? stdout
: (FILE *) NULL
;
295 FILE *debug
= (FILE *) NULL
;
298 #define CNOERR() { if (ISERR()) return freev(v, v->err); }
302 if (re
== NULL
|| string
== NULL
)
304 if ((flags
& REG_QUOTE
) &&
305 (flags
& (REG_ADVANCED
| REG_EXPANDED
| REG_NEWLINE
)))
307 if (!(flags
& REG_EXTENDED
) && (flags
& REG_ADVF
))
310 /* initial setup (after which freev() is callable) */
313 v
->stop
= v
->now
+ len
;
314 v
->savenow
= v
->savestop
= NULL
;
320 for (j
= 0; j
< v
->nsubs
; j
++)
324 v
->nlcolor
= COLORLESS
;
333 re
->re_magic
= REMAGIC
;
334 re
->re_info
= 0; /* bits get set during parse */
335 re
->re_csize
= sizeof(chr
);
337 re
->re_fns
= VS(&functions
);
339 /* more complex setup, malloced things */
340 re
->re_guts
= VS(MALLOC(sizeof(struct guts
)));
341 if (re
->re_guts
== NULL
)
342 return freev(v
, REG_ESPACE
);
343 g
= (struct guts
*) re
->re_guts
;
350 v
->nfa
= newnfa(v
, v
->cm
, (struct nfa
*) NULL
);
352 v
->cv
= newcvec(100, 20);
354 return freev(v
, REG_ESPACE
);
357 lexstart(v
); /* also handles prefixes */
358 if ((v
->cflags
& REG_NLSTOP
) || (v
->cflags
& REG_NLANCH
))
360 /* assign newline a unique color */
361 v
->nlcolor
= subcolor(v
->cm
, newline());
362 okcolors(v
->nfa
, v
->cm
);
365 v
->tree
= parse(v
, EOS
, PLAIN
, v
->nfa
->init
, v
->nfa
->final
);
366 assert(SEE(EOS
)); /* even if error; ISERR() => SEE(EOS) */
368 assert(v
->tree
!= NULL
);
370 /* finish setup of nfa and its subre tree */
371 specialcolors(v
->nfa
);
376 fprintf(debug
, "\n\n\n========= RAW ==========\n");
377 dumpnfa(v
->nfa
, debug
);
378 dumpst(v
->tree
, debug
, 1);
382 v
->ntree
= numst(v
->tree
, 1);
388 fprintf(debug
, "\n\n\n========= TREE FIXED ==========\n");
389 dumpst(v
->tree
, debug
, 1);
393 /* build compacted NFAs for tree and lacons */
394 re
->re_info
|= nfatree(v
, v
->tree
, debug
);
396 assert(v
->nlacons
== 0 || v
->lacons
!= NULL
);
397 for (i
= 1; i
< v
->nlacons
; i
++)
401 fprintf(debug
, "\n\n\n========= LA%d ==========\n", i
);
403 nfanode(v
, &v
->lacons
[i
], debug
);
406 if (v
->tree
->flags
& SHORTER
)
409 /* build compacted NFAs for tree, lacons, fast search */
412 fprintf(debug
, "\n\n\n========= SEARCH ==========\n");
414 /* can sacrifice main NFA now, so use it as work area */
415 (DISCARD
) optimize(v
->nfa
, debug
);
417 makesearch(v
, v
->nfa
);
419 compact(v
->nfa
, &g
->search
);
422 /* looks okay, package it up */
423 re
->re_nsub
= v
->nsubexp
;
424 v
->re
= NULL
; /* freev no longer frees re */
425 g
->magic
= GUTSMAGIC
;
426 g
->cflags
= v
->cflags
;
427 g
->info
= re
->re_info
;
428 g
->nsub
= re
->re_nsub
;
432 g
->compare
= (v
->cflags
& REG_ICASE
) ? casecmp
: cmp
;
433 g
->lacons
= v
->lacons
;
435 g
->nlacons
= v
->nlacons
;
438 if (flags
& REG_DUMP
)
447 * moresubs - enlarge subRE vector
450 moresubs(struct vars
* v
,
451 int wanted
) /* want enough room for this one */
456 assert(wanted
> 0 && (size_t) wanted
>= v
->nsubs
);
457 n
= (size_t) wanted
*3 / 2 + 1;
459 if (v
->subs
== v
->sub10
)
461 p
= (struct subre
**) MALLOC(n
* sizeof(struct subre
*));
463 memcpy(VS(p
), VS(v
->subs
),
464 v
->nsubs
* sizeof(struct subre
*));
467 p
= (struct subre
**) REALLOC(v
->subs
, n
* sizeof(struct subre
*));
474 for (p
= &v
->subs
[v
->nsubs
]; v
->nsubs
< n
; p
++, v
->nsubs
++)
476 assert(v
->nsubs
== n
);
477 assert((size_t) wanted
< v
->nsubs
);
481 * freev - free vars struct's substructures where necessary
483 * Optionally does error-number setting, and always returns error code
484 * (if any), to make error-handling code terser.
487 freev(struct vars
* v
,
492 if (v
->subs
!= v
->sub10
)
497 freesubre(v
, v
->tree
);
498 if (v
->treechain
!= NULL
)
504 if (v
->lacons
!= NULL
)
505 freelacons(v
->lacons
, v
->nlacons
);
506 ERR(err
); /* nop if err==0 */
512 * makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
513 * NFA must have been optimize()d already.
516 makesearch(struct vars
* v
,
521 struct state
*pre
= nfa
->pre
;
526 /* no loops are needed if it's anchored */
527 for (a
= pre
->outs
; a
!= NULL
; a
= a
->outchain
)
529 assert(a
->type
== PLAIN
);
530 if (a
->co
!= nfa
->bos
[0] && a
->co
!= nfa
->bos
[1])
535 /* add implicit .* in front */
536 rainbow(nfa
, v
->cm
, PLAIN
, COLORLESS
, pre
, pre
);
538 /* and ^* and \A* too -- not always necessary, but harmless */
539 newarc(nfa
, PLAIN
, nfa
->bos
[0], pre
, pre
);
540 newarc(nfa
, PLAIN
, nfa
->bos
[1], pre
, pre
);
544 * Now here's the subtle part. Because many REs have no lookback
545 * constraints, often knowing when you were in the pre state tells you
546 * little; it's the next state(s) that are informative. But some of them
547 * may have other inarcs, i.e. it may be possible to make actual progress
548 * and then return to one of them. We must de-optimize such cases,
549 * splitting each such state into progress and no-progress states.
552 /* first, make a list of the states */
554 for (a
= pre
->outs
; a
!= NULL
; a
= a
->outchain
)
557 for (b
= s
->ins
; b
!= NULL
; b
= b
->inchain
)
560 if (b
!= NULL
&& s
->tmp
== NULL
)
563 * Must be split if not already in the list (fixes bugs 505048,
564 * 230589, 840258, 504785).
572 for (s
= slist
; s
!= NULL
; s
= s2
)
575 copyouts(nfa
, s
, s2
);
576 for (a
= s
->ins
; a
!= NULL
; a
= b
)
581 cparc(nfa
, a
, a
->from
, s2
);
586 s
->tmp
= NULL
; /* clean up while we're at it */
591 * parse - parse an RE
593 * This is actually just the top level, which parses a bunch of branches
594 * tied together with '|'. They appear in the tree as the left children
595 * of a chain of '|' subres.
597 static struct subre
*
598 parse(struct vars
* v
,
599 int stopper
, /* EOS or ')' */
600 int type
, /* LACON (lookahead subRE) or PLAIN */
601 struct state
* init
, /* initial state */
602 struct state
* final
) /* final state */
604 struct state
*left
; /* scaffolding for branch */
606 struct subre
*branches
; /* top level */
607 struct subre
*branch
; /* current branch */
608 struct subre
*t
; /* temporary */
609 int firstbranch
; /* is this the first branch? */
611 assert(stopper
== ')' || stopper
== EOS
);
613 branches
= subre(v
, '|', LONGER
, init
, final
);
621 /* need a place to hang it */
622 branch
->right
= subre(v
, '|', LONGER
, init
, final
);
624 branch
= branch
->right
;
627 left
= newstate(v
->nfa
);
628 right
= newstate(v
->nfa
);
630 EMPTYARC(init
, left
);
631 EMPTYARC(right
, final
);
633 branch
->left
= parsebranch(v
, stopper
, type
, left
, right
, 0);
635 branch
->flags
|= UP(branch
->flags
| branch
->left
->flags
);
636 if ((branch
->flags
& ~branches
->flags
) != 0) /* new flags */
637 for (t
= branches
; t
!= branch
; t
= t
->right
)
638 t
->flags
|= branch
->flags
;
640 assert(SEE(stopper
) || SEE(EOS
));
644 assert(stopper
== ')' && SEE(EOS
));
648 /* optimize out simple cases */
649 if (branch
== branches
)
650 { /* only one branch */
651 assert(branch
->right
== NULL
);
654 freesubre(v
, branches
);
657 else if (!MESSY(branches
->flags
))
658 { /* no interesting innards */
659 freesubre(v
, branches
->left
);
660 branches
->left
= NULL
;
661 freesubre(v
, branches
->right
);
662 branches
->right
= NULL
;
670 * parsebranch - parse one branch of an RE
672 * This mostly manages concatenation, working closely with parseqatom().
673 * Concatenated things are bundled up as much as possible, with separate
674 * ',' nodes introduced only when necessary due to substructure.
676 static struct subre
*
677 parsebranch(struct vars
* v
,
678 int stopper
, /* EOS or ')' */
679 int type
, /* LACON (lookahead subRE) or PLAIN */
680 struct state
* left
, /* leftmost state */
681 struct state
* right
, /* rightmost state */
682 int partial
) /* is this only part of a branch? */
684 struct state
*lp
; /* left end of current construct */
685 int seencontent
; /* is there anything in this branch yet? */
690 t
= subre(v
, '=', 0, left
, right
); /* op '=' is tentative */
692 while (!SEE('|') && !SEE(stopper
) && !SEE(EOS
))
695 { /* implicit concat operator */
696 lp
= newstate(v
->nfa
);
698 moveins(v
->nfa
, right
, lp
);
702 /* NB, recursion in parseqatom() may swallow rest of branch */
703 parseqatom(v
, stopper
, type
, lp
, right
, t
);
711 EMPTYARC(left
, right
);
718 * parseqatom - parse one quantified atom or constraint of an RE
720 * The bookkeeping near the end cooperates very closely with parsebranch();
721 * in particular, it contains a recursion that can involve parsing the rest
722 * of the branch, making this function's name somewhat inaccurate.
725 parseqatom(struct vars
* v
,
726 int stopper
, /* EOS or ')' */
727 int type
, /* LACON (lookahead subRE) or PLAIN */
728 struct state
* lp
, /* left state to hang it on */
729 struct state
* rp
, /* right state to hang it on */
730 struct subre
* top
) /* subtree top */
732 struct state
*s
; /* temporaries for new states */
735 #define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
738 struct subre
*atom
; /* atom's subtree */
740 int cap
; /* capturing parens? */
741 int pos
; /* positive lookahead? */
742 int subno
; /* capturing-parens or backref number */
744 int qprefer
; /* quantifier short/long preference */
746 struct subre
**atomp
; /* where the pointer to atom is */
748 /* initial bookkeeping */
750 assert(lp
->nouts
== 0); /* must string new code */
751 assert(rp
->nins
== 0); /* between lp and rp */
752 subno
= 0; /* just to shut lint up */
754 /* an atom or constraint... */
755 atomtype
= v
->nexttype
;
758 /* first, constraints, which end by returning */
761 if (v
->cflags
& REG_NLANCH
)
762 ARCV(BEHIND
, v
->nlcolor
);
768 if (v
->cflags
& REG_NLANCH
)
769 ARCV(AHEAD
, v
->nlcolor
);
774 ARCV('^', 1); /* BOL */
775 ARCV('^', 0); /* or BOS */
780 ARCV('$', 1); /* EOL */
781 ARCV('$', 0); /* or EOS */
786 wordchrs(v
); /* does NEXT() */
787 s
= newstate(v
->nfa
);
789 nonword(v
, BEHIND
, lp
, s
);
790 word(v
, AHEAD
, s
, rp
);
794 wordchrs(v
); /* does NEXT() */
795 s
= newstate(v
->nfa
);
797 word(v
, BEHIND
, lp
, s
);
798 nonword(v
, AHEAD
, s
, rp
);
802 wordchrs(v
); /* does NEXT() */
803 s
= newstate(v
->nfa
);
805 nonword(v
, BEHIND
, lp
, s
);
806 word(v
, AHEAD
, s
, rp
);
807 s
= newstate(v
->nfa
);
809 word(v
, BEHIND
, lp
, s
);
810 nonword(v
, AHEAD
, s
, rp
);
814 wordchrs(v
); /* does NEXT() */
815 s
= newstate(v
->nfa
);
817 word(v
, BEHIND
, lp
, s
);
818 word(v
, AHEAD
, s
, rp
);
819 s
= newstate(v
->nfa
);
821 nonword(v
, BEHIND
, lp
, s
);
822 nonword(v
, AHEAD
, s
, rp
);
825 case LACON
: /* lookahead constraint */
828 s
= newstate(v
->nfa
);
829 s2
= newstate(v
->nfa
);
831 t
= parse(v
, ')', LACON
, s
, s2
);
832 freesubre(v
, t
); /* internal structure irrelevant */
833 assert(SEE(')') || ISERR());
835 n
= newlacon(v
, s
, s2
, pos
);
840 /* then errors, to get them out of the way */
852 /* then plain characters, and minor variants on that theme */
853 case ')': /* unbalanced paren */
854 if ((v
->cflags
& REG_ADVANCED
) != REG_EXTENDED
)
859 /* legal in EREs due to specification botch */
861 /* fallthrough into case PLAIN */
863 onechr(v
, v
->nextvalue
, lp
, rp
);
864 okcolors(v
->nfa
, v
->cm
);
869 if (v
->nextvalue
== 1)
873 assert(SEE(']') || ISERR());
877 rainbow(v
->nfa
, v
->cm
, PLAIN
,
878 (v
->cflags
& REG_NLSTOP
) ? v
->nlcolor
: COLORLESS
,
882 /* and finally the ugly stuff */
883 case '(': /* value flags as capturing or non */
884 cap
= (type
== LACON
) ? 0 : v
->nextvalue
;
889 if ((size_t) subno
>= v
->nsubs
)
891 assert((size_t) subno
< v
->nsubs
);
894 atomtype
= PLAIN
; /* something that's not '(' */
896 /* need new endpoints because tree will contain pointers */
897 s
= newstate(v
->nfa
);
898 s2
= newstate(v
->nfa
);
903 atom
= parse(v
, ')', PLAIN
, s
, s2
);
904 assert(SEE(')') || ISERR());
909 v
->subs
[subno
] = atom
;
910 t
= subre(v
, '(', atom
->flags
| CAP
, lp
, rp
);
916 /* postpone everything else pending possible {0} */
918 case BACKREF
: /* the Feature From The Black Lagoon */
919 INSIST(type
!= LACON
, REG_ESUBREG
);
920 INSIST(v
->nextvalue
< v
->nsubs
, REG_ESUBREG
);
921 INSIST(v
->subs
[v
->nextvalue
] != NULL
, REG_ESUBREG
);
923 assert(v
->nextvalue
> 0);
924 atom
= subre(v
, 'b', BACKR
, lp
, rp
);
925 subno
= v
->nextvalue
;
927 EMPTYARC(lp
, rp
); /* temporarily, so there's something */
932 /* ...and an atom may be followed by a quantifier */
938 qprefer
= (v
->nextvalue
) ? LONGER
: SHORTER
;
944 qprefer
= (v
->nextvalue
) ? LONGER
: SHORTER
;
950 qprefer
= (v
->nextvalue
) ? LONGER
: SHORTER
;
967 /* {m,n} exercises preference, even if it's {m,m} */
968 qprefer
= (v
->nextvalue
) ? LONGER
: SHORTER
;
973 /* {m} passes operand's preference through */
977 { /* catches errors too */
983 default: /* no quantifier */
989 /* annoying special case: {0} or {0,0} cancels everything */
990 if (m
== 0 && n
== 0)
995 v
->subs
[subno
] = NULL
;
996 delsub(v
->nfa
, lp
, rp
);
1001 /* if not a messy case, avoid hard part */
1002 assert(!MESSY(top
->flags
));
1003 f
= top
->flags
| qprefer
| ((atom
!= NULL
) ? atom
->flags
: 0);
1004 if (atomtype
!= '(' && atomtype
!= BACKREF
&& !MESSY(UP(f
)))
1006 if (!(m
== 1 && n
== 1))
1007 repeat(v
, lp
, rp
, m
, n
);
1015 * hard part: something messy That is, capturing parens, back reference,
1016 * short/long clash, or an atom with substructure containing one of those.
1019 /* now we'll need a subre for the contents even if they're boring */
1022 atom
= subre(v
, '=', 0, lp
, rp
);
1027 * prepare a general-purpose state skeleton
1029 * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] / /
1030 * [lp] ----> [s2] ----bypass---------------------
1032 * where bypass is an empty, and prefix is some repetitions of atom
1034 s
= newstate(v
->nfa
); /* first, new endpoints for the atom */
1035 s2
= newstate(v
->nfa
);
1037 moveouts(v
->nfa
, lp
, s
);
1038 moveins(v
->nfa
, rp
, s2
);
1042 s
= newstate(v
->nfa
); /* and spots for prefix and bypass */
1043 s2
= newstate(v
->nfa
);
1049 /* break remaining subRE into x{...} and what follows */
1050 t
= subre(v
, '.', COMBINE(qprefer
, atom
->flags
), lp
, rp
);
1053 /* here we should recurse... but we must postpone that to the end */
1055 /* split top into prefix and remaining */
1056 assert(top
->op
== '=' && top
->left
== NULL
&& top
->right
== NULL
);
1057 top
->left
= subre(v
, '=', top
->flags
, top
->begin
, lp
);
1061 /* if it's a backref, now is the time to replicate the subNFA */
1062 if (atomtype
== BACKREF
)
1064 assert(atom
->begin
->nouts
== 1); /* just the EMPTY */
1065 delsub(v
->nfa
, atom
->begin
, atom
->end
);
1066 assert(v
->subs
[subno
] != NULL
);
1067 /* and here's why the recursion got postponed: it must */
1068 /* wait until the skeleton is filled in, because it may */
1069 /* hit a backref that wants to copy the filled-in skeleton */
1070 dupnfa(v
->nfa
, v
->subs
[subno
]->begin
, v
->subs
[subno
]->end
,
1071 atom
->begin
, atom
->end
);
1075 /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
1078 EMPTYARC(s2
, atom
->end
); /* the bypass */
1079 assert(PREF(qprefer
) != 0);
1080 f
= COMBINE(qprefer
, atom
->flags
);
1081 t
= subre(v
, '|', f
, lp
, atom
->end
);
1084 t
->right
= subre(v
, '|', PREF(f
), s2
, atom
->end
);
1086 t
->right
->left
= subre(v
, '=', 0, s2
, atom
->end
);
1093 /* deal with the rest of the quantifier */
1094 if (atomtype
== BACKREF
)
1096 /* special case: backrefs have internal quantifiers */
1097 EMPTYARC(s
, atom
->begin
); /* empty prefix */
1098 /* just stuff everything into atom */
1099 repeat(v
, atom
->begin
, atom
->end
, m
, n
);
1100 atom
->min
= (short) m
;
1101 atom
->max
= (short) n
;
1102 atom
->flags
|= COMBINE(qprefer
, atom
->flags
);
1104 else if (m
== 1 && n
== 1)
1106 /* no/vacuous quantifier: done */
1107 EMPTYARC(s
, atom
->begin
); /* empty prefix */
1111 /* turn x{m,n} into x{m-1,n-1}x, with capturing */
1112 /* parens in only second x */
1113 dupnfa(v
->nfa
, atom
->begin
, atom
->end
, s
, atom
->begin
);
1114 assert(m
>= 1 && m
!= INFINITY
&& n
>= 1);
1115 repeat(v
, s
, atom
->begin
, m
- 1, (n
== INFINITY
) ? n
: n
- 1);
1116 f
= COMBINE(qprefer
, atom
->flags
);
1117 t
= subre(v
, '.', f
, s
, atom
->end
); /* prefix and atom */
1119 t
->left
= subre(v
, '=', PREF(f
), s
, atom
->begin
);
1125 /* and finally, look after that postponed recursion */
1127 if (!(SEE('|') || SEE(stopper
) || SEE(EOS
)))
1128 t
->right
= parsebranch(v
, stopper
, type
, atom
->end
, rp
, 1);
1131 EMPTYARC(atom
->end
, rp
);
1132 t
->right
= subre(v
, '=', 0, atom
->end
, rp
);
1134 assert(SEE('|') || SEE(stopper
) || SEE(EOS
));
1135 t
->flags
|= COMBINE(t
->flags
, t
->right
->flags
);
1136 top
->flags
|= COMBINE(top
->flags
, t
->flags
);
1140 * nonword - generate arcs for non-word-character ahead or behind
1143 nonword(struct vars
* v
,
1144 int dir
, /* AHEAD or BEHIND */
1148 int anchor
= (dir
== AHEAD
) ? '$' : '^';
1150 assert(dir
== AHEAD
|| dir
== BEHIND
);
1151 newarc(v
->nfa
, anchor
, 1, lp
, rp
);
1152 newarc(v
->nfa
, anchor
, 0, lp
, rp
);
1153 colorcomplement(v
->nfa
, v
->cm
, dir
, v
->wordchrs
, lp
, rp
);
1154 /* (no need for special attention to \n) */
1158 * word - generate arcs for word character ahead or behind
1161 word(struct vars
* v
,
1162 int dir
, /* AHEAD or BEHIND */
1166 assert(dir
== AHEAD
|| dir
== BEHIND
);
1167 cloneouts(v
->nfa
, v
->wordchrs
, lp
, rp
, dir
);
1168 /* (no need for special attention to \n) */
1172 * scannum - scan a number
1174 static int /* value, <= DUPMAX */
1175 scannum(struct vars
* v
)
1179 while (SEE(DIGIT
) && n
< DUPMAX
)
1181 n
= n
* 10 + v
->nextvalue
;
1184 if (SEE(DIGIT
) || n
> DUPMAX
)
1193 * repeat - replicate subNFA for quantifiers
1195 * The duplication sequences used here are chosen carefully so that any
1196 * pointers starting out pointing into the subexpression end up pointing into
1197 * the last occurrence. (Note that it may not be strung between the same
1198 * left and right end states, however!) This used to be important for the
1199 * subRE tree, although the important bits are now handled by the in-line
1200 * code in parse(), and when this is called, it doesn't matter any more.
1203 repeat(struct vars
* v
,
1211 #define PAIR(x, y) ((x)*4 + (y))
1212 #define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
1213 const int rm
= REDUCE(m
);
1214 const int rn
= REDUCE(n
);
1218 switch (PAIR(rm
, rn
))
1220 case PAIR(0, 0): /* empty string */
1221 delsub(v
->nfa
, lp
, rp
);
1224 case PAIR(0, 1): /* do as x| */
1227 case PAIR(0, SOME
): /* do as x{1,n}| */
1228 repeat(v
, lp
, rp
, 1, n
);
1232 case PAIR(0, INF
): /* loop x around */
1233 s
= newstate(v
->nfa
);
1235 moveouts(v
->nfa
, lp
, s
);
1236 moveins(v
->nfa
, rp
, s
);
1240 case PAIR(1, 1): /* no action required */
1242 case PAIR(1, SOME
): /* do as x{0,n-1}x = (x{1,n-1}|)x */
1243 s
= newstate(v
->nfa
);
1245 moveouts(v
->nfa
, lp
, s
);
1246 dupnfa(v
->nfa
, s
, rp
, lp
, s
);
1248 repeat(v
, lp
, s
, 1, n
- 1);
1252 case PAIR(1, INF
): /* add loopback arc */
1253 s
= newstate(v
->nfa
);
1254 s2
= newstate(v
->nfa
);
1256 moveouts(v
->nfa
, lp
, s
);
1257 moveins(v
->nfa
, rp
, s2
);
1262 case PAIR(SOME
, SOME
): /* do as x{m-1,n-1}x */
1263 s
= newstate(v
->nfa
);
1265 moveouts(v
->nfa
, lp
, s
);
1266 dupnfa(v
->nfa
, s
, rp
, lp
, s
);
1268 repeat(v
, lp
, s
, m
- 1, n
- 1);
1270 case PAIR(SOME
, INF
): /* do as x{m-1,}x */
1271 s
= newstate(v
->nfa
);
1273 moveouts(v
->nfa
, lp
, s
);
1274 dupnfa(v
->nfa
, s
, rp
, lp
, s
);
1276 repeat(v
, lp
, s
, m
- 1, n
);
1285 * bracket - handle non-complemented bracket expression
1286 * Also called from cbracket for complemented bracket expressions.
1289 bracket(struct vars
* v
,
1295 while (!SEE(']') && !SEE(EOS
))
1296 brackpart(v
, lp
, rp
);
1297 assert(SEE(']') || ISERR());
1298 okcolors(v
->nfa
, v
->cm
);
1302 * cbracket - handle complemented bracket expression
1303 * We do it by calling bracket() with dummy endpoints, and then complementing
1304 * the result. The alternative would be to invoke rainbow(), and then delete
1305 * arcs as the b.e. is seen... but that gets messy.
1308 cbracket(struct vars
* v
,
1312 struct state
*left
= newstate(v
->nfa
);
1313 struct state
*right
= newstate(v
->nfa
);
1316 bracket(v
, left
, right
);
1317 if (v
->cflags
& REG_NLSTOP
)
1318 newarc(v
->nfa
, PLAIN
, v
->nlcolor
, left
, right
);
1321 assert(lp
->nouts
== 0); /* all outarcs will be ours */
1324 * Easy part of complementing, and all there is to do since the MCCE code
1327 colorcomplement(v
->nfa
, v
->cm
, PLAIN
, left
, lp
, rp
);
1329 dropstate(v
->nfa
, left
);
1330 assert(right
->nins
== 0);
1331 freestate(v
->nfa
, right
);
1335 * brackpart - handle one item (or range) within a bracket expression
1338 brackpart(struct vars
* v
,
1349 /* parse something, get rid of special cases, take shortcuts */
1350 switch (v
->nexttype
)
1352 case RANGE
: /* a-b-c or other botch */
1357 c
[0] = v
->nextvalue
;
1359 /* shortcut for ordinary chr (not range) */
1362 onechr(v
, c
[0], lp
, rp
);
1365 startc
= element(v
, c
, c
+ 1);
1370 endp
= scanplain(v
);
1371 INSIST(startp
< endp
, REG_ECOLLATE
);
1373 startc
= element(v
, startp
, endp
);
1378 endp
= scanplain(v
);
1379 INSIST(startp
< endp
, REG_ECOLLATE
);
1381 startc
= element(v
, startp
, endp
);
1383 cv
= eclass(v
, startc
, (v
->cflags
& REG_ICASE
));
1385 dovec(v
, cv
, lp
, rp
);
1390 endp
= scanplain(v
);
1391 INSIST(startp
< endp
, REG_ECTYPE
);
1393 cv
= cclass(v
, startp
, endp
, (v
->cflags
& REG_ICASE
));
1395 dovec(v
, cv
, lp
, rp
);
1407 switch (v
->nexttype
)
1411 c
[0] = v
->nextvalue
;
1413 endc
= element(v
, c
, c
+ 1);
1418 endp
= scanplain(v
);
1419 INSIST(startp
< endp
, REG_ECOLLATE
);
1421 endc
= element(v
, startp
, endp
);
1434 * Ranges are unportable. Actually, standard C does guarantee that digits
1435 * are contiguous, but making that an exception is just too complicated.
1439 cv
= range(v
, startc
, endc
, (v
->cflags
& REG_ICASE
));
1441 dovec(v
, cv
, lp
, rp
);
1445 * scanplain - scan PLAIN contents of [. etc.
1447 * Certain bits of trickery in lex.c know that this code does not try
1448 * to look past the final bracket of the [. etc.
1450 static const chr
* /* just after end of sequence */
1451 scanplain(struct vars
* v
)
1455 assert(SEE(COLLEL
) || SEE(ECLASS
) || SEE(CCLASS
));
1465 assert(SEE(END
) || ISERR());
1472 * onechr - fill in arcs for a plain character, and possible case complements
1473 * This is mostly a shortcut for efficient handling of the common case.
1476 onechr(struct vars
* v
,
1481 if (!(v
->cflags
& REG_ICASE
))
1483 newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, c
), lp
, rp
);
1487 /* rats, need general case anyway... */
1488 dovec(v
, allcases(v
, c
), lp
, rp
);
1492 * dovec - fill in arcs for each element of a cvec
1495 dovec(struct vars
* v
,
1506 /* ordinary characters */
1507 for (p
= cv
->chrs
, i
= cv
->nchrs
; i
> 0; p
++, i
--)
1510 newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, ch
), lp
, rp
);
1513 /* and the ranges */
1514 for (p
= cv
->ranges
, i
= cv
->nranges
; i
> 0; p
+= 2, i
--)
1519 subrange(v
, from
, to
, lp
, rp
);
1524 * wordchrs - set up word-chr list for word-boundary stuff, if needed
1526 * The list is kept as a bunch of arcs between two dummy states; it's
1527 * disposed of by the unreachable-states sweep in NFA optimization.
1528 * Does NEXT(). Must not be called from any unusual lexical context.
1529 * This should be reconciled with the \w etc. handling in lex.c, and
1530 * should be cleaned up to reduce dependencies on input scanning.
1533 wordchrs(struct vars
* v
)
1536 struct state
*right
;
1538 if (v
->wordchrs
!= NULL
)
1540 NEXT(); /* for consistency */
1544 left
= newstate(v
->nfa
);
1545 right
= newstate(v
->nfa
);
1547 /* fine point: implemented with [::], and lexer will set REG_ULOCALE */
1550 assert(v
->savenow
!= NULL
&& SEE('['));
1551 bracket(v
, left
, right
);
1552 assert((v
->savenow
!= NULL
&& SEE(']')) || ISERR());
1559 * subre - allocate a subre
1561 static struct subre
*
1562 subre(struct vars
* v
,
1565 struct state
* begin
,
1568 struct subre
*ret
= v
->treefree
;
1571 v
->treefree
= ret
->left
;
1574 ret
= (struct subre
*) MALLOC(sizeof(struct subre
));
1580 ret
->chain
= v
->treechain
;
1584 assert(strchr("|.b(=", op
) != NULL
);
1590 ret
->min
= ret
->max
= 1;
1601 * freesubre - free a subRE subtree
1604 freesubre(struct vars
* v
, /* might be NULL */
1610 if (sr
->left
!= NULL
)
1611 freesubre(v
, sr
->left
);
1612 if (sr
->right
!= NULL
)
1613 freesubre(v
, sr
->right
);
1619 * freesrnode - free one node in a subRE subtree
1622 freesrnode(struct vars
* v
, /* might be NULL */
1628 if (!NULLCNFA(sr
->cnfa
))
1629 freecnfa(&sr
->cnfa
);
1634 sr
->left
= v
->treefree
;
1642 * optst - optimize a subRE subtree
1645 optst(struct vars
* v
,
1649 * DGP (2007-11-13): I assume it was the programmer's intent to eventually
1650 * come back and add code to optimize subRE trees, but the routine coded
1651 * just spends effort traversing the tree and doing nothing. We can do
1652 * nothing with less effort.
1658 * numst - number tree nodes (assigning retry indexes)
1660 static int /* next number */
1661 numst(struct subre
* t
,
1662 int start
) /* starting point for subtree numbers */
1669 t
->retry
= (short) i
++;
1670 if (t
->left
!= NULL
)
1671 i
= numst(t
->left
, i
);
1672 if (t
->right
!= NULL
)
1673 i
= numst(t
->right
, i
);
1678 * markst - mark tree nodes as INUSE
1681 markst(struct subre
* t
)
1686 if (t
->left
!= NULL
)
1688 if (t
->right
!= NULL
)
1693 * cleanst - free any tree nodes not marked INUSE
1696 cleanst(struct vars
* v
)
1701 for (t
= v
->treechain
; t
!= NULL
; t
= next
)
1704 if (!(t
->flags
& INUSE
))
1707 v
->treechain
= NULL
;
1708 v
->treefree
= NULL
; /* just on general principles */
1712 * nfatree - turn a subRE subtree into a tree of compacted NFAs
1714 static long /* optimize results from top node */
1715 nfatree(struct vars
* v
,
1717 FILE *f
) /* for debug output */
1719 assert(t
!= NULL
&& t
->begin
!= NULL
);
1721 if (t
->left
!= NULL
)
1722 (DISCARD
) nfatree(v
, t
->left
, f
);
1723 if (t
->right
!= NULL
)
1724 (DISCARD
) nfatree(v
, t
->right
, f
);
1726 return nfanode(v
, t
, f
);
1730 * nfanode - do one NFA for nfatree
1732 static long /* optimize results */
1733 nfanode(struct vars
* v
,
1735 FILE *f
) /* for debug output */
1740 assert(t
->begin
!= NULL
);
1747 fprintf(f
, "\n\n\n========= TREE NODE %s ==========\n",
1748 stid(t
, idbuf
, sizeof(idbuf
)));
1751 nfa
= newnfa(v
, v
->cm
, v
->nfa
);
1753 dupnfa(nfa
, t
->begin
, t
->end
, nfa
->init
, nfa
->final
);
1757 ret
= optimize(nfa
, f
);
1760 compact(nfa
, &t
->cnfa
);
1767 * newlacon - allocate a lookahead-constraint subRE
1769 static int /* lacon number */
1770 newlacon(struct vars
* v
,
1771 struct state
* begin
,
1778 if (v
->nlacons
== 0)
1780 v
->lacons
= (struct subre
*) MALLOC(2 * sizeof(struct subre
));
1781 n
= 1; /* skip 0th */
1786 v
->lacons
= (struct subre
*) REALLOC(v
->lacons
,
1787 (v
->nlacons
+ 1) * sizeof(struct subre
));
1790 if (v
->lacons
== NULL
)
1795 sub
= &v
->lacons
[n
];
1804 * freelacons - free lookahead-constraint subRE vector
1807 freelacons(struct subre
* subs
,
1814 for (sub
= subs
+ 1, i
= n
- 1; i
> 0; sub
++, i
--) /* no 0th */
1815 if (!NULLCNFA(sub
->cnfa
))
1816 freecnfa(&sub
->cnfa
);
1821 * rfree - free a whole RE (insides of regfree)
1828 if (re
== NULL
|| re
->re_magic
!= REMAGIC
)
1831 re
->re_magic
= 0; /* invalidate RE */
1832 g
= (struct guts
*) re
->re_guts
;
1837 if (g
->tree
!= NULL
)
1838 freesubre((struct vars
*) NULL
, g
->tree
);
1839 if (g
->lacons
!= NULL
)
1840 freelacons(g
->lacons
, g
->nlacons
);
1841 if (!NULLCNFA(g
->search
))
1842 freecnfa(&g
->search
);
1849 * dump - dump an RE in human-readable form
1858 if (re
->re_magic
!= REMAGIC
)
1859 fprintf(f
, "bad magic number (0x%x not 0x%x)\n", re
->re_magic
,
1861 if (re
->re_guts
== NULL
)
1863 fprintf(f
, "NULL guts!!!\n");
1866 g
= (struct guts
*) re
->re_guts
;
1867 if (g
->magic
!= GUTSMAGIC
)
1868 fprintf(f
, "bad guts magic number (0x%x not 0x%x)\n", g
->magic
,
1871 fprintf(f
, "\n\n\n========= DUMP ==========\n");
1872 fprintf(f
, "nsub %d, info 0%lo, csize %d, ntree %d\n",
1873 (int) re
->re_nsub
, re
->re_info
, re
->re_csize
, g
->ntree
);
1875 dumpcolors(&g
->cmap
, f
);
1876 if (!NULLCNFA(g
->search
))
1878 printf("\nsearch:\n");
1879 dumpcnfa(&g
->search
, f
);
1881 for (i
= 1; i
< g
->nlacons
; i
++)
1883 fprintf(f
, "\nla%d (%s):\n", i
,
1884 (g
->lacons
[i
].subno
) ? "positive" : "negative");
1885 dumpcnfa(&g
->lacons
[i
].cnfa
, f
);
1888 dumpst(g
->tree
, f
, 0);
1892 * dumpst - dump a subRE tree
1895 dumpst(struct subre
* t
,
1897 int nfapresent
) /* is the original NFA still around? */
1900 fprintf(f
, "null tree\n");
1902 stdump(t
, f
, nfapresent
);
1907 * stdump - recursive guts of dumpst
1910 stdump(struct subre
* t
,
1912 int nfapresent
) /* is the original NFA still around? */
1916 fprintf(f
, "%s. `%c'", stid(t
, idbuf
, sizeof(idbuf
)), t
->op
);
1917 if (t
->flags
& LONGER
)
1918 fprintf(f
, " longest");
1919 if (t
->flags
& SHORTER
)
1920 fprintf(f
, " shortest");
1921 if (t
->flags
& MIXED
)
1922 fprintf(f
, " hasmixed");
1924 fprintf(f
, " hascapture");
1925 if (t
->flags
& BACKR
)
1926 fprintf(f
, " hasbackref");
1927 if (!(t
->flags
& INUSE
))
1928 fprintf(f
, " UNUSED");
1930 fprintf(f
, " (#%d)", t
->subno
);
1931 if (t
->min
!= 1 || t
->max
!= 1)
1933 fprintf(f
, " {%d,", t
->min
);
1934 if (t
->max
!= INFINITY
)
1935 fprintf(f
, "%d", t
->max
);
1939 fprintf(f
, " %ld-%ld", (long) t
->begin
->no
, (long) t
->end
->no
);
1940 if (t
->left
!= NULL
)
1941 fprintf(f
, " L:%s", stid(t
->left
, idbuf
, sizeof(idbuf
)));
1942 if (t
->right
!= NULL
)
1943 fprintf(f
, " R:%s", stid(t
->right
, idbuf
, sizeof(idbuf
)));
1944 if (!NULLCNFA(t
->cnfa
))
1947 dumpcnfa(&t
->cnfa
, f
);
1950 if (t
->left
!= NULL
)
1951 stdump(t
->left
, f
, nfapresent
);
1952 if (t
->right
!= NULL
)
1953 stdump(t
->right
, f
, nfapresent
);
1957 * stid - identify a subtree node for dumping
1959 static const char * /* points to buf or constant string */
1960 stid(struct subre
* t
,
1964 /* big enough for hex int or decimal t->retry? */
1965 if (bufsize
< sizeof(void *) * 2 + 3 || bufsize
< sizeof(t
->retry
) * 3 + 1)
1968 sprintf(buf
, "%d", t
->retry
);
1970 sprintf(buf
, "%p", t
);
1973 #endif /* REG_DEBUG */
1976 #include "regc_lex.c"
1977 #include "regc_color.c"
1978 #include "regc_nfa.c"
1979 #include "regc_cvec.c"
1980 #include "regc_locale.c"