4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
7 * This file is part of the device-mapper userspace tools.
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 struct parse_sp
{ /* scratch pad for the parsing process */
23 int type
; /* token type, 0 indicates a charset */
24 dm_bitset_t charset
; /* The current charset */
25 const char *cursor
; /* where we are in the regex */
26 const char *rx_end
; /* one past the end of the expression being parsed */
29 static struct rx_node
*_or_term(struct parse_sp
*ps
);
31 static void _single_char(struct parse_sp
*ps
, unsigned int c
, const char *ptr
)
35 dm_bit_clear_all(ps
->charset
);
36 dm_bit_set(ps
->charset
, c
);
40 * Get the next token from the regular expression.
41 * Returns: 1 success, 0 end of input, -1 error.
43 static int _rx_get_token(struct parse_sp
*ps
)
45 int neg
= 0, range
= 0;
47 const char *ptr
= ps
->cursor
;
48 if (ptr
== ps
->rx_end
) { /* end of input ? */
54 /* charsets and ncharsets */
58 dm_bit_set_all(ps
->charset
);
60 /* never transition on zero */
61 dm_bit_clear(ps
->charset
, 0);
66 dm_bit_clear_all(ps
->charset
);
68 while ((ptr
< ps
->rx_end
) && (*ptr
!= ']')) {
70 /* an escaped character */
85 } else if (*ptr
== '-' && lc
) {
86 /* we've got a range on our hands */
89 if (ptr
== ps
->rx_end
) {
90 log_error("Incomplete range"
99 /* add lc - c into the bitset */
106 for (; lc
<= c
; lc
++) {
108 dm_bit_clear(ps
->charset
, lc
);
110 dm_bit_set(ps
->charset
, lc
);
114 /* add c into the bitset */
116 dm_bit_clear(ps
->charset
, c
);
118 dm_bit_set(ps
->charset
, c
);
124 if (ptr
>= ps
->rx_end
) {
130 ps
->cursor
= ptr
+ 1;
133 /* These characters are special, we just return their ASCII
134 codes as the type. Sorted into ascending order to help the
142 ps
->type
= (int) *ptr
;
143 ps
->cursor
= ptr
+ 1;
147 _single_char(ps
, HAT_CHAR
, ptr
);
151 _single_char(ps
, DOLLAR_CHAR
, ptr
);
155 /* The 'all but newline' character set */
157 ps
->cursor
= ptr
+ 1;
158 dm_bit_set_all(ps
->charset
);
159 dm_bit_clear(ps
->charset
, (int) '\n');
160 dm_bit_clear(ps
->charset
, (int) '\r');
161 dm_bit_clear(ps
->charset
, 0);
165 /* escaped character */
167 if (ptr
>= ps
->rx_end
) {
168 log_error("Badly quoted character at end "
175 ps
->cursor
= ptr
+ 1;
176 dm_bit_clear_all(ps
->charset
);
179 dm_bit_set(ps
->charset
, (int) '\n');
182 dm_bit_set(ps
->charset
, (int) '\r');
185 dm_bit_set(ps
->charset
, (int) '\t');
188 dm_bit_set(ps
->charset
, (int) *ptr
);
193 /* add a single character to the bitset */
195 ps
->cursor
= ptr
+ 1;
196 dm_bit_clear_all(ps
->charset
);
197 dm_bit_set(ps
->charset
, (int) *ptr
);
204 static struct rx_node
*_node(struct dm_pool
*mem
, int type
,
205 struct rx_node
*l
, struct rx_node
*r
)
207 struct rx_node
*n
= dm_pool_zalloc(mem
, sizeof(*n
));
210 if (!(n
->charset
= dm_bitset_create(mem
, 256))) {
211 dm_pool_free(mem
, n
);
223 static struct rx_node
*_term(struct parse_sp
*ps
)
229 if (!(n
= _node(ps
->mem
, CHARSET
, NULL
, NULL
))) {
234 dm_bit_copy(n
->charset
, ps
->charset
);
235 _rx_get_token(ps
); /* match charset */
239 _rx_get_token(ps
); /* match '(' */
241 if (ps
->type
!= ')') {
242 log_error("missing ')' in regular expression");
245 _rx_get_token(ps
); /* match ')' */
255 static struct rx_node
*_closure_term(struct parse_sp
*ps
)
257 struct rx_node
*l
, *n
;
259 if (!(l
= _term(ps
)))
265 n
= _node(ps
->mem
, STAR
, l
, NULL
);
269 n
= _node(ps
->mem
, PLUS
, l
, NULL
);
273 n
= _node(ps
->mem
, QUEST
, l
, NULL
);
292 static struct rx_node
*_cat_term(struct parse_sp
*ps
)
294 struct rx_node
*l
, *r
, *n
;
296 if (!(l
= _closure_term(ps
)))
302 if (!(r
= _cat_term(ps
)))
305 if (!(n
= _node(ps
->mem
, CAT
, l
, r
)))
311 static struct rx_node
*_or_term(struct parse_sp
*ps
)
313 struct rx_node
*l
, *r
, *n
;
315 if (!(l
= _cat_term(ps
)))
321 _rx_get_token(ps
); /* match '|' */
323 if (!(r
= _or_term(ps
))) {
324 log_error("Badly formed 'or' expression");
328 if (!(n
= _node(ps
->mem
, OR
, l
, r
)))
334 struct rx_node
*rx_parse_tok(struct dm_pool
*mem
,
335 const char *begin
, const char *end
)
338 struct parse_sp
*ps
= dm_pool_zalloc(mem
, sizeof(*ps
));
346 ps
->charset
= dm_bitset_create(mem
, 256);
349 _rx_get_token(ps
); /* load the first token */
351 if (!(r
= _or_term(ps
))) {
352 log_error("Parse error in regex");
353 dm_pool_free(mem
, ps
);
359 struct rx_node
*rx_parse_str(struct dm_pool
*mem
, const char *str
)
361 return rx_parse_tok(mem
, str
, str
+ strlen(str
));