4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
7 * This file is part of the device-mapper userspace tools.
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 struct dfa_state
*lookup
[256];
31 struct state_queue
*next
;
34 struct dm_regex
{ /* Instance variables for the lexer */
35 struct dfa_state
*start
;
38 struct rx_node
**nodes
;
39 struct dm_pool
*scratch
, *mem
;
42 #define TARGET_TRANS '\0'
44 static int _count_nodes(struct rx_node
*rx
)
49 r
+= _count_nodes(rx
->left
);
52 r
+= _count_nodes(rx
->right
);
57 static void _fill_table(struct dm_regex
*m
, struct rx_node
*rx
)
59 assert((rx
->type
!= OR
) || (rx
->left
&& rx
->right
));
62 _fill_table(m
, rx
->left
);
65 _fill_table(m
, rx
->right
);
67 m
->nodes
[m
->nodes_entered
++] = rx
;
70 static void _create_bitsets(struct dm_regex
*m
)
74 for (i
= 0; i
< m
->num_nodes
; i
++) {
75 struct rx_node
*n
= m
->nodes
[i
];
76 n
->firstpos
= dm_bitset_create(m
->scratch
, m
->num_nodes
);
77 n
->lastpos
= dm_bitset_create(m
->scratch
, m
->num_nodes
);
78 n
->followpos
= dm_bitset_create(m
->scratch
, m
->num_nodes
);
82 static void _calc_functions(struct dm_regex
*m
)
85 struct rx_node
*rx
, *c1
, *c2
;
87 for (i
= 0; i
< m
->num_nodes
; i
++) {
92 if (dm_bit(rx
->charset
, TARGET_TRANS
))
98 dm_bit_union(rx
->firstpos
,
99 c1
->firstpos
, c2
->firstpos
);
101 dm_bit_copy(rx
->firstpos
, c1
->firstpos
);
104 dm_bit_union(rx
->lastpos
,
105 c1
->lastpos
, c2
->lastpos
);
107 dm_bit_copy(rx
->lastpos
, c2
->lastpos
);
109 rx
->nullable
= c1
->nullable
&& c2
->nullable
;
113 dm_bit_copy(rx
->firstpos
, c1
->firstpos
);
114 dm_bit_copy(rx
->lastpos
, c1
->lastpos
);
115 rx
->nullable
= c1
->nullable
;
119 dm_bit_union(rx
->firstpos
, c1
->firstpos
, c2
->firstpos
);
120 dm_bit_union(rx
->lastpos
, c1
->lastpos
, c2
->lastpos
);
121 rx
->nullable
= c1
->nullable
|| c2
->nullable
;
126 dm_bit_copy(rx
->firstpos
, c1
->firstpos
);
127 dm_bit_copy(rx
->lastpos
, c1
->lastpos
);
132 dm_bit_set(rx
->firstpos
, i
);
133 dm_bit_set(rx
->lastpos
, i
);
138 log_error("Internal error: Unknown calc node type");
142 * followpos has it's own switch
143 * because PLUS and STAR do the
148 for (j
= 0; j
< m
->num_nodes
; j
++) {
149 if (dm_bit(c1
->lastpos
, j
)) {
150 struct rx_node
*n
= m
->nodes
[j
];
151 dm_bit_union(n
->followpos
,
152 n
->followpos
, c2
->firstpos
);
159 for (j
= 0; j
< m
->num_nodes
; j
++) {
160 if (dm_bit(rx
->lastpos
, j
)) {
161 struct rx_node
*n
= m
->nodes
[j
];
162 dm_bit_union(n
->followpos
,
163 n
->followpos
, rx
->firstpos
);
171 static struct dfa_state
*_create_dfa_state(struct dm_pool
*mem
)
173 return dm_pool_zalloc(mem
, sizeof(struct dfa_state
));
176 static struct state_queue
*_create_state_queue(struct dm_pool
*mem
,
177 struct dfa_state
*dfa
,
180 struct state_queue
*r
= dm_pool_alloc(mem
, sizeof(*r
));
188 r
->bits
= dm_bitset_create(mem
, bits
[0]); /* first element is the size */
189 dm_bit_copy(r
->bits
, bits
);
194 static int _calc_states(struct dm_regex
*m
, struct rx_node
*rx
)
196 unsigned iwidth
= (m
->num_nodes
/ DM_BITS_PER_INT
) + 1;
197 struct ttree
*tt
= ttree_create(m
->scratch
, iwidth
);
198 struct state_queue
*h
, *t
, *tmp
;
199 struct dfa_state
*dfa
, *ldfa
;
200 int i
, a
, set_bits
= 0, count
= 0;
201 dm_bitset_t bs
, dfa_bits
;
206 if (!(bs
= dm_bitset_create(m
->scratch
, m
->num_nodes
)))
209 /* create first state */
210 dfa
= _create_dfa_state(m
->mem
);
212 ttree_insert(tt
, rx
->firstpos
+ 1, dfa
);
214 /* prime the queue */
215 h
= t
= _create_state_queue(m
->scratch
, dfa
, rx
->firstpos
);
217 /* pop state off front of the queue */
222 /* iterate through all the inputs for this state */
223 dm_bit_clear_all(bs
);
224 for (a
= 0; a
< 256; a
++) {
225 /* iterate through all the states in firstpos */
226 for (i
= dm_bit_get_first(dfa_bits
);
227 i
>= 0; i
= dm_bit_get_next(dfa_bits
, i
)) {
228 if (dm_bit(m
->nodes
[i
]->charset
, a
)) {
229 if (a
== TARGET_TRANS
)
230 dfa
->final
= m
->nodes
[i
]->final
;
233 m
->nodes
[i
]->followpos
);
239 ldfa
= ttree_lookup(tt
, bs
+ 1);
242 ldfa
= _create_dfa_state(m
->mem
);
243 ttree_insert(tt
, bs
+ 1, ldfa
);
245 _create_state_queue(m
->scratch
,
257 dfa
->lookup
[a
] = ldfa
;
259 dm_bit_clear_all(bs
);
264 log_debug("Matcher built with %d dfa states", count
);
268 struct dm_regex
*dm_regex_create(struct dm_pool
*mem
, const char **patterns
,
269 unsigned num_patterns
)
275 struct dm_pool
*scratch
= dm_pool_create("regex matcher", 10 * 1024);
281 if (!(m
= dm_pool_alloc(mem
, sizeof(*m
)))) {
282 dm_pool_destroy(scratch
);
286 memset(m
, 0, sizeof(*m
));
288 /* join the regexps together, delimiting with zero */
289 for (i
= 0; i
< num_patterns
; i
++)
290 len
+= strlen(patterns
[i
]) + 8;
292 ptr
= all
= dm_pool_alloc(scratch
, len
+ 1);
297 for (i
= 0; i
< num_patterns
; i
++) {
298 ptr
+= sprintf(ptr
, "(.*(%s)%c)", patterns
[i
], TARGET_TRANS
);
299 if (i
< (num_patterns
- 1))
303 /* parse this expression */
304 if (!(rx
= rx_parse_tok(scratch
, all
, ptr
))) {
305 log_error("Couldn't parse regex");
310 m
->scratch
= scratch
;
311 m
->num_nodes
= _count_nodes(rx
);
312 m
->nodes
= dm_pool_alloc(scratch
, sizeof(*m
->nodes
) * m
->num_nodes
);
321 dm_pool_destroy(scratch
);
327 dm_pool_destroy(scratch
);
328 dm_pool_free(mem
, m
);
332 static struct dfa_state
*_step_matcher(int c
, struct dfa_state
*cs
, int *r
)
334 if (!(cs
= cs
->lookup
[(unsigned char) c
]))
337 if (cs
->final
&& (cs
->final
> *r
))
343 int dm_regex_match(struct dm_regex
*regex
, const char *s
)
345 struct dfa_state
*cs
= regex
->start
;
348 if (!(cs
= _step_matcher(HAT_CHAR
, cs
, &r
)))
352 if (!(cs
= _step_matcher(*s
, cs
, &r
)))
355 _step_matcher(DOLLAR_CHAR
, cs
, &r
);
358 /* subtract 1 to get back to zero index */