1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 #if defined HAVE_WCHAR_H || defined _LIBC
30 #endif /* HAVE_WCHAR_H || _LIBC */
31 #if defined HAVE_WCTYPE_H || defined _LIBC
33 #endif /* HAVE_WCTYPE_H || _LIBC */
36 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
37 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
38 # include <locale/localeinfo.h>
39 # include <locale/elem-hash.h>
40 # include <locale/coll-lookup.h>
44 /* This is for other GNU distributions with internationalized messages. */
45 #if HAVE_LIBINTL_H || defined _LIBC
49 # define gettext(msgid) \
50 INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES)
53 # define gettext(msgid) (msgid)
57 /* This define is so xgettext can find the internationalizable
59 # define gettext_noop(String) String
62 #include "_regex.h" /* gnupg */
63 #include "regex_internal.h"
65 static void re_string_construct_common (const char *str
, int len
,
67 RE_TRANSLATE_TYPE trans
, int icase
);
69 static int re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
);
70 #endif /* RE_ENABLE_I18N */
71 static re_dfastate_t
*create_newstate_common (re_dfa_t
*dfa
,
72 const re_node_set
*nodes
,
74 static reg_errcode_t
register_state (re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
76 static re_dfastate_t
*create_ci_newstate (re_dfa_t
*dfa
,
77 const re_node_set
*nodes
,
79 static re_dfastate_t
*create_cd_newstate (re_dfa_t
*dfa
,
80 const re_node_set
*nodes
,
83 static unsigned int inline calc_state_hash (const re_node_set
*nodes
,
84 unsigned int context
);
86 /* Functions for string operation. */
88 /* This function allocate the buffers. It is necessary to call
89 re_string_reconstruct before using the object. */
92 re_string_allocate (pstr
, str
, len
, init_len
, trans
, icase
)
95 int len
, init_len
, icase
;
96 RE_TRANSLATE_TYPE trans
;
99 int init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
100 re_string_construct_common (str
, len
, pstr
, trans
, icase
);
101 pstr
->stop
= pstr
->len
;
103 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
104 if (BE (ret
!= REG_NOERROR
, 0))
107 pstr
->mbs_case
= (MBS_CASE_ALLOCATED (pstr
) ? pstr
->mbs_case
108 : (unsigned char *) str
);
109 pstr
->mbs
= MBS_ALLOCATED (pstr
) ? pstr
->mbs
: pstr
->mbs_case
;
110 pstr
->valid_len
= (MBS_CASE_ALLOCATED (pstr
) || MBS_ALLOCATED (pstr
)
111 || MB_CUR_MAX
> 1) ? pstr
->valid_len
: len
;
115 /* This function allocate the buffers, and initialize them. */
118 re_string_construct (pstr
, str
, len
, trans
, icase
)
122 RE_TRANSLATE_TYPE trans
;
125 re_string_construct_common (str
, len
, pstr
, trans
, icase
);
126 pstr
->stop
= pstr
->len
;
127 /* Set 0 so that this function can initialize whole buffers. */
132 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
133 if (BE (ret
!= REG_NOERROR
, 0))
136 pstr
->mbs_case
= (MBS_CASE_ALLOCATED (pstr
) ? pstr
->mbs_case
137 : (unsigned char *) str
);
138 pstr
->mbs
= MBS_ALLOCATED (pstr
) ? pstr
->mbs
: pstr
->mbs_case
;
142 #ifdef RE_ENABLE_I18N
144 build_wcs_upper_buffer (pstr
);
146 #endif /* RE_ENABLE_I18N */
147 build_upper_buffer (pstr
);
151 #ifdef RE_ENABLE_I18N
153 build_wcs_buffer (pstr
);
155 #endif /* RE_ENABLE_I18N */
158 re_string_translate_buffer (pstr
);
160 pstr
->valid_len
= len
;
164 /* Initialized whole buffers, then valid_len == bufs_len. */
165 pstr
->valid_len
= pstr
->bufs_len
;
169 /* Helper functions for re_string_allocate, and re_string_construct. */
172 re_string_realloc_buffers (pstr
, new_buf_len
)
176 #ifdef RE_ENABLE_I18N
179 pstr
->wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
180 if (BE (pstr
->wcs
== NULL
, 0))
183 #endif /* RE_ENABLE_I18N */
184 if (MBS_ALLOCATED (pstr
))
186 pstr
->mbs
= re_realloc (pstr
->mbs
, unsigned char, new_buf_len
);
187 if (BE (pstr
->mbs
== NULL
, 0))
190 if (MBS_CASE_ALLOCATED (pstr
))
192 pstr
->mbs_case
= re_realloc (pstr
->mbs_case
, unsigned char, new_buf_len
);
193 if (BE (pstr
->mbs_case
== NULL
, 0))
195 if (!MBS_ALLOCATED (pstr
))
196 pstr
->mbs
= pstr
->mbs_case
;
198 pstr
->bufs_len
= new_buf_len
;
204 re_string_construct_common (str
, len
, pstr
, trans
, icase
)
208 RE_TRANSLATE_TYPE trans
;
211 memset (pstr
, '\0', sizeof (re_string_t
));
212 pstr
->raw_mbs
= (const unsigned char *) str
;
215 pstr
->icase
= icase
? 1 : 0;
218 #ifdef RE_ENABLE_I18N
220 /* Build wide character buffer PSTR->WCS.
221 If the byte sequence of the string are:
222 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
223 Then wide character buffer will be:
224 <wc1> , WEOF , <wc2> , WEOF , <wc3>
225 We use WEOF for padding, they indicate that the position isn't
226 a first byte of a multibyte character.
228 Note that this function assumes PSTR->VALID_LEN elements are already
229 built and starts from PSTR->VALID_LEN. */
232 build_wcs_buffer (pstr
)
236 int byte_idx
, end_idx
, mbclen
, remain_len
;
237 /* Build the buffers from pstr->valid_len to either pstr->len or
239 end_idx
= (pstr
->bufs_len
> pstr
->len
)? pstr
->len
: pstr
->bufs_len
;
240 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
243 remain_len
= end_idx
- byte_idx
;
244 prev_st
= pstr
->cur_state
;
245 mbclen
= mbrtowc (&wc
, ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
246 + byte_idx
), remain_len
, &pstr
->cur_state
);
247 if (BE (mbclen
== (size_t) -2, 0))
249 /* The buffer doesn't have enough space, finish to build. */
250 pstr
->cur_state
= prev_st
;
253 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
255 /* We treat these cases as a singlebyte character. */
257 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
258 pstr
->cur_state
= prev_st
;
261 /* Apply the translateion if we need. */
262 if (pstr
->trans
!= NULL
&& mbclen
== 1)
264 int ch
= pstr
->trans
[pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]];
265 pstr
->mbs_case
[byte_idx
] = ch
;
267 /* Write wide character and padding. */
268 pstr
->wcs
[byte_idx
++] = wc
;
269 /* Write paddings. */
270 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
271 pstr
->wcs
[byte_idx
++] = WEOF
;
273 pstr
->valid_len
= byte_idx
;
276 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
277 but for REG_ICASE. */
280 build_wcs_upper_buffer (pstr
)
284 int byte_idx
, end_idx
, mbclen
, remain_len
;
285 /* Build the buffers from pstr->valid_len to either pstr->len or
287 end_idx
= (pstr
->bufs_len
> pstr
->len
)? pstr
->len
: pstr
->bufs_len
;
288 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
291 remain_len
= end_idx
- byte_idx
;
292 prev_st
= pstr
->cur_state
;
293 mbclen
= mbrtowc (&wc
, ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
294 + byte_idx
), remain_len
, &pstr
->cur_state
);
295 if (BE (mbclen
== (size_t) -2, 0))
297 /* The buffer doesn't have enough space, finish to build. */
298 pstr
->cur_state
= prev_st
;
301 else if (mbclen
== 1 || mbclen
== (size_t) -1 || mbclen
== 0)
303 /* In case of a singlebyte character. */
304 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
305 /* Apply the translateion if we need. */
306 if (pstr
->trans
!= NULL
&& mbclen
== 1)
308 ch
= pstr
->trans
[ch
];
309 pstr
->mbs_case
[byte_idx
] = ch
;
311 pstr
->wcs
[byte_idx
] = iswlower (wc
) ? toupper (wc
) : wc
;
312 pstr
->mbs
[byte_idx
++] = islower (ch
) ? toupper (ch
) : ch
;
313 if (BE (mbclen
== (size_t) -1, 0))
314 pstr
->cur_state
= prev_st
;
316 else /* mbclen > 1 */
319 wcrtomb ((char *) pstr
->mbs
+ byte_idx
, towupper (wc
), &prev_st
);
321 memcpy (pstr
->mbs
+ byte_idx
,
322 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
323 pstr
->wcs
[byte_idx
++] = iswlower (wc
) ? toupper (wc
) : wc
;
324 /* Write paddings. */
325 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
326 pstr
->wcs
[byte_idx
++] = WEOF
;
329 pstr
->valid_len
= byte_idx
;
332 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
336 re_string_skip_chars (pstr
, new_raw_idx
)
341 int rawbuf_idx
, mbclen
;
343 /* Skip the characters which are not necessary to check. */
344 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_len
;
345 rawbuf_idx
< new_raw_idx
;)
347 int remain_len
= pstr
->len
- rawbuf_idx
;
348 prev_st
= pstr
->cur_state
;
349 mbclen
= mbrlen ((const char *) pstr
->raw_mbs
+ rawbuf_idx
, remain_len
,
351 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
353 /* We treat these cases as a singlebyte character. */
355 pstr
->cur_state
= prev_st
;
357 /* Then proceed the next character. */
358 rawbuf_idx
+= mbclen
;
362 #endif /* RE_ENABLE_I18N */
364 /* Build the buffer PSTR->MBS, and apply the translation if we need.
365 This function is used in case of REG_ICASE. */
368 build_upper_buffer (pstr
)
371 int char_idx
, end_idx
;
372 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
374 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
376 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
377 if (pstr
->trans
!= NULL
)
379 ch
= pstr
->trans
[ch
];
380 pstr
->mbs_case
[char_idx
] = ch
;
383 pstr
->mbs
[char_idx
] = toupper (ch
);
385 pstr
->mbs
[char_idx
] = ch
;
387 pstr
->valid_len
= char_idx
;
390 /* Apply TRANS to the buffer in PSTR. */
393 re_string_translate_buffer (pstr
)
396 int buf_idx
, end_idx
;
397 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
399 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
401 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
402 pstr
->mbs_case
[buf_idx
] = pstr
->trans
[ch
];
405 pstr
->valid_len
= buf_idx
;
408 /* This function re-construct the buffers.
409 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
410 convert to upper case in case of REG_ICASE, apply translation. */
413 re_string_reconstruct (pstr
, idx
, eflags
, newline
)
415 int idx
, eflags
, newline
;
417 int offset
= idx
- pstr
->raw_mbs_idx
;
421 #ifdef RE_ENABLE_I18N
423 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
424 #endif /* RE_ENABLE_I18N */
425 pstr
->len
+= pstr
->raw_mbs_idx
;
426 pstr
->stop
+= pstr
->raw_mbs_idx
;
427 pstr
->valid_len
= pstr
->raw_mbs_idx
= 0;
428 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
429 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
430 if (!MBS_CASE_ALLOCATED (pstr
))
431 pstr
->mbs_case
= (unsigned char *) pstr
->raw_mbs
;
432 if (!MBS_ALLOCATED (pstr
) && !MBS_CASE_ALLOCATED (pstr
))
433 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
439 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
,
441 /* Are the characters which are already checked remain? */
442 if (offset
< pstr
->valid_len
)
444 /* Yes, move them to the front of the buffer. */
445 #ifdef RE_ENABLE_I18N
447 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
448 (pstr
->valid_len
- offset
) * sizeof (wint_t));
449 #endif /* RE_ENABLE_I18N */
450 if (MBS_ALLOCATED (pstr
))
451 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
452 pstr
->valid_len
- offset
);
453 if (MBS_CASE_ALLOCATED (pstr
))
454 memmove (pstr
->mbs_case
, pstr
->mbs_case
+ offset
,
455 pstr
->valid_len
- offset
);
456 pstr
->valid_len
-= offset
;
458 assert (pstr
->valid_len
> 0);
463 /* No, skip all characters until IDX. */
465 #ifdef RE_ENABLE_I18N
469 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
) - idx
;
470 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
471 pstr
->wcs
[wcs_idx
] = WEOF
;
473 #endif /* RE_ENABLE_I18N */
475 if (!MBS_CASE_ALLOCATED (pstr
))
477 pstr
->mbs_case
+= offset
;
478 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
479 if (!MBS_ALLOCATED (pstr
))
483 pstr
->raw_mbs_idx
= idx
;
485 pstr
->stop
-= offset
;
487 /* Then build the buffers. */
488 #ifdef RE_ENABLE_I18N
492 build_wcs_upper_buffer (pstr
);
494 build_wcs_buffer (pstr
);
497 #endif /* RE_ENABLE_I18N */
500 build_upper_buffer (pstr
);
501 else if (pstr
->trans
!= NULL
)
502 re_string_translate_buffer (pstr
);
510 re_string_destruct (pstr
)
513 #ifdef RE_ENABLE_I18N
515 #endif /* RE_ENABLE_I18N */
516 if (MBS_ALLOCATED (pstr
))
518 if (MBS_CASE_ALLOCATED (pstr
))
519 re_free (pstr
->mbs_case
);
522 /* Return the context at IDX in INPUT. */
525 re_string_context_at (input
, idx
, eflags
, newline_anchor
)
526 const re_string_t
*input
;
527 int idx
, eflags
, newline_anchor
;
530 if (idx
< 0 || idx
== input
->len
)
533 /* In this case, we use the value stored in input->tip_context,
534 since we can't know the character in input->mbs[-1] here. */
535 return input
->tip_context
;
536 else /* (idx == input->len) */
537 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
538 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
540 c
= re_string_byte_at (input
, idx
);
541 if (IS_WORD_CHAR (c
))
543 return (newline_anchor
&& IS_NEWLINE (c
)) ? CONTEXT_NEWLINE
: 0;
546 /* Functions for set operation. */
549 re_node_set_alloc (set
, size
)
555 set
->elems
= re_malloc (int, size
);
556 if (BE (set
->elems
== NULL
, 0))
562 re_node_set_init_1 (set
, elem
)
568 set
->elems
= re_malloc (int, 1);
569 if (BE (set
->elems
== NULL
, 0))
571 set
->elems
[0] = elem
;
576 re_node_set_init_2 (set
, elem1
, elem2
)
581 set
->elems
= re_malloc (int, 2);
582 if (BE (set
->elems
== NULL
, 0))
587 set
->elems
[0] = elem1
;
594 set
->elems
[0] = elem1
;
595 set
->elems
[1] = elem2
;
599 set
->elems
[0] = elem2
;
600 set
->elems
[1] = elem1
;
607 re_node_set_init_copy (dest
, src
)
609 const re_node_set
*src
;
611 dest
->nelem
= src
->nelem
;
614 dest
->alloc
= dest
->nelem
;
615 dest
->elems
= re_malloc (int, dest
->alloc
);
616 if (BE (dest
->elems
== NULL
, 0))
618 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
621 re_node_set_init_empty (dest
);
625 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
626 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
627 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
630 re_node_set_add_intersect (dest
, src1
, src2
)
632 const re_node_set
*src1
, *src2
;
635 if (src1
->nelem
> 0 && src2
->nelem
> 0)
637 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
639 dest
->alloc
= src1
->nelem
+ src2
->nelem
+ dest
->nelem
;
640 dest
->elems
= re_realloc (dest
->elems
, int, dest
->alloc
);
641 if (BE (dest
->elems
== NULL
, 0))
648 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
650 if (src1
->elems
[i1
] > src2
->elems
[i2
])
655 if (src1
->elems
[i1
] == src2
->elems
[i2
])
657 while (id
< dest
->nelem
&& dest
->elems
[id
] < src2
->elems
[i2
])
659 if (id
< dest
->nelem
&& dest
->elems
[id
] == src2
->elems
[i2
])
663 memmove (dest
->elems
+ id
+ 1, dest
->elems
+ id
,
664 sizeof (int) * (dest
->nelem
- id
));
665 dest
->elems
[id
++] = src2
->elems
[i2
++];
674 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
675 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
678 re_node_set_init_union (dest
, src1
, src2
)
680 const re_node_set
*src1
, *src2
;
683 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
685 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
686 dest
->elems
= re_malloc (int, dest
->alloc
);
687 if (BE (dest
->elems
== NULL
, 0))
692 if (src1
!= NULL
&& src1
->nelem
> 0)
693 return re_node_set_init_copy (dest
, src1
);
694 else if (src2
!= NULL
&& src2
->nelem
> 0)
695 return re_node_set_init_copy (dest
, src2
);
697 re_node_set_init_empty (dest
);
700 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
702 if (src1
->elems
[i1
] > src2
->elems
[i2
])
704 dest
->elems
[id
++] = src2
->elems
[i2
++];
707 if (src1
->elems
[i1
] == src2
->elems
[i2
])
709 dest
->elems
[id
++] = src1
->elems
[i1
++];
711 if (i1
< src1
->nelem
)
713 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
714 (src1
->nelem
- i1
) * sizeof (int));
715 id
+= src1
->nelem
- i1
;
717 else if (i2
< src2
->nelem
)
719 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
720 (src2
->nelem
- i2
) * sizeof (int));
721 id
+= src2
->nelem
- i2
;
727 /* Calculate the union set of the sets DEST and SRC. And store it to
728 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
731 re_node_set_merge (dest
, src
)
733 const re_node_set
*src
;
736 if (src
== NULL
|| src
->nelem
== 0)
738 if (dest
->alloc
< src
->nelem
+ dest
->nelem
)
740 dest
->alloc
= 2 * (src
->nelem
+ dest
->alloc
);
741 dest
->elems
= re_realloc (dest
->elems
, int, dest
->alloc
);
742 if (BE (dest
->elems
== NULL
, 0))
746 for (si
= 0, di
= 0 ; si
< src
->nelem
&& di
< dest
->nelem
;)
748 int cp_from
, ncp
, mid
, right
, src_elem
= src
->elems
[si
];
749 /* Binary search the spot we will add the new element. */
753 mid
= (di
+ right
) / 2;
754 if (dest
->elems
[mid
] < src_elem
)
759 if (di
>= dest
->nelem
)
762 if (dest
->elems
[di
] == src_elem
)
764 /* Skip since, DEST already has the element. */
770 /* Skip the src elements which are less than dest->elems[di]. */
772 while (si
< src
->nelem
&& src
->elems
[si
] < dest
->elems
[di
])
774 /* Copy these src elements. */
776 memmove (dest
->elems
+ di
+ ncp
, dest
->elems
+ di
,
777 sizeof (int) * (dest
->nelem
- di
));
778 memcpy (dest
->elems
+ di
, src
->elems
+ cp_from
,
780 /* Update counters. */
785 /* Copy remaining src elements. */
788 memcpy (dest
->elems
+ di
, src
->elems
+ si
,
789 sizeof (int) * (src
->nelem
- si
));
790 dest
->nelem
+= src
->nelem
- si
;
795 /* Insert the new element ELEM to the re_node_set* SET.
796 return 0 if SET already has ELEM,
797 return -1 if an error is occured, return 1 otherwise. */
800 re_node_set_insert (set
, elem
)
805 /* In case of the set is empty. */
806 if (set
->elems
== NULL
|| set
->alloc
== 0)
808 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
814 /* Binary search the spot we will add the new element. */
819 mid
= (idx
+ right
) / 2;
820 if (set
->elems
[mid
] < elem
)
826 /* Realloc if we need. */
827 if (set
->alloc
< set
->nelem
+ 1)
830 set
->alloc
= set
->alloc
* 2;
831 new_array
= re_malloc (int, set
->alloc
);
832 if (BE (new_array
== NULL
, 0))
834 /* Copy the elements they are followed by the new element. */
836 memcpy (new_array
, set
->elems
, sizeof (int) * (idx
));
837 /* Copy the elements which follows the new element. */
838 if (set
->nelem
- idx
> 0)
839 memcpy (new_array
+ idx
+ 1, set
->elems
+ idx
,
840 sizeof (int) * (set
->nelem
- idx
));
841 re_free (set
->elems
);
842 set
->elems
= new_array
;
846 /* Move the elements which follows the new element. */
847 if (set
->nelem
- idx
> 0)
848 memmove (set
->elems
+ idx
+ 1, set
->elems
+ idx
,
849 sizeof (int) * (set
->nelem
- idx
));
851 /* Insert the new element. */
852 set
->elems
[idx
] = elem
;
857 /* Compare two node sets SET1 and SET2.
858 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
861 re_node_set_compare (set1
, set2
)
862 const re_node_set
*set1
, *set2
;
865 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
867 for (i
= 0 ; i
< set1
->nelem
; i
++)
868 if (set1
->elems
[i
] != set2
->elems
[i
])
873 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
876 re_node_set_contains (set
, elem
)
877 const re_node_set
*set
;
884 /* Binary search the element. */
886 right
= set
->nelem
- 1;
889 mid
= (idx
+ right
) / 2;
890 if (set
->elems
[mid
] < elem
)
895 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
899 re_node_set_remove_at (set
, idx
)
903 if (idx
< 0 || idx
>= set
->nelem
)
905 if (idx
< set
->nelem
- 1)
906 memmove (set
->elems
+ idx
, set
->elems
+ idx
+ 1,
907 sizeof (int) * (set
->nelem
- idx
- 1));
912 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
913 Or return -1, if an error will be occured. */
916 re_dfa_add_node (dfa
, token
, mode
)
921 if (dfa
->nodes_len
>= dfa
->nodes_alloc
)
923 re_token_t
*new_array
;
924 dfa
->nodes_alloc
*= 2;
925 new_array
= re_realloc (dfa
->nodes
, re_token_t
, dfa
->nodes_alloc
);
926 if (BE (new_array
== NULL
, 0))
929 dfa
->nodes
= new_array
;
932 int *new_firsts
, *new_nexts
;
933 re_node_set
*new_edests
, *new_eclosures
, *new_inveclosures
;
935 new_firsts
= re_realloc (dfa
->firsts
, int, dfa
->nodes_alloc
);
936 new_nexts
= re_realloc (dfa
->nexts
, int, dfa
->nodes_alloc
);
937 new_edests
= re_realloc (dfa
->edests
, re_node_set
, dfa
->nodes_alloc
);
938 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
,
940 new_inveclosures
= re_realloc (dfa
->inveclosures
, re_node_set
,
942 if (BE (new_firsts
== NULL
|| new_nexts
== NULL
|| new_edests
== NULL
943 || new_eclosures
== NULL
|| new_inveclosures
== NULL
, 0))
945 dfa
->firsts
= new_firsts
;
946 dfa
->nexts
= new_nexts
;
947 dfa
->edests
= new_edests
;
948 dfa
->eclosures
= new_eclosures
;
949 dfa
->inveclosures
= new_inveclosures
;
952 dfa
->nodes
[dfa
->nodes_len
] = token
;
953 dfa
->nodes
[dfa
->nodes_len
].duplicated
= 0;
954 return dfa
->nodes_len
++;
957 static unsigned int inline
958 calc_state_hash (nodes
, context
)
959 const re_node_set
*nodes
;
960 unsigned int context
;
962 unsigned int hash
= nodes
->nelem
+ context
;
964 for (i
= 0 ; i
< nodes
->nelem
; i
++)
965 hash
+= nodes
->elems
[i
];
969 /* Search for the state whose node_set is equivalent to NODES.
970 Return the pointer to the state, if we found it in the DFA.
971 Otherwise create the new one and return it. In case of an error
972 return NULL and set the error code in ERR.
973 Note: - We assume NULL as the invalid state, then it is possible that
974 return value is NULL and ERR is REG_NOERROR.
975 - We never return non-NULL value in case of any errors, it is for
978 static re_dfastate_t
*
979 re_acquire_state (err
, dfa
, nodes
)
982 const re_node_set
*nodes
;
985 re_dfastate_t
*new_state
;
986 struct re_state_table_entry
*spot
;
988 if (BE (nodes
->nelem
== 0, 0))
993 hash
= calc_state_hash (nodes
, 0);
994 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
996 for (i
= 0 ; i
< spot
->num
; i
++)
998 re_dfastate_t
*state
= spot
->array
[i
];
999 if (hash
!= state
->hash
)
1001 if (re_node_set_compare (&state
->nodes
, nodes
))
1005 /* There are no appropriate state in the dfa, create the new one. */
1006 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1007 if (BE (new_state
!= NULL
, 1))
1016 /* Search for the state whose node_set is equivalent to NODES and
1017 whose context is equivalent to CONTEXT.
1018 Return the pointer to the state, if we found it in the DFA.
1019 Otherwise create the new one and return it. In case of an error
1020 return NULL and set the error code in ERR.
1021 Note: - We assume NULL as the invalid state, then it is possible that
1022 return value is NULL and ERR is REG_NOERROR.
1023 - We never return non-NULL value in case of any errors, it is for
1026 static re_dfastate_t
*
1027 re_acquire_state_context (err
, dfa
, nodes
, context
)
1030 const re_node_set
*nodes
;
1031 unsigned int context
;
1034 re_dfastate_t
*new_state
;
1035 struct re_state_table_entry
*spot
;
1037 if (nodes
->nelem
== 0)
1042 hash
= calc_state_hash (nodes
, context
);
1043 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1045 for (i
= 0 ; i
< spot
->num
; i
++)
1047 re_dfastate_t
*state
= spot
->array
[i
];
1048 if (hash
!= state
->hash
)
1050 if (re_node_set_compare (state
->entrance_nodes
, nodes
)
1051 && state
->context
== context
)
1054 /* There are no appropriate state in `dfa', create the new one. */
1055 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1056 if (BE (new_state
!= NULL
, 1))
1065 /* Allocate memory for DFA state and initialize common properties.
1066 Return the new state if succeeded, otherwise return NULL. */
1068 static re_dfastate_t
*
1069 create_newstate_common (dfa
, nodes
, hash
)
1071 const re_node_set
*nodes
;
1074 re_dfastate_t
*newstate
;
1075 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1076 if (BE (newstate
== NULL
, 0))
1078 re_node_set_init_copy (&newstate
->nodes
, nodes
);
1079 newstate
->trtable
= NULL
;
1080 newstate
->trtable_search
= NULL
;
1081 newstate
->hash
= hash
;
1085 /* Store the new state NEWSTATE whose hash value is HASH in appropriate
1086 position. Return value indicate the error code if failed. */
1088 static reg_errcode_t
1089 register_state (dfa
, newstate
, hash
)
1091 re_dfastate_t
*newstate
;
1094 struct re_state_table_entry
*spot
;
1095 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1097 if (spot
->alloc
<= spot
->num
)
1099 spot
->alloc
= 2 * spot
->num
+ 2;
1100 spot
->array
= re_realloc (spot
->array
, re_dfastate_t
*, spot
->alloc
);
1101 if (BE (spot
->array
== NULL
, 0))
1104 spot
->array
[spot
->num
++] = newstate
;
1108 /* Create the new state which is independ of contexts.
1109 Return the new state if succeeded, otherwise return NULL. */
1111 static re_dfastate_t
*
1112 create_ci_newstate (dfa
, nodes
, hash
)
1114 const re_node_set
*nodes
;
1119 re_dfastate_t
*newstate
;
1120 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1121 if (BE (newstate
== NULL
, 0))
1123 newstate
->entrance_nodes
= &newstate
->nodes
;
1125 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1127 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1128 re_token_type_t type
= node
->type
;
1129 if (type
== CHARACTER
)
1132 /* If the state has the halt node, the state is a halt state. */
1133 else if (type
== END_OF_RE
)
1135 #ifdef RE_ENABLE_I18N
1136 else if (type
== COMPLEX_BRACKET
1137 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1138 newstate
->accept_mb
= 1;
1139 #endif /* RE_ENABLE_I18N */
1140 else if (type
== OP_BACK_REF
)
1141 newstate
->has_backref
= 1;
1142 else if (type
== ANCHOR
|| OP_CONTEXT_NODE
)
1144 newstate
->has_constraint
= 1;
1145 if (type
== OP_CONTEXT_NODE
1146 && dfa
->nodes
[node
->opr
.ctx_info
->entity
].type
== END_OF_RE
)
1150 err
= register_state (dfa
, newstate
, hash
);
1151 return (err
!= REG_NOERROR
) ? NULL
: newstate
;
1154 /* Create the new state which is depend on the context CONTEXT.
1155 Return the new state if succeeded, otherwise return NULL. */
1157 static re_dfastate_t
*
1158 create_cd_newstate (dfa
, nodes
, context
, hash
)
1160 const re_node_set
*nodes
;
1161 unsigned int context
, hash
;
1163 int i
, nctx_nodes
= 0;
1165 re_dfastate_t
*newstate
;
1167 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1168 if (BE (newstate
== NULL
, 0))
1170 newstate
->context
= context
;
1171 newstate
->entrance_nodes
= &newstate
->nodes
;
1173 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1175 unsigned int constraint
= 0;
1176 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1177 re_token_type_t type
= node
->type
;
1178 if (type
== CHARACTER
)
1181 /* If the state has the halt node, the state is a halt state. */
1182 else if (type
== END_OF_RE
)
1184 #ifdef RE_ENABLE_I18N
1185 else if (type
== COMPLEX_BRACKET
1186 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1187 newstate
->accept_mb
= 1;
1188 #endif /* RE_ENABLE_I18N */
1189 else if (type
== OP_BACK_REF
)
1190 newstate
->has_backref
= 1;
1191 else if (type
== ANCHOR
)
1192 constraint
= node
->opr
.ctx_type
;
1193 else if (type
== OP_CONTEXT_NODE
)
1195 re_token_type_t ctype
= dfa
->nodes
[node
->opr
.ctx_info
->entity
].type
;
1196 constraint
= node
->constraint
;
1197 if (ctype
== END_OF_RE
)
1199 else if (ctype
== OP_BACK_REF
)
1200 newstate
->has_backref
= 1;
1201 #ifdef RE_ENABLE_I18N
1202 else if (ctype
== COMPLEX_BRACKET
1203 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1204 newstate
->accept_mb
= 1;
1205 #endif /* RE_ENABLE_I18N */
1210 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1212 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1213 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1215 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1217 newstate
->has_constraint
= 1;
1220 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1222 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1227 err
= register_state (dfa
, newstate
, hash
);
1228 return (err
!= REG_NOERROR
) ? NULL
: newstate
;