1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
26 static int re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
,
27 wint_t *last_wc
) internal_function
;
28 #endif /* RE_ENABLE_I18N */
29 static reg_errcode_t
register_state (re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
30 unsigned int hash
) internal_function
;
31 static re_dfastate_t
*create_ci_newstate (re_dfa_t
*dfa
,
32 const re_node_set
*nodes
,
33 unsigned int hash
) internal_function
;
34 static re_dfastate_t
*create_cd_newstate (re_dfa_t
*dfa
,
35 const re_node_set
*nodes
,
37 unsigned int hash
) internal_function
;
38 static unsigned int inline calc_state_hash (const re_node_set
*nodes
,
39 unsigned int context
) internal_function
;
41 /* Functions for string operation. */
43 /* This function allocate the buffers. It is necessary to call
44 re_string_reconstruct before using the object. */
47 re_string_allocate (pstr
, str
, len
, init_len
, trans
, icase
, dfa
)
50 int len
, init_len
, icase
;
51 RE_TRANSLATE_TYPE trans
;
57 /* Ensure at least one character fits into the buffers. */
58 if (init_len
< dfa
->mb_cur_max
)
59 init_len
= dfa
->mb_cur_max
;
60 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
61 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
63 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
64 if (BE (ret
!= REG_NOERROR
, 0))
67 pstr
->word_char
= dfa
->word_char
;
68 pstr
->word_ops_used
= dfa
->word_ops_used
;
69 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
70 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
71 pstr
->valid_raw_len
= pstr
->valid_len
;
75 /* This function allocate the buffers, and initialize them. */
78 re_string_construct (pstr
, str
, len
, trans
, icase
, dfa
)
82 RE_TRANSLATE_TYPE trans
;
86 memset (pstr
, '\0', sizeof (re_string_t
));
87 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
91 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
92 if (BE (ret
!= REG_NOERROR
, 0))
95 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
100 if (dfa
->mb_cur_max
> 1)
104 ret
= build_wcs_upper_buffer (pstr
);
105 if (BE (ret
!= REG_NOERROR
, 0))
107 if (pstr
->valid_raw_len
>= len
)
109 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
111 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
112 if (BE (ret
!= REG_NOERROR
, 0))
117 #endif /* RE_ENABLE_I18N */
118 build_upper_buffer (pstr
);
122 #ifdef RE_ENABLE_I18N
123 if (dfa
->mb_cur_max
> 1)
124 build_wcs_buffer (pstr
);
126 #endif /* RE_ENABLE_I18N */
129 re_string_translate_buffer (pstr
);
132 pstr
->valid_len
= pstr
->bufs_len
;
133 pstr
->valid_raw_len
= pstr
->bufs_len
;
141 /* Helper functions for re_string_allocate, and re_string_construct. */
144 re_string_realloc_buffers (pstr
, new_buf_len
)
148 #ifdef RE_ENABLE_I18N
149 if (pstr
->mb_cur_max
> 1)
151 wint_t *new_array
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
152 if (BE (new_array
== NULL
, 0))
154 pstr
->wcs
= new_array
;
155 if (pstr
->offsets
!= NULL
)
157 int *new_array
= re_realloc (pstr
->offsets
, int, new_buf_len
);
158 if (BE (new_array
== NULL
, 0))
160 pstr
->offsets
= new_array
;
163 #endif /* RE_ENABLE_I18N */
164 if (pstr
->mbs_allocated
)
166 unsigned char *new_array
= re_realloc (pstr
->mbs
, unsigned char,
168 if (BE (new_array
== NULL
, 0))
170 pstr
->mbs
= new_array
;
172 pstr
->bufs_len
= new_buf_len
;
178 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
)
182 RE_TRANSLATE_TYPE trans
;
186 pstr
->raw_mbs
= (const unsigned char *) str
;
189 pstr
->trans
= (unsigned RE_TRANSLATE_TYPE
) trans
;
190 pstr
->icase
= icase
? 1 : 0;
191 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
192 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
193 pstr
->is_utf8
= dfa
->is_utf8
;
194 pstr
->map_notascii
= dfa
->map_notascii
;
195 pstr
->stop
= pstr
->len
;
196 pstr
->raw_stop
= pstr
->stop
;
199 #ifdef RE_ENABLE_I18N
201 /* Build wide character buffer PSTR->WCS.
202 If the byte sequence of the string are:
203 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
204 Then wide character buffer will be:
205 <wc1> , WEOF , <wc2> , WEOF , <wc3>
206 We use WEOF for padding, they indicate that the position isn't
207 a first byte of a multibyte character.
209 Note that this function assumes PSTR->VALID_LEN elements are already
210 built and starts from PSTR->VALID_LEN. */
213 build_wcs_buffer (pstr
)
217 unsigned char buf
[MB_LEN_MAX
];
218 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
220 unsigned char buf
[64];
223 int byte_idx
, end_idx
, mbclen
, remain_len
;
225 /* Build the buffers from pstr->valid_len to either pstr->len or
227 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
228 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
233 remain_len
= end_idx
- byte_idx
;
234 prev_st
= pstr
->cur_state
;
235 /* Apply the translation if we need. */
236 if (BE (pstr
->trans
!= NULL
, 0))
240 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
242 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
243 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
245 p
= (const char *) buf
;
248 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
249 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
250 if (BE (mbclen
== (size_t) -2, 0))
252 /* The buffer doesn't have enough space, finish to build. */
253 pstr
->cur_state
= prev_st
;
256 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
258 /* We treat these cases as a singlebyte character. */
260 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
261 if (BE (pstr
->trans
!= NULL
, 0))
262 wc
= pstr
->trans
[wc
];
263 pstr
->cur_state
= prev_st
;
266 /* Write wide character and padding. */
267 pstr
->wcs
[byte_idx
++] = wc
;
268 /* Write paddings. */
269 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
270 pstr
->wcs
[byte_idx
++] = WEOF
;
272 pstr
->valid_len
= byte_idx
;
273 pstr
->valid_raw_len
= byte_idx
;
276 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
277 but for REG_ICASE. */
280 build_wcs_upper_buffer (pstr
)
284 int src_idx
, byte_idx
, end_idx
, mbclen
, remain_len
;
286 unsigned char buf
[MB_LEN_MAX
];
287 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
289 unsigned char buf
[64];
292 byte_idx
= pstr
->valid_len
;
293 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
295 /* The following optimization assumes that ASCII characters can be
296 mapped to wide characters with a simple cast. */
297 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
299 while (byte_idx
< end_idx
)
303 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
304 && mbsinit (&pstr
->cur_state
))
306 /* In case of a singlebyte character. */
308 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
309 /* The next step uses the assumption that wchar_t is encoded
310 ASCII-safe: all ASCII values can be converted like this. */
311 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
316 remain_len
= end_idx
- byte_idx
;
317 prev_st
= pstr
->cur_state
;
318 mbclen
= mbrtowc (&wc
,
319 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
320 + byte_idx
), remain_len
, &pstr
->cur_state
);
321 if (BE (mbclen
> 0, 1))
329 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
330 if (BE (mbclen
== mbcdlen
, 1))
331 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
339 memcpy (pstr
->mbs
+ byte_idx
,
340 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
341 pstr
->wcs
[byte_idx
++] = wcu
;
342 /* Write paddings. */
343 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
344 pstr
->wcs
[byte_idx
++] = WEOF
;
346 else if (mbclen
== (size_t) -1 || mbclen
== 0)
348 /* It is an invalid character or '\0'. Just use the byte. */
349 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
350 pstr
->mbs
[byte_idx
] = ch
;
351 /* And also cast it to wide char. */
352 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
353 if (BE (mbclen
== (size_t) -1, 0))
354 pstr
->cur_state
= prev_st
;
358 /* The buffer doesn't have enough space, finish to build. */
359 pstr
->cur_state
= prev_st
;
363 pstr
->valid_len
= byte_idx
;
364 pstr
->valid_raw_len
= byte_idx
;
368 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
373 remain_len
= end_idx
- byte_idx
;
374 prev_st
= pstr
->cur_state
;
375 if (BE (pstr
->trans
!= NULL
, 0))
379 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
381 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
382 buf
[i
] = pstr
->trans
[ch
];
384 p
= (const char *) buf
;
387 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
388 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
389 if (BE (mbclen
> 0, 1))
397 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
398 if (BE (mbclen
== mbcdlen
, 1))
399 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
404 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
406 pstr
->cur_state
= prev_st
;
410 if (pstr
->offsets
== NULL
)
412 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
414 if (pstr
->offsets
== NULL
)
417 if (!pstr
->offsets_needed
)
419 for (i
= 0; i
< byte_idx
; ++i
)
420 pstr
->offsets
[i
] = i
;
421 pstr
->offsets_needed
= 1;
424 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
425 pstr
->wcs
[byte_idx
] = wcu
;
426 pstr
->offsets
[byte_idx
] = src_idx
;
427 for (i
= 1; i
< mbcdlen
; ++i
)
429 pstr
->offsets
[byte_idx
+ i
]
430 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
431 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
433 pstr
->len
+= mbcdlen
- mbclen
;
434 if (pstr
->raw_stop
> src_idx
)
435 pstr
->stop
+= mbcdlen
- mbclen
;
436 end_idx
= (pstr
->bufs_len
> pstr
->len
)
437 ? pstr
->len
: pstr
->bufs_len
;
444 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
446 if (BE (pstr
->offsets_needed
!= 0, 0))
449 for (i
= 0; i
< mbclen
; ++i
)
450 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
454 pstr
->wcs
[byte_idx
++] = wcu
;
455 /* Write paddings. */
456 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
457 pstr
->wcs
[byte_idx
++] = WEOF
;
459 else if (mbclen
== (size_t) -1 || mbclen
== 0)
461 /* It is an invalid character or '\0'. Just use the byte. */
462 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
464 if (BE (pstr
->trans
!= NULL
, 0))
465 ch
= pstr
->trans
[ch
];
466 pstr
->mbs
[byte_idx
] = ch
;
468 if (BE (pstr
->offsets_needed
!= 0, 0))
469 pstr
->offsets
[byte_idx
] = src_idx
;
472 /* And also cast it to wide char. */
473 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
474 if (BE (mbclen
== (size_t) -1, 0))
475 pstr
->cur_state
= prev_st
;
479 /* The buffer doesn't have enough space, finish to build. */
480 pstr
->cur_state
= prev_st
;
484 pstr
->valid_len
= byte_idx
;
485 pstr
->valid_raw_len
= src_idx
;
489 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
493 re_string_skip_chars (pstr
, new_raw_idx
, last_wc
)
499 int rawbuf_idx
, mbclen
;
502 /* Skip the characters which are not necessary to check. */
503 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
504 rawbuf_idx
< new_raw_idx
;)
507 remain_len
= pstr
->len
- rawbuf_idx
;
508 prev_st
= pstr
->cur_state
;
509 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
510 remain_len
, &pstr
->cur_state
);
511 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
513 /* We treat these cases as a singlebyte character. */
515 pstr
->cur_state
= prev_st
;
517 /* Then proceed the next character. */
518 rawbuf_idx
+= mbclen
;
520 *last_wc
= (wint_t) wc
;
523 #endif /* RE_ENABLE_I18N */
525 /* Build the buffer PSTR->MBS, and apply the translation if we need.
526 This function is used in case of REG_ICASE. */
529 build_upper_buffer (pstr
)
532 int char_idx
, end_idx
;
533 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
535 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
537 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
538 if (BE (pstr
->trans
!= NULL
, 0))
539 ch
= pstr
->trans
[ch
];
541 pstr
->mbs
[char_idx
] = toupper (ch
);
543 pstr
->mbs
[char_idx
] = ch
;
545 pstr
->valid_len
= char_idx
;
546 pstr
->valid_raw_len
= char_idx
;
549 /* Apply TRANS to the buffer in PSTR. */
552 re_string_translate_buffer (pstr
)
555 int buf_idx
, end_idx
;
556 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
558 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
560 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
561 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
564 pstr
->valid_len
= buf_idx
;
565 pstr
->valid_raw_len
= buf_idx
;
568 /* This function re-construct the buffers.
569 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
570 convert to upper case in case of REG_ICASE, apply translation. */
573 re_string_reconstruct (pstr
, idx
, eflags
)
577 int offset
= idx
- pstr
->raw_mbs_idx
;
578 if (BE (offset
< 0, 0))
581 #ifdef RE_ENABLE_I18N
582 if (pstr
->mb_cur_max
> 1)
583 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
584 #endif /* RE_ENABLE_I18N */
585 pstr
->len
= pstr
->raw_len
;
586 pstr
->stop
= pstr
->raw_stop
;
588 pstr
->raw_mbs_idx
= 0;
589 pstr
->valid_raw_len
= 0;
590 pstr
->offsets_needed
= 0;
591 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
592 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
593 if (!pstr
->mbs_allocated
)
594 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
598 if (BE (offset
!= 0, 1))
600 /* Are the characters which are already checked remain? */
601 if (BE (offset
< pstr
->valid_raw_len
, 1)
602 #ifdef RE_ENABLE_I18N
603 /* Handling this would enlarge the code too much.
604 Accept a slowdown in that case. */
605 && pstr
->offsets_needed
== 0
609 /* Yes, move them to the front of the buffer. */
610 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
611 #ifdef RE_ENABLE_I18N
612 if (pstr
->mb_cur_max
> 1)
613 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
614 (pstr
->valid_len
- offset
) * sizeof (wint_t));
615 #endif /* RE_ENABLE_I18N */
616 if (BE (pstr
->mbs_allocated
, 0))
617 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
618 pstr
->valid_len
- offset
);
619 pstr
->valid_len
-= offset
;
620 pstr
->valid_raw_len
-= offset
;
622 assert (pstr
->valid_len
> 0);
627 /* No, skip all characters until IDX. */
628 #ifdef RE_ENABLE_I18N
629 if (BE (pstr
->offsets_needed
, 0))
631 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
632 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
633 pstr
->offsets_needed
= 0;
637 pstr
->valid_raw_len
= 0;
638 #ifdef RE_ENABLE_I18N
639 if (pstr
->mb_cur_max
> 1)
646 const unsigned char *raw
, *p
, *q
, *end
;
648 /* Special case UTF-8. Multi-byte chars start with any
649 byte other than 0x80 - 0xbf. */
650 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
651 end
= raw
+ (offset
- pstr
->mb_cur_max
);
652 for (p
= raw
+ offset
- 1; p
>= end
; --p
)
653 if ((*p
& 0xc0) != 0x80)
657 int mlen
= raw
+ pstr
->len
- p
;
658 unsigned char buf
[6];
661 if (BE (pstr
->trans
!= NULL
, 0))
663 int i
= mlen
< 6 ? mlen
: 6;
665 buf
[i
] = pstr
->trans
[p
[i
]];
668 /* XXX Don't use mbrtowc, we know which conversion
669 to use (UTF-8 -> UCS4). */
670 memset (&cur_state
, 0, sizeof (cur_state
));
671 mlen
= mbrtowc (&wc2
, p
, mlen
, &cur_state
)
672 - (raw
+ offset
- p
);
675 memset (&pstr
->cur_state
, '\0',
677 pstr
->valid_len
= mlen
;
685 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
686 if (BE (pstr
->valid_len
, 0))
688 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
689 pstr
->wcs
[wcs_idx
] = WEOF
;
690 if (pstr
->mbs_allocated
)
691 memset (pstr
->mbs
, 255, pstr
->valid_len
);
693 pstr
->valid_raw_len
= pstr
->valid_len
;
694 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
695 && IS_WIDE_WORD_CHAR (wc
))
697 : ((IS_WIDE_NEWLINE (wc
)
698 && pstr
->newline_anchor
)
699 ? CONTEXT_NEWLINE
: 0));
702 #endif /* RE_ENABLE_I18N */
704 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
707 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
709 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
710 ? CONTEXT_NEWLINE
: 0));
713 if (!BE (pstr
->mbs_allocated
, 0))
716 pstr
->raw_mbs_idx
= idx
;
718 pstr
->stop
-= offset
;
720 /* Then build the buffers. */
721 #ifdef RE_ENABLE_I18N
722 if (pstr
->mb_cur_max
> 1)
726 int ret
= build_wcs_upper_buffer (pstr
);
727 if (BE (ret
!= REG_NOERROR
, 0))
731 build_wcs_buffer (pstr
);
734 #endif /* RE_ENABLE_I18N */
735 if (BE (pstr
->mbs_allocated
, 0))
738 build_upper_buffer (pstr
);
739 else if (pstr
->trans
!= NULL
)
740 re_string_translate_buffer (pstr
);
743 pstr
->valid_len
= pstr
->len
;
750 re_string_peek_byte_case (pstr
, idx
)
751 const re_string_t
*pstr
;
756 /* Handle the common (easiest) cases first. */
757 if (BE (!pstr
->mbs_allocated
, 1))
758 return re_string_peek_byte (pstr
, idx
);
760 #ifdef RE_ENABLE_I18N
761 if (pstr
->mb_cur_max
> 1
762 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
763 return re_string_peek_byte (pstr
, idx
);
766 off
= pstr
->cur_idx
+ idx
;
767 #ifdef RE_ENABLE_I18N
768 if (pstr
->offsets_needed
)
769 off
= pstr
->offsets
[off
];
772 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
774 #ifdef RE_ENABLE_I18N
775 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
776 this function returns CAPITAL LETTER I instead of first byte of
777 DOTLESS SMALL LETTER I. The latter would confuse the parser,
778 since peek_byte_case doesn't advance cur_idx in any way. */
779 if (pstr
->offsets_needed
&& !isascii (ch
))
780 return re_string_peek_byte (pstr
, idx
);
787 re_string_fetch_byte_case (pstr
)
790 if (BE (!pstr
->mbs_allocated
, 1))
791 return re_string_fetch_byte (pstr
);
793 #ifdef RE_ENABLE_I18N
794 if (pstr
->offsets_needed
)
798 /* For tr_TR.UTF-8 [[:islower:]] there is
799 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
800 in that case the whole multi-byte character and return
801 the original letter. On the other side, with
802 [[: DOTLESS SMALL LETTER I return [[:I, as doing
803 anything else would complicate things too much. */
805 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
806 return re_string_fetch_byte (pstr
);
808 off
= pstr
->offsets
[pstr
->cur_idx
];
809 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
812 return re_string_fetch_byte (pstr
);
814 re_string_skip_bytes (pstr
,
815 re_string_char_size_at (pstr
, pstr
->cur_idx
));
820 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
824 re_string_destruct (pstr
)
827 #ifdef RE_ENABLE_I18N
829 re_free (pstr
->offsets
);
830 #endif /* RE_ENABLE_I18N */
831 if (pstr
->mbs_allocated
)
835 /* Return the context at IDX in INPUT. */
838 re_string_context_at (input
, idx
, eflags
)
839 const re_string_t
*input
;
844 /* In this case, we use the value stored in input->tip_context,
845 since we can't know the character in input->mbs[-1] here. */
846 return input
->tip_context
;
847 if (BE (idx
== input
->len
, 0))
848 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
849 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
850 #ifdef RE_ENABLE_I18N
851 if (input
->mb_cur_max
> 1)
855 while(input
->wcs
[wc_idx
] == WEOF
)
858 /* It must not happen. */
859 assert (wc_idx
>= 0);
863 return input
->tip_context
;
865 wc
= input
->wcs
[wc_idx
];
866 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
868 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
869 ? CONTEXT_NEWLINE
: 0);
874 c
= re_string_byte_at (input
, idx
);
875 if (bitset_contain (input
->word_char
, c
))
877 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
881 /* Functions for set operation. */
884 re_node_set_alloc (set
, size
)
890 set
->elems
= re_malloc (int, size
);
891 if (BE (set
->elems
== NULL
, 0))
897 re_node_set_init_1 (set
, elem
)
903 set
->elems
= re_malloc (int, 1);
904 if (BE (set
->elems
== NULL
, 0))
906 set
->alloc
= set
->nelem
= 0;
909 set
->elems
[0] = elem
;
914 re_node_set_init_2 (set
, elem1
, elem2
)
919 set
->elems
= re_malloc (int, 2);
920 if (BE (set
->elems
== NULL
, 0))
925 set
->elems
[0] = elem1
;
932 set
->elems
[0] = elem1
;
933 set
->elems
[1] = elem2
;
937 set
->elems
[0] = elem2
;
938 set
->elems
[1] = elem1
;
945 re_node_set_init_copy (dest
, src
)
947 const re_node_set
*src
;
949 dest
->nelem
= src
->nelem
;
952 dest
->alloc
= dest
->nelem
;
953 dest
->elems
= re_malloc (int, dest
->alloc
);
954 if (BE (dest
->elems
== NULL
, 0))
956 dest
->alloc
= dest
->nelem
= 0;
959 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
962 re_node_set_init_empty (dest
);
966 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
967 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
968 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
971 re_node_set_add_intersect (dest
, src1
, src2
)
973 const re_node_set
*src1
, *src2
;
975 int i1
, i2
, is
, id
, delta
, sbase
;
976 if (src1
->nelem
== 0 || src2
->nelem
== 0)
979 /* We need dest->nelem + 2 * elems_in_intersection; this is a
980 conservative estimate. */
981 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
983 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
984 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
985 if (BE (new_elems
== NULL
, 0))
987 dest
->elems
= new_elems
;
988 dest
->alloc
= new_alloc
;
991 /* Find the items in the intersection of SRC1 and SRC2, and copy
992 into the top of DEST those that are not already in DEST itself. */
993 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
994 i1
= src1
->nelem
- 1;
995 i2
= src2
->nelem
- 1;
996 id
= dest
->nelem
- 1;
999 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1001 /* Try to find the item in DEST. Maybe we could binary search? */
1002 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
1005 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
1006 dest
->elems
[--sbase
] = src1
->elems
[i1
];
1008 if (--i1
< 0 || --i2
< 0)
1012 /* Lower the highest of the two items. */
1013 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1025 id
= dest
->nelem
- 1;
1026 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1027 delta
= is
- sbase
+ 1;
1029 /* Now copy. When DELTA becomes zero, the remaining
1030 DEST elements are already in place; this is more or
1031 less the same loop that is in re_node_set_merge. */
1032 dest
->nelem
+= delta
;
1033 if (delta
> 0 && id
>= 0)
1036 if (dest
->elems
[is
] > dest
->elems
[id
])
1038 /* Copy from the top. */
1039 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1045 /* Slide from the bottom. */
1046 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1052 /* Copy remaining SRC elements. */
1053 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1058 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1059 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1061 static reg_errcode_t
1062 re_node_set_init_union (dest
, src1
, src2
)
1064 const re_node_set
*src1
, *src2
;
1067 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1069 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1070 dest
->elems
= re_malloc (int, dest
->alloc
);
1071 if (BE (dest
->elems
== NULL
, 0))
1076 if (src1
!= NULL
&& src1
->nelem
> 0)
1077 return re_node_set_init_copy (dest
, src1
);
1078 else if (src2
!= NULL
&& src2
->nelem
> 0)
1079 return re_node_set_init_copy (dest
, src2
);
1081 re_node_set_init_empty (dest
);
1084 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1086 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1088 dest
->elems
[id
++] = src2
->elems
[i2
++];
1091 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1093 dest
->elems
[id
++] = src1
->elems
[i1
++];
1095 if (i1
< src1
->nelem
)
1097 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1098 (src1
->nelem
- i1
) * sizeof (int));
1099 id
+= src1
->nelem
- i1
;
1101 else if (i2
< src2
->nelem
)
1103 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1104 (src2
->nelem
- i2
) * sizeof (int));
1105 id
+= src2
->nelem
- i2
;
1111 /* Calculate the union set of the sets DEST and SRC. And store it to
1112 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1114 static reg_errcode_t
1115 re_node_set_merge (dest
, src
)
1117 const re_node_set
*src
;
1119 int is
, id
, sbase
, delta
;
1120 if (src
== NULL
|| src
->nelem
== 0)
1122 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1124 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1125 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1126 if (BE (new_buffer
== NULL
, 0))
1128 dest
->elems
= new_buffer
;
1129 dest
->alloc
= new_alloc
;
1132 if (BE (dest
->nelem
== 0, 0))
1134 dest
->nelem
= src
->nelem
;
1135 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1139 /* Copy into the top of DEST the items of SRC that are not
1140 found in DEST. Maybe we could binary search in DEST? */
1141 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1142 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1144 if (dest
->elems
[id
] == src
->elems
[is
])
1146 else if (dest
->elems
[id
] < src
->elems
[is
])
1147 dest
->elems
[--sbase
] = src
->elems
[is
--];
1148 else /* if (dest->elems[id] > src->elems[is]) */
1154 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1156 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1159 id
= dest
->nelem
- 1;
1160 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1161 delta
= is
- sbase
+ 1;
1165 /* Now copy. When DELTA becomes zero, the remaining
1166 DEST elements are already in place. */
1167 dest
->nelem
+= delta
;
1170 if (dest
->elems
[is
] > dest
->elems
[id
])
1172 /* Copy from the top. */
1173 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1179 /* Slide from the bottom. */
1180 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1183 /* Copy remaining SRC elements. */
1184 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1185 delta
* sizeof (int));
1194 /* Insert the new element ELEM to the re_node_set* SET.
1195 SET should not already have ELEM.
1196 return -1 if an error is occured, return 1 otherwise. */
1199 re_node_set_insert (set
, elem
)
1204 /* In case the set is empty. */
1205 if (set
->alloc
== 0)
1207 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1213 if (BE (set
->nelem
, 0) == 0)
1215 /* We already guaranteed above that set->alloc != 0. */
1216 set
->elems
[0] = elem
;
1221 /* Realloc if we need. */
1222 if (set
->alloc
== set
->nelem
)
1225 set
->alloc
= set
->alloc
* 2;
1226 new_array
= re_realloc (set
->elems
, int, set
->alloc
);
1227 if (BE (new_array
== NULL
, 0))
1229 set
->elems
= new_array
;
1232 /* Move the elements which follows the new element. Test the
1233 first element separately to skip a check in the inner loop. */
1234 if (elem
< set
->elems
[0])
1237 for (idx
= set
->nelem
; idx
> 0; idx
--)
1238 set
->elems
[idx
] = set
->elems
[idx
- 1];
1242 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1243 set
->elems
[idx
] = set
->elems
[idx
- 1];
1246 /* Insert the new element. */
1247 set
->elems
[idx
] = elem
;
1252 /* Insert the new element ELEM to the re_node_set* SET.
1253 SET should not already have any element greater than or equal to ELEM.
1254 Return -1 if an error is occured, return 1 otherwise. */
1257 re_node_set_insert_last (set
, elem
)
1261 /* Realloc if we need. */
1262 if (set
->alloc
== set
->nelem
)
1265 set
->alloc
= (set
->alloc
+ 1) * 2;
1266 new_array
= re_realloc (set
->elems
, int, set
->alloc
);
1267 if (BE (new_array
== NULL
, 0))
1269 set
->elems
= new_array
;
1272 /* Insert the new element. */
1273 set
->elems
[set
->nelem
++] = elem
;
1277 /* Compare two node sets SET1 and SET2.
1278 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1281 re_node_set_compare (set1
, set2
)
1282 const re_node_set
*set1
, *set2
;
1285 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1287 for (i
= set1
->nelem
; --i
>= 0 ; )
1288 if (set1
->elems
[i
] != set2
->elems
[i
])
1293 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1296 re_node_set_contains (set
, elem
)
1297 const re_node_set
*set
;
1300 unsigned int idx
, right
, mid
;
1301 if (set
->nelem
<= 0)
1304 /* Binary search the element. */
1306 right
= set
->nelem
- 1;
1309 mid
= (idx
+ right
) / 2;
1310 if (set
->elems
[mid
] < elem
)
1315 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1319 re_node_set_remove_at (set
, idx
)
1323 if (idx
< 0 || idx
>= set
->nelem
)
1326 for (; idx
< set
->nelem
; idx
++)
1327 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1331 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1332 Or return -1, if an error will be occured. */
1335 re_dfa_add_node (dfa
, token
, mode
)
1340 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1342 int new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1343 re_token_t
*new_array
= re_realloc (dfa
->nodes
, re_token_t
,
1345 if (BE (new_array
== NULL
, 0))
1347 dfa
->nodes
= new_array
;
1350 int *new_nexts
, *new_indices
;
1351 re_node_set
*new_edests
, *new_eclosures
, *new_inveclosures
;
1353 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1354 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1355 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1356 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
,
1358 new_inveclosures
= re_realloc (dfa
->inveclosures
, re_node_set
,
1360 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1361 || new_edests
== NULL
|| new_eclosures
== NULL
1362 || new_inveclosures
== NULL
, 0))
1364 dfa
->nexts
= new_nexts
;
1365 dfa
->org_indices
= new_indices
;
1366 dfa
->edests
= new_edests
;
1367 dfa
->eclosures
= new_eclosures
;
1368 dfa
->inveclosures
= new_inveclosures
;
1370 dfa
->nodes_alloc
= new_nodes_alloc
;
1372 dfa
->nodes
[dfa
->nodes_len
] = token
;
1373 dfa
->nodes
[dfa
->nodes_len
].opt_subexp
= 0;
1374 dfa
->nodes
[dfa
->nodes_len
].duplicated
= 0;
1375 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1376 return dfa
->nodes_len
++;
1379 static unsigned int inline
1380 calc_state_hash (nodes
, context
)
1381 const re_node_set
*nodes
;
1382 unsigned int context
;
1384 unsigned int hash
= nodes
->nelem
+ context
;
1386 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1387 hash
+= nodes
->elems
[i
];
1391 /* Search for the state whose node_set is equivalent to NODES.
1392 Return the pointer to the state, if we found it in the DFA.
1393 Otherwise create the new one and return it. In case of an error
1394 return NULL and set the error code in ERR.
1395 Note: - We assume NULL as the invalid state, then it is possible that
1396 return value is NULL and ERR is REG_NOERROR.
1397 - We never return non-NULL value in case of any errors, it is for
1400 static re_dfastate_t
*
1401 re_acquire_state (err
, dfa
, nodes
)
1404 const re_node_set
*nodes
;
1407 re_dfastate_t
*new_state
;
1408 struct re_state_table_entry
*spot
;
1410 if (BE (nodes
->nelem
== 0, 0))
1415 hash
= calc_state_hash (nodes
, 0);
1416 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1418 for (i
= 0 ; i
< spot
->num
; i
++)
1420 re_dfastate_t
*state
= spot
->array
[i
];
1421 if (hash
!= state
->hash
)
1423 if (re_node_set_compare (&state
->nodes
, nodes
))
1427 /* There are no appropriate state in the dfa, create the new one. */
1428 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1429 if (BE (new_state
!= NULL
, 1))
1438 /* Search for the state whose node_set is equivalent to NODES and
1439 whose context is equivalent to CONTEXT.
1440 Return the pointer to the state, if we found it in the DFA.
1441 Otherwise create the new one and return it. In case of an error
1442 return NULL and set the error code in ERR.
1443 Note: - We assume NULL as the invalid state, then it is possible that
1444 return value is NULL and ERR is REG_NOERROR.
1445 - We never return non-NULL value in case of any errors, it is for
1448 static re_dfastate_t
*
1449 re_acquire_state_context (err
, dfa
, nodes
, context
)
1452 const re_node_set
*nodes
;
1453 unsigned int context
;
1456 re_dfastate_t
*new_state
;
1457 struct re_state_table_entry
*spot
;
1459 if (nodes
->nelem
== 0)
1464 hash
= calc_state_hash (nodes
, context
);
1465 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1467 for (i
= 0 ; i
< spot
->num
; i
++)
1469 re_dfastate_t
*state
= spot
->array
[i
];
1470 if (state
->hash
== hash
1471 && state
->context
== context
1472 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1475 /* There are no appropriate state in `dfa', create the new one. */
1476 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1477 if (BE (new_state
!= NULL
, 1))
1486 /* Finish initialization of the new state NEWSTATE, and using its hash value
1487 HASH put in the appropriate bucket of DFA's state table. Return value
1488 indicates the error code if failed. */
1490 static reg_errcode_t
1491 register_state (dfa
, newstate
, hash
)
1493 re_dfastate_t
*newstate
;
1496 struct re_state_table_entry
*spot
;
1500 newstate
->hash
= hash
;
1501 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1502 if (BE (err
!= REG_NOERROR
, 0))
1504 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1506 int elem
= newstate
->nodes
.elems
[i
];
1507 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1508 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1511 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1512 if (BE (spot
->alloc
<= spot
->num
, 0))
1514 int new_alloc
= 2 * spot
->num
+ 2;
1515 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1517 if (BE (new_array
== NULL
, 0))
1519 spot
->array
= new_array
;
1520 spot
->alloc
= new_alloc
;
1522 spot
->array
[spot
->num
++] = newstate
;
1526 /* Create the new state which is independ of contexts.
1527 Return the new state if succeeded, otherwise return NULL. */
1529 static re_dfastate_t
*
1530 create_ci_newstate (dfa
, nodes
, hash
)
1532 const re_node_set
*nodes
;
1537 re_dfastate_t
*newstate
;
1539 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1540 if (BE (newstate
== NULL
, 0))
1542 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1543 if (BE (err
!= REG_NOERROR
, 0))
1549 newstate
->entrance_nodes
= &newstate
->nodes
;
1550 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1552 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1553 re_token_type_t type
= node
->type
;
1554 if (type
== CHARACTER
&& !node
->constraint
)
1557 /* If the state has the halt node, the state is a halt state. */
1558 else if (type
== END_OF_RE
)
1560 #ifdef RE_ENABLE_I18N
1561 else if (type
== COMPLEX_BRACKET
1562 || type
== OP_UTF8_PERIOD
1563 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1564 newstate
->accept_mb
= 1;
1565 #endif /* RE_ENABLE_I18N */
1566 else if (type
== OP_BACK_REF
)
1567 newstate
->has_backref
= 1;
1568 else if (type
== ANCHOR
|| node
->constraint
)
1569 newstate
->has_constraint
= 1;
1571 err
= register_state (dfa
, newstate
, hash
);
1572 if (BE (err
!= REG_NOERROR
, 0))
1574 free_state (newstate
);
1580 /* Create the new state which is depend on the context CONTEXT.
1581 Return the new state if succeeded, otherwise return NULL. */
1583 static re_dfastate_t
*
1584 create_cd_newstate (dfa
, nodes
, context
, hash
)
1586 const re_node_set
*nodes
;
1587 unsigned int context
, hash
;
1589 int i
, nctx_nodes
= 0;
1591 re_dfastate_t
*newstate
;
1593 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1594 if (BE (newstate
== NULL
, 0))
1596 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1597 if (BE (err
!= REG_NOERROR
, 0))
1603 newstate
->context
= context
;
1604 newstate
->entrance_nodes
= &newstate
->nodes
;
1606 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1608 unsigned int constraint
= 0;
1609 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1610 re_token_type_t type
= node
->type
;
1611 if (node
->constraint
)
1612 constraint
= node
->constraint
;
1614 if (type
== CHARACTER
&& !constraint
)
1616 /* If the state has the halt node, the state is a halt state. */
1617 else if (type
== END_OF_RE
)
1619 #ifdef RE_ENABLE_I18N
1620 else if (type
== COMPLEX_BRACKET
1621 || type
== OP_UTF8_PERIOD
1622 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1623 newstate
->accept_mb
= 1;
1624 #endif /* RE_ENABLE_I18N */
1625 else if (type
== OP_BACK_REF
)
1626 newstate
->has_backref
= 1;
1627 else if (type
== ANCHOR
)
1628 constraint
= node
->opr
.ctx_type
;
1632 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1634 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1635 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1637 free_state (newstate
);
1640 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1642 newstate
->has_constraint
= 1;
1645 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1647 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1652 err
= register_state (dfa
, newstate
, hash
);
1653 if (BE (err
!= REG_NOERROR
, 0))
1655 free_state (newstate
);
1663 re_dfastate_t
*state
;
1665 re_node_set_free (&state
->non_eps_nodes
);
1666 re_node_set_free (&state
->inveclosure
);
1667 if (state
->entrance_nodes
!= &state
->nodes
)
1669 re_node_set_free (state
->entrance_nodes
);
1670 re_free (state
->entrance_nodes
);
1672 re_node_set_free (&state
->nodes
);
1673 re_free (state
->trtable
);