1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
34 extern OnigCaseFoldType
35 onig_get_default_case_fold_flag(void)
37 return OnigDefaultCaseFoldFlag
;
41 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
43 OnigDefaultCaseFoldFlag
= case_fold_flag
;
48 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
49 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
53 str_dup(UChar
* s
, UChar
* end
)
58 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
68 swap_node(Node
* a
, Node
* b
)
71 c
= *a
; *a
= *b
; *b
= c
;
73 if (NTYPE(a
) == NT_STR
) {
74 StrNode
* sn
= NSTR(a
);
76 int len
= sn
->end
- sn
->s
;
78 sn
->end
= sn
->s
+ len
;
82 if (NTYPE(b
) == NT_STR
) {
83 StrNode
* sn
= NSTR(b
);
85 int len
= sn
->end
- sn
->s
;
87 sn
->end
= sn
->s
+ len
;
93 distance_add(OnigDistance d1
, OnigDistance d2
)
95 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
96 return ONIG_INFINITE_DISTANCE
;
98 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
99 else return ONIG_INFINITE_DISTANCE
;
104 distance_multiply(OnigDistance d
, int m
)
106 if (m
== 0) return 0;
108 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
111 return ONIG_INFINITE_DISTANCE
;
115 bitset_is_empty(BitSetRef bs
)
118 for (i
= 0; i
< (int )BITSET_SIZE
; i
++) {
119 if (bs
[i
] != 0) return 0;
126 bitset_on_num(BitSetRef bs
)
131 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
132 if (BITSET_AT(bs
, i
)) n
++;
139 onig_bbuf_init(BBuf
* buf
, int size
)
146 buf
->p
= (UChar
* )xmalloc(size
);
147 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
156 #ifdef USE_SUBEXP_CALL
159 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
163 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
164 CHECK_NULL_RETURN_MEMERR(p
);
166 uslist
->alloc
= size
;
172 unset_addr_list_end(UnsetAddrList
* uslist
)
174 if (IS_NOT_NULL(uslist
->us
))
179 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
184 if (uslist
->num
>= uslist
->alloc
) {
185 size
= uslist
->alloc
* 2;
186 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
);
187 CHECK_NULL_RETURN_MEMERR(p
);
188 uslist
->alloc
= size
;
192 uslist
->us
[uslist
->num
].offset
= offset
;
193 uslist
->us
[uslist
->num
].target
= node
;
197 #endif /* USE_SUBEXP_CALL */
201 add_opcode(regex_t
* reg
, int opcode
)
203 BBUF_ADD1(reg
, opcode
);
207 #ifdef USE_COMBINATION_EXPLOSION_CHECK
209 add_state_check_num(regex_t
* reg
, int num
)
211 StateCheckNumType n
= (StateCheckNumType
)num
;
213 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
219 add_rel_addr(regex_t
* reg
, int addr
)
221 RelAddrType ra
= (RelAddrType
)addr
;
223 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
228 add_abs_addr(regex_t
* reg
, int addr
)
230 AbsAddrType ra
= (AbsAddrType
)addr
;
232 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
237 add_length(regex_t
* reg
, int len
)
239 LengthType l
= (LengthType
)len
;
241 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
246 add_mem_num(regex_t
* reg
, int num
)
248 MemNumType n
= (MemNumType
)num
;
250 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
255 add_pointer(regex_t
* reg
, void* addr
)
257 PointerType ptr
= (PointerType
)addr
;
259 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
264 add_option(regex_t
* reg
, OnigOptionType option
)
266 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
271 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
275 r
= add_opcode(reg
, opcode
);
277 r
= add_rel_addr(reg
, addr
);
282 add_bytes(regex_t
* reg
, UChar
* bytes
, int len
)
284 BBUF_ADD(reg
, bytes
, len
);
289 add_bitset(regex_t
* reg
, BitSetRef bs
)
291 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
296 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
300 r
= add_opcode(reg
, opcode
);
302 r
= add_option(reg
, option
);
306 static int compile_length_tree(Node
* node
, regex_t
* reg
);
307 static int compile_tree(Node
* node
, regex_t
* reg
);
310 #define IS_NEED_STR_LEN_OP_EXACT(op) \
311 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
312 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
315 select_str_opcode(int mb_len
, int str_len
, int ignore_case
)
321 case 1: op
= OP_EXACT1_IC
; break;
322 default: op
= OP_EXACTN_IC
; break;
329 case 1: op
= OP_EXACT1
; break;
330 case 2: op
= OP_EXACT2
; break;
331 case 3: op
= OP_EXACT3
; break;
332 case 4: op
= OP_EXACT4
; break;
333 case 5: op
= OP_EXACT5
; break;
334 default: op
= OP_EXACTN
; break;
340 case 1: op
= OP_EXACTMB2N1
; break;
341 case 2: op
= OP_EXACTMB2N2
; break;
342 case 3: op
= OP_EXACTMB2N3
; break;
343 default: op
= OP_EXACTMB2N
; break;
360 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
363 int saved_num_null_check
= reg
->num_null_check
;
365 if (empty_info
!= 0) {
366 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
368 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
370 reg
->num_null_check
++;
373 r
= compile_tree(node
, reg
);
376 if (empty_info
!= 0) {
377 if (empty_info
== NQ_TARGET_IS_EMPTY
)
378 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
379 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
380 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
381 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
382 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
385 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
390 #ifdef USE_SUBEXP_CALL
392 compile_call(CallNode
* node
, regex_t
* reg
)
396 r
= add_opcode(reg
, OP_CALL
);
398 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
401 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
407 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
411 for (i
= 0; i
< n
; i
++) {
412 r
= compile_tree(node
, reg
);
419 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, int str_len
,
420 regex_t
* reg ARG_UNUSED
, int ignore_case
)
423 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
427 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
428 if (IS_NEED_STR_LEN_OP_EXACT(op
))
431 len
+= mb_len
* str_len
;
436 add_compile_string(UChar
* s
, int mb_len
, int str_len
,
437 regex_t
* reg
, int ignore_case
)
439 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
442 if (op
== OP_EXACTMBN
)
443 add_length(reg
, mb_len
);
445 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
446 if (op
== OP_EXACTN_IC
)
447 add_length(reg
, mb_len
* str_len
);
449 add_length(reg
, str_len
);
452 add_bytes(reg
, s
, mb_len
* str_len
);
458 compile_length_string_node(Node
* node
, regex_t
* reg
)
460 int rlen
, r
, len
, prev_len
, slen
, ambig
;
461 OnigEncoding enc
= reg
->enc
;
466 if (sn
->end
<= sn
->s
)
469 ambig
= NSTRING_IS_AMBIG(node
);
472 prev_len
= enclen(enc
, p
, sn
->end
);
477 for (; p
< sn
->end
; ) {
478 len
= enclen(enc
, p
, sn
->end
);
479 if (len
== prev_len
) {
483 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
491 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
497 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
499 if (sn
->end
<= sn
->s
)
502 return add_compile_string_length(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
506 compile_string_node(Node
* node
, regex_t
* reg
)
508 int r
, len
, prev_len
, slen
, ambig
;
509 OnigEncoding enc
= reg
->enc
;
510 UChar
*p
, *prev
, *end
;
514 if (sn
->end
<= sn
->s
)
518 ambig
= NSTRING_IS_AMBIG(node
);
521 prev_len
= enclen(enc
, p
, end
);
526 len
= enclen(enc
, p
, end
);
527 if (len
== prev_len
) {
531 r
= add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
541 return add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
545 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
547 if (sn
->end
<= sn
->s
)
550 return add_compile_string(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
554 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
556 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
557 add_length(reg
, mbuf
->used
);
558 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
561 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
563 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
564 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
565 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
567 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
569 /* padding for return value from compile_length_cclass_node() to be fix. */
570 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
571 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
577 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
581 if (IS_NCCLASS_SHARE(cc
)) {
582 len
= SIZE_OPCODE
+ SIZE_POINTER
;
586 if (IS_NULL(cc
->mbuf
)) {
587 len
= SIZE_OPCODE
+ SIZE_BITSET
;
590 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
594 len
= SIZE_OPCODE
+ SIZE_BITSET
;
596 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
597 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
599 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
607 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
611 if (IS_NCCLASS_SHARE(cc
)) {
612 add_opcode(reg
, OP_CCLASS_NODE
);
613 r
= add_pointer(reg
, cc
);
617 if (IS_NULL(cc
->mbuf
)) {
618 if (IS_NCCLASS_NOT(cc
))
619 add_opcode(reg
, OP_CCLASS_NOT
);
621 add_opcode(reg
, OP_CCLASS
);
623 r
= add_bitset(reg
, cc
->bs
);
626 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
627 if (IS_NCCLASS_NOT(cc
))
628 add_opcode(reg
, OP_CCLASS_MB_NOT
);
630 add_opcode(reg
, OP_CCLASS_MB
);
632 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
635 if (IS_NCCLASS_NOT(cc
))
636 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
638 add_opcode(reg
, OP_CCLASS_MIX
);
640 r
= add_bitset(reg
, cc
->bs
);
642 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
650 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
652 #define REPEAT_RANGE_ALLOC 4
656 if (reg
->repeat_range_alloc
== 0) {
657 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
658 CHECK_NULL_RETURN_MEMERR(p
);
659 reg
->repeat_range
= p
;
660 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
662 else if (reg
->repeat_range_alloc
<= id
) {
664 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
665 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
666 sizeof(OnigRepeatRange
) * n
);
667 CHECK_NULL_RETURN_MEMERR(p
);
668 reg
->repeat_range
= p
;
669 reg
->repeat_range_alloc
= n
;
672 p
= reg
->repeat_range
;
676 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
681 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
685 int num_repeat
= reg
->num_repeat
;
687 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
689 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
692 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
695 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
698 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
702 #ifdef USE_SUBEXP_CALL
705 IS_QUANTIFIER_IN_REPEAT(qn
)) {
706 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
709 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
712 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
717 is_anychar_star_quantifier(QtfrNode
* qn
)
719 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
720 NTYPE(qn
->target
) == NT_CANY
)
726 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
727 #define CKN_ON (ckn > 0)
729 #ifdef USE_COMBINATION_EXPLOSION_CHECK
732 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
734 int len
, mod_tlen
, cklen
;
736 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
737 int empty_info
= qn
->target_empty_info
;
738 int tlen
= compile_length_tree(qn
->target
, reg
);
740 if (tlen
< 0) return tlen
;
742 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
744 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
747 if (NTYPE(qn
->target
) == NT_CANY
) {
748 if (qn
->greedy
&& infinite
) {
749 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
750 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
752 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
757 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
761 if (infinite
&& qn
->lower
<= 1) {
768 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
776 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
779 else if (qn
->upper
== 0) {
780 if (qn
->is_refered
!= 0) /* /(?<n>..){0}/ */
781 len
= SIZE_OP_JUMP
+ tlen
;
785 else if (qn
->upper
== 1 && qn
->greedy
) {
786 if (qn
->lower
== 0) {
788 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
791 len
= SIZE_OP_PUSH
+ tlen
;
798 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
799 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
802 len
= SIZE_OP_REPEAT_INC
803 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
805 len
+= SIZE_OP_STATE_CHECK
;
812 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
816 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
817 int empty_info
= qn
->target_empty_info
;
818 int tlen
= compile_length_tree(qn
->target
, reg
);
820 if (tlen
< 0) return tlen
;
822 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
824 if (is_anychar_star_quantifier(qn
)) {
825 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
827 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
828 if (IS_MULTILINE(reg
->options
))
829 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
831 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
834 r
= add_state_check_num(reg
, ckn
);
838 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
841 if (IS_MULTILINE(reg
->options
)) {
842 r
= add_opcode(reg
, (CKN_ON
?
843 OP_STATE_CHECK_ANYCHAR_ML_STAR
844 : OP_ANYCHAR_ML_STAR
));
847 r
= add_opcode(reg
, (CKN_ON
?
848 OP_STATE_CHECK_ANYCHAR_STAR
853 r
= add_state_check_num(reg
, ckn
);
860 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
864 if (infinite
&& qn
->lower
<= 1) {
866 if (qn
->lower
== 1) {
867 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
868 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
873 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
875 r
= add_state_check_num(reg
, ckn
);
877 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
880 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
883 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
885 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
886 -(mod_tlen
+ (int )SIZE_OP_JUMP
887 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
890 if (qn
->lower
== 0) {
891 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
894 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
897 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
899 r
= add_state_check_num(reg
, ckn
);
901 r
= add_rel_addr(reg
,
902 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
905 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
908 else if (qn
->upper
== 0) {
909 if (qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
910 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
912 r
= compile_tree(qn
->target
, reg
);
917 else if (qn
->upper
== 1 && qn
->greedy
) {
918 if (qn
->lower
== 0) {
920 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
922 r
= add_state_check_num(reg
, ckn
);
924 r
= add_rel_addr(reg
, tlen
);
927 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
932 r
= compile_tree(qn
->target
, reg
);
934 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
936 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
938 r
= add_state_check_num(reg
, ckn
);
940 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
943 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
947 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
949 r
= compile_tree(qn
->target
, reg
);
952 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
955 r
= add_opcode(reg
, OP_STATE_CHECK
);
957 r
= add_state_check_num(reg
, ckn
);
963 #else /* USE_COMBINATION_EXPLOSION_CHECK */
966 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
969 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
970 int empty_info
= qn
->target_empty_info
;
971 int tlen
= compile_length_tree(qn
->target
, reg
);
973 if (tlen
< 0) return tlen
;
976 if (NTYPE(qn
->target
) == NT_CANY
) {
977 if (qn
->greedy
&& infinite
) {
978 if (IS_NOT_NULL(qn
->next_head_exact
))
979 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
981 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
986 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
991 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
992 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
996 len
= tlen
* qn
->lower
;
1000 if (IS_NOT_NULL(qn
->head_exact
))
1001 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1002 else if (IS_NOT_NULL(qn
->next_head_exact
))
1003 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1005 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1008 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1010 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1011 len
= SIZE_OP_JUMP
+ tlen
;
1013 else if (!infinite
&& qn
->greedy
&&
1014 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1015 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1016 len
= tlen
* qn
->lower
;
1017 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1019 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1020 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1023 len
= SIZE_OP_REPEAT_INC
1024 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1031 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1034 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1035 int empty_info
= qn
->target_empty_info
;
1036 int tlen
= compile_length_tree(qn
->target
, reg
);
1038 if (tlen
< 0) return tlen
;
1040 if (is_anychar_star_quantifier(qn
)) {
1041 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1043 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1044 if (IS_MULTILINE(reg
->options
))
1045 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1047 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1049 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1052 if (IS_MULTILINE(reg
->options
))
1053 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1055 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1059 if (empty_info
!= 0)
1060 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1065 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1066 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1068 if (IS_NOT_NULL(qn
->head_exact
))
1069 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1070 else if (IS_NOT_NULL(qn
->next_head_exact
))
1071 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1073 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1076 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1081 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1086 if (IS_NOT_NULL(qn
->head_exact
)) {
1087 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1088 mod_tlen
+ SIZE_OP_JUMP
);
1090 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1091 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1093 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1094 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1096 else if (IS_NOT_NULL(qn
->next_head_exact
)) {
1097 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1098 mod_tlen
+ SIZE_OP_JUMP
);
1100 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1101 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1103 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1104 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1107 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1109 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1111 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1112 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1116 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1118 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1120 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1123 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1124 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1126 r
= compile_tree(qn
->target
, reg
);
1128 else if (!infinite
&& qn
->greedy
&&
1129 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1130 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1131 int n
= qn
->upper
- qn
->lower
;
1133 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1136 for (i
= 0; i
< n
; i
++) {
1137 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1138 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1140 r
= compile_tree(qn
->target
, reg
);
1144 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1145 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1147 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1149 r
= compile_tree(qn
->target
, reg
);
1152 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1156 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1159 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1162 OnigOptionType prev
= reg
->options
;
1164 reg
->options
= node
->option
;
1165 tlen
= compile_length_tree(node
->target
, reg
);
1166 reg
->options
= prev
;
1168 if (tlen
< 0) return tlen
;
1170 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1171 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1172 + tlen
+ SIZE_OP_SET_OPTION
;
1179 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1182 OnigOptionType prev
= reg
->options
;
1184 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1185 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1187 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1189 r
= add_opcode(reg
, OP_FAIL
);
1193 reg
->options
= node
->option
;
1194 r
= compile_tree(node
->target
, reg
);
1195 reg
->options
= prev
;
1197 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1199 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1205 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1210 if (node
->type
== ENCLOSE_OPTION
)
1211 return compile_length_option_node(node
, reg
);
1214 tlen
= compile_length_tree(node
->target
, reg
);
1215 if (tlen
< 0) return tlen
;
1220 switch (node
->type
) {
1221 case ENCLOSE_MEMORY
:
1222 #ifdef USE_SUBEXP_CALL
1223 if (IS_ENCLOSE_CALLED(node
)) {
1224 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1225 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1226 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1227 len
+= (IS_ENCLOSE_RECURSION(node
)
1228 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1230 len
+= (IS_ENCLOSE_RECURSION(node
)
1231 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1236 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1237 len
= SIZE_OP_MEMORY_START_PUSH
;
1239 len
= SIZE_OP_MEMORY_START
;
1241 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1242 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1246 case ENCLOSE_STOP_BACKTRACK
:
1247 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1248 QtfrNode
* qn
= NQTFR(node
->target
);
1249 tlen
= compile_length_tree(qn
->target
, reg
);
1250 if (tlen
< 0) return tlen
;
1252 len
= tlen
* qn
->lower
1253 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1256 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1261 return ONIGERR_TYPE_BUG
;
1268 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1271 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1275 if (node
->type
== ENCLOSE_OPTION
)
1276 return compile_option_node(node
, reg
);
1278 switch (node
->type
) {
1279 case ENCLOSE_MEMORY
:
1280 #ifdef USE_SUBEXP_CALL
1281 if (IS_ENCLOSE_CALLED(node
)) {
1282 r
= add_opcode(reg
, OP_CALL
);
1284 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1285 node
->state
|= NST_ADDR_FIXED
;
1286 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1288 len
= compile_length_tree(node
->target
, reg
);
1289 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1290 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1291 len
+= (IS_ENCLOSE_RECURSION(node
)
1292 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1294 len
+= (IS_ENCLOSE_RECURSION(node
)
1295 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1297 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1301 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1302 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1304 r
= add_opcode(reg
, OP_MEMORY_START
);
1306 r
= add_mem_num(reg
, node
->regnum
);
1308 r
= compile_tree(node
->target
, reg
);
1310 #ifdef USE_SUBEXP_CALL
1311 if (IS_ENCLOSE_CALLED(node
)) {
1312 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1313 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1314 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1316 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1317 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1320 r
= add_mem_num(reg
, node
->regnum
);
1322 r
= add_opcode(reg
, OP_RETURN
);
1327 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1328 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1330 r
= add_opcode(reg
, OP_MEMORY_END
);
1332 r
= add_mem_num(reg
, node
->regnum
);
1336 case ENCLOSE_STOP_BACKTRACK
:
1337 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1338 QtfrNode
* qn
= NQTFR(node
->target
);
1339 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1342 len
= compile_length_tree(qn
->target
, reg
);
1343 if (len
< 0) return len
;
1345 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1347 r
= compile_tree(qn
->target
, reg
);
1349 r
= add_opcode(reg
, OP_POP
);
1351 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1352 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1355 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1357 r
= compile_tree(node
->target
, reg
);
1359 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1364 return ONIGERR_TYPE_BUG
;
1372 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1378 tlen
= compile_length_tree(node
->target
, reg
);
1379 if (tlen
< 0) return tlen
;
1382 switch (node
->type
) {
1383 case ANCHOR_PREC_READ
:
1384 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1386 case ANCHOR_PREC_READ_NOT
:
1387 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1389 case ANCHOR_LOOK_BEHIND
:
1390 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1392 case ANCHOR_LOOK_BEHIND_NOT
:
1393 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1405 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1409 switch (node
->type
) {
1410 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1411 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1412 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1413 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1414 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1415 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1417 case ANCHOR_WORD_BOUND
: r
= add_opcode(reg
, OP_WORD_BOUND
); break;
1418 case ANCHOR_NOT_WORD_BOUND
: r
= add_opcode(reg
, OP_NOT_WORD_BOUND
); break;
1419 #ifdef USE_WORD_BEGIN_END
1420 case ANCHOR_WORD_BEGIN
: r
= add_opcode(reg
, OP_WORD_BEGIN
); break;
1421 case ANCHOR_WORD_END
: r
= add_opcode(reg
, OP_WORD_END
); break;
1424 case ANCHOR_PREC_READ
:
1425 r
= add_opcode(reg
, OP_PUSH_POS
);
1427 r
= compile_tree(node
->target
, reg
);
1429 r
= add_opcode(reg
, OP_POP_POS
);
1432 case ANCHOR_PREC_READ_NOT
:
1433 len
= compile_length_tree(node
->target
, reg
);
1434 if (len
< 0) return len
;
1435 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1437 r
= compile_tree(node
->target
, reg
);
1439 r
= add_opcode(reg
, OP_FAIL_POS
);
1442 case ANCHOR_LOOK_BEHIND
:
1445 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1447 if (node
->char_len
< 0) {
1448 r
= get_char_length_tree(node
->target
, reg
, &n
);
1449 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1453 r
= add_length(reg
, n
);
1455 r
= compile_tree(node
->target
, reg
);
1459 case ANCHOR_LOOK_BEHIND_NOT
:
1462 len
= compile_length_tree(node
->target
, reg
);
1463 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1464 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1466 if (node
->char_len
< 0) {
1467 r
= get_char_length_tree(node
->target
, reg
, &n
);
1468 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1472 r
= add_length(reg
, n
);
1474 r
= compile_tree(node
->target
, reg
);
1476 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1481 return ONIGERR_TYPE_BUG
;
1489 compile_length_tree(Node
* node
, regex_t
* reg
)
1498 r
= compile_length_tree(NCAR(node
), reg
);
1499 if (r
< 0) return r
;
1501 } while (IS_NOT_NULL(node
= NCDR(node
)));
1511 r
+= compile_length_tree(NCAR(node
), reg
);
1513 } while (IS_NOT_NULL(node
= NCDR(node
)));
1514 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1519 if (NSTRING_IS_RAW(node
))
1520 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1522 r
= compile_length_string_node(node
, reg
);
1526 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1536 BRefNode
* br
= NBREF(node
);
1538 #ifdef USE_BACKREF_WITH_LEVEL
1539 if (IS_BACKREF_NEST_LEVEL(br
)) {
1540 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1541 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1545 if (br
->back_num
== 1) {
1546 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1547 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1550 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1555 #ifdef USE_SUBEXP_CALL
1562 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1566 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1570 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1574 return ONIGERR_TYPE_BUG
;
1582 compile_tree(Node
* node
, regex_t
* reg
)
1584 int n
, type
, len
, pos
, r
= 0;
1590 r
= compile_tree(NCAR(node
), reg
);
1591 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1599 len
+= compile_length_tree(NCAR(x
), reg
);
1600 if (NCDR(x
) != NULL
) {
1601 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1603 } while (IS_NOT_NULL(x
= NCDR(x
)));
1604 pos
= reg
->used
+ len
; /* goal position */
1607 len
= compile_length_tree(NCAR(node
), reg
);
1608 if (IS_NOT_NULL(NCDR(node
))) {
1609 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1612 r
= compile_tree(NCAR(node
), reg
);
1614 if (IS_NOT_NULL(NCDR(node
))) {
1615 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1616 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1619 } while (IS_NOT_NULL(node
= NCDR(node
)));
1624 if (NSTRING_IS_RAW(node
))
1625 r
= compile_string_raw_node(NSTR(node
), reg
);
1627 r
= compile_string_node(node
, reg
);
1631 r
= compile_cclass_node(NCCLASS(node
), reg
);
1638 switch (NCTYPE(node
)->ctype
) {
1639 case ONIGENC_CTYPE_WORD
:
1640 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1644 return ONIGERR_TYPE_BUG
;
1647 r
= add_opcode(reg
, op
);
1652 if (IS_MULTILINE(reg
->options
))
1653 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1655 r
= add_opcode(reg
, OP_ANYCHAR
);
1660 BRefNode
* br
= NBREF(node
);
1662 #ifdef USE_BACKREF_WITH_LEVEL
1663 if (IS_BACKREF_NEST_LEVEL(br
)) {
1664 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1666 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1668 r
= add_length(reg
, br
->nest_level
);
1671 goto add_bacref_mems
;
1675 if (br
->back_num
== 1) {
1676 n
= br
->back_static
[0];
1677 if (IS_IGNORECASE(reg
->options
)) {
1678 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1680 r
= add_mem_num(reg
, n
);
1684 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1685 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1687 r
= add_opcode(reg
, OP_BACKREFN
);
1689 r
= add_mem_num(reg
, n
);
1698 if (IS_IGNORECASE(reg
->options
)) {
1699 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1702 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1706 #ifdef USE_BACKREF_WITH_LEVEL
1709 r
= add_length(reg
, br
->back_num
);
1712 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1713 r
= add_mem_num(reg
, p
[i
]);
1720 #ifdef USE_SUBEXP_CALL
1722 r
= compile_call(NCALL(node
), reg
);
1727 r
= compile_quantifier_node(NQTFR(node
), reg
);
1731 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1735 r
= compile_anchor_node(NANCHOR(node
), reg
);
1740 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1748 #ifdef USE_NAMED_GROUP
1751 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1754 Node
* node
= *plink
;
1756 switch (NTYPE(node
)) {
1760 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1761 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1766 Node
** ptarget
= &(NQTFR(node
)->target
);
1767 Node
* old
= *ptarget
;
1768 r
= noname_disable_map(ptarget
, map
, counter
);
1769 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1770 onig_reduce_nested_quantifier(node
, *ptarget
);
1777 EncloseNode
* en
= NENCLOSE(node
);
1778 if (en
->type
== ENCLOSE_MEMORY
) {
1779 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1781 map
[en
->regnum
].new_val
= *counter
;
1782 en
->regnum
= *counter
;
1783 r
= noname_disable_map(&(en
->target
), map
, counter
);
1786 *plink
= en
->target
;
1787 en
->target
= NULL_NODE
;
1788 onig_node_free(node
);
1789 r
= noname_disable_map(plink
, map
, counter
);
1793 r
= noname_disable_map(&(en
->target
), map
, counter
);
1805 renumber_node_backref(Node
* node
, GroupNumRemap
* map
)
1807 int i
, pos
, n
, old_num
;
1809 BRefNode
* bn
= NBREF(node
);
1811 if (! IS_BACKREF_NAME_REF(bn
))
1812 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1814 old_num
= bn
->back_num
;
1815 if (IS_NULL(bn
->back_dynamic
))
1816 backs
= bn
->back_static
;
1818 backs
= bn
->back_dynamic
;
1820 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1821 n
= map
[backs
[i
]].new_val
;
1833 renumber_by_map(Node
* node
, GroupNumRemap
* map
)
1837 switch (NTYPE(node
)) {
1841 r
= renumber_by_map(NCAR(node
), map
);
1842 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1845 r
= renumber_by_map(NQTFR(node
)->target
, map
);
1848 r
= renumber_by_map(NENCLOSE(node
)->target
, map
);
1852 r
= renumber_node_backref(node
, map
);
1863 numbered_ref_check(Node
* node
)
1867 switch (NTYPE(node
)) {
1871 r
= numbered_ref_check(NCAR(node
));
1872 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1875 r
= numbered_ref_check(NQTFR(node
)->target
);
1878 r
= numbered_ref_check(NENCLOSE(node
)->target
);
1882 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
1883 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1894 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
1896 int r
, i
, pos
, counter
;
1900 map
= (GroupNumRemap
* )xalloca(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
1901 CHECK_NULL_RETURN_MEMERR(map
);
1902 for (i
= 1; i
<= env
->num_mem
; i
++) {
1906 r
= noname_disable_map(root
, map
, &counter
);
1907 if (r
!= 0) return r
;
1909 r
= renumber_by_map(*root
, map
);
1910 if (r
!= 0) return r
;
1912 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
1913 if (map
[i
].new_val
> 0) {
1914 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
1919 loc
= env
->capture_history
;
1920 BIT_STATUS_CLEAR(env
->capture_history
);
1921 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
1922 if (BIT_STATUS_AT(loc
, i
)) {
1923 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
1927 env
->num_mem
= env
->num_named
;
1928 reg
->num_mem
= env
->num_named
;
1930 return onig_renumber_name_table(reg
, map
);
1932 #endif /* USE_NAMED_GROUP */
1934 #ifdef USE_SUBEXP_CALL
1936 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
1942 for (i
= 0; i
< uslist
->num
; i
++) {
1943 en
= NENCLOSE(uslist
->us
[i
].target
);
1944 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
1945 addr
= en
->call_addr
;
1946 offset
= uslist
->us
[i
].offset
;
1948 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
1954 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1956 quantifiers_memory_node_info(Node
* node
)
1960 switch (NTYPE(node
)) {
1966 v
= quantifiers_memory_node_info(NCAR(node
));
1968 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
1972 #ifdef USE_SUBEXP_CALL
1974 if (IS_CALL_RECURSION(NCALL(node
))) {
1975 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
1978 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
1984 QtfrNode
* qn
= NQTFR(node
);
1985 if (qn
->upper
!= 0) {
1986 r
= quantifiers_memory_node_info(qn
->target
);
1993 EncloseNode
* en
= NENCLOSE(node
);
1995 case ENCLOSE_MEMORY
:
1996 return NQ_TARGET_IS_EMPTY_MEM
;
1999 case ENCLOSE_OPTION
:
2000 case ENCLOSE_STOP_BACKTRACK
:
2001 r
= quantifiers_memory_node_info(en
->target
);
2021 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2024 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2030 switch (NTYPE(node
)) {
2035 Node
** nodes
= SCANENV_MEM_NODES(env
);
2036 BRefNode
* br
= NBREF(node
);
2037 if (br
->state
& NST_RECURSION
) break;
2039 backs
= BACKREFS_P(br
);
2040 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2041 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2043 for (i
= 1; i
< br
->back_num
; i
++) {
2044 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2045 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2047 if (*min
> tmin
) *min
= tmin
;
2052 #ifdef USE_SUBEXP_CALL
2054 if (IS_CALL_RECURSION(NCALL(node
))) {
2055 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2056 if (IS_ENCLOSE_MIN_FIXED(en
))
2060 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2066 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2067 if (r
== 0) *min
+= tmin
;
2068 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2077 r
= get_min_match_length(x
, &tmin
, env
);
2079 if (y
== node
) *min
= tmin
;
2080 else if (*min
> tmin
) *min
= tmin
;
2081 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2087 StrNode
* sn
= NSTR(node
);
2088 *min
= sn
->end
- sn
->s
;
2103 QtfrNode
* qn
= NQTFR(node
);
2105 if (qn
->lower
> 0) {
2106 r
= get_min_match_length(qn
->target
, min
, env
);
2108 *min
= distance_multiply(*min
, qn
->lower
);
2115 EncloseNode
* en
= NENCLOSE(node
);
2117 case ENCLOSE_MEMORY
:
2118 #ifdef USE_SUBEXP_CALL
2119 if (IS_ENCLOSE_MIN_FIXED(en
))
2122 r
= get_min_match_length(en
->target
, min
, env
);
2125 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2130 case ENCLOSE_OPTION
:
2131 case ENCLOSE_STOP_BACKTRACK
:
2132 r
= get_min_match_length(en
->target
, min
, env
);
2147 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2153 switch (NTYPE(node
)) {
2156 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2158 *max
= distance_add(*max
, tmax
);
2159 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2164 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2165 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2166 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2171 StrNode
* sn
= NSTR(node
);
2172 *max
= sn
->end
- sn
->s
;
2177 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2182 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2189 Node
** nodes
= SCANENV_MEM_NODES(env
);
2190 BRefNode
* br
= NBREF(node
);
2191 if (br
->state
& NST_RECURSION
) {
2192 *max
= ONIG_INFINITE_DISTANCE
;
2195 backs
= BACKREFS_P(br
);
2196 for (i
= 0; i
< br
->back_num
; i
++) {
2197 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2198 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2200 if (*max
< tmax
) *max
= tmax
;
2205 #ifdef USE_SUBEXP_CALL
2207 if (! IS_CALL_RECURSION(NCALL(node
)))
2208 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2210 *max
= ONIG_INFINITE_DISTANCE
;
2216 QtfrNode
* qn
= NQTFR(node
);
2218 if (qn
->upper
!= 0) {
2219 r
= get_max_match_length(qn
->target
, max
, env
);
2220 if (r
== 0 && *max
!= 0) {
2221 if (! IS_REPEAT_INFINITE(qn
->upper
))
2222 *max
= distance_multiply(*max
, qn
->upper
);
2224 *max
= ONIG_INFINITE_DISTANCE
;
2232 EncloseNode
* en
= NENCLOSE(node
);
2234 case ENCLOSE_MEMORY
:
2235 #ifdef USE_SUBEXP_CALL
2236 if (IS_ENCLOSE_MAX_FIXED(en
))
2239 r
= get_max_match_length(en
->target
, max
, env
);
2242 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2247 case ENCLOSE_OPTION
:
2248 case ENCLOSE_STOP_BACKTRACK
:
2249 r
= get_max_match_length(en
->target
, max
, env
);
2263 #define GET_CHAR_LEN_VARLEN -1
2264 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2266 /* fixed size pattern node only */
2268 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2275 switch (NTYPE(node
)) {
2278 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2280 *len
= distance_add(*len
, tlen
);
2281 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2289 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2290 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2291 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2300 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2302 r
= GET_CHAR_LEN_VARLEN
;
2312 StrNode
* sn
= NSTR(node
);
2314 while (s
< sn
->end
) {
2315 s
+= enclen(reg
->enc
, s
, sn
->end
);
2323 QtfrNode
* qn
= NQTFR(node
);
2324 if (qn
->lower
== qn
->upper
) {
2325 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2327 *len
= distance_multiply(tlen
, qn
->lower
);
2330 r
= GET_CHAR_LEN_VARLEN
;
2334 #ifdef USE_SUBEXP_CALL
2336 if (! IS_CALL_RECURSION(NCALL(node
)))
2337 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2339 r
= GET_CHAR_LEN_VARLEN
;
2354 EncloseNode
* en
= NENCLOSE(node
);
2356 case ENCLOSE_MEMORY
:
2357 #ifdef USE_SUBEXP_CALL
2358 if (IS_ENCLOSE_CLEN_FIXED(en
))
2359 *len
= en
->char_len
;
2361 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2363 en
->char_len
= *len
;
2364 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2369 case ENCLOSE_OPTION
:
2370 case ENCLOSE_STOP_BACKTRACK
:
2371 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2383 r
= GET_CHAR_LEN_VARLEN
;
2391 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2393 return get_char_length_tree1(node
, reg
, len
, 0);
2396 /* x is not included y ==> 1 : 0 */
2398 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2412 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2413 NCTYPE(y
)->not != NCTYPE(x
)->not)
2423 tmp
= x
; x
= y
; y
= tmp
;
2440 CClassNode
* xc
= NCCLASS(x
);
2443 switch (NCTYPE(y
)->ctype
) {
2444 case ONIGENC_CTYPE_WORD
:
2445 if (NCTYPE(y
)->not == 0) {
2446 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2447 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2448 if (BITSET_AT(xc
->bs
, i
)) {
2449 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2457 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2458 if (! IS_CODE_SB_WORD(reg
->enc
, i
)) {
2459 if (!IS_NCCLASS_NOT(xc
)) {
2460 if (BITSET_AT(xc
->bs
, i
))
2464 if (! BITSET_AT(xc
->bs
, i
))
2481 CClassNode
* yc
= NCCLASS(y
);
2483 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2484 v
= BITSET_AT(xc
->bs
, i
);
2485 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2486 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2487 v
= BITSET_AT(yc
->bs
, i
);
2488 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2489 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2493 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2494 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2512 StrNode
* xs
= NSTR(x
);
2513 if (NSTRING_LEN(x
) == 0)
2519 switch (NCTYPE(y
)->ctype
) {
2520 case ONIGENC_CTYPE_WORD
:
2521 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2522 return NCTYPE(y
)->not;
2524 return !(NCTYPE(y
)->not);
2533 CClassNode
* cc
= NCCLASS(y
);
2535 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2536 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2537 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2544 StrNode
* ys
= NSTR(y
);
2545 len
= NSTRING_LEN(x
);
2546 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2547 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2552 for (i
= 0, p
= ys
->s
, q
= xs
->s
; i
< len
; i
++, p
++, q
++) {
2553 if (*p
!= *q
) return 1;
2573 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2575 Node
* n
= NULL_NODE
;
2577 switch (NTYPE(node
)) {
2581 #ifdef USE_SUBEXP_CALL
2594 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2599 StrNode
* sn
= NSTR(node
);
2601 if (sn
->end
<= sn
->s
)
2605 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2615 QtfrNode
* qn
= NQTFR(node
);
2616 if (qn
->lower
> 0) {
2617 if (IS_NOT_NULL(qn
->head_exact
))
2620 n
= get_head_value_node(qn
->target
, exact
, reg
);
2627 EncloseNode
* en
= NENCLOSE(node
);
2629 case ENCLOSE_OPTION
:
2631 OnigOptionType options
= reg
->options
;
2633 reg
->options
= NENCLOSE(node
)->option
;
2634 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2635 reg
->options
= options
;
2639 case ENCLOSE_MEMORY
:
2640 case ENCLOSE_STOP_BACKTRACK
:
2641 n
= get_head_value_node(en
->target
, exact
, reg
);
2648 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2649 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2660 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2665 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2672 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2674 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2678 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2684 EncloseNode
* en
= NENCLOSE(node
);
2685 if ((en
->type
& enclose_mask
) == 0)
2688 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2693 type
= NANCHOR(node
)->type
;
2694 if ((type
& anchor_mask
) == 0)
2697 if (NANCHOR(node
)->target
)
2698 r
= check_type_tree(NANCHOR(node
)->target
,
2699 type_mask
, enclose_mask
, anchor_mask
);
2708 #ifdef USE_SUBEXP_CALL
2710 #define RECURSION_EXIST 1
2711 #define RECURSION_INFINITE 2
2714 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2729 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2730 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2733 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2734 if (ret
!= 0) return ret
;
2735 if (min
!= 0) head
= 0;
2737 } while (IS_NOT_NULL(x
= NCDR(x
)));
2744 r
= RECURSION_EXIST
;
2746 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2747 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2749 } while (IS_NOT_NULL(node
= NCDR(node
)));
2754 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2755 if (r
== RECURSION_EXIST
) {
2756 if (NQTFR(node
)->lower
== 0) r
= 0;
2762 AnchorNode
* an
= NANCHOR(node
);
2764 case ANCHOR_PREC_READ
:
2765 case ANCHOR_PREC_READ_NOT
:
2766 case ANCHOR_LOOK_BEHIND
:
2767 case ANCHOR_LOOK_BEHIND_NOT
:
2768 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2775 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2779 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2781 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2782 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2784 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2785 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2786 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2798 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2808 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
2809 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2813 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
2818 AnchorNode
* an
= NANCHOR(node
);
2820 case ANCHOR_PREC_READ
:
2821 case ANCHOR_PREC_READ_NOT
:
2822 case ANCHOR_LOOK_BEHIND
:
2823 case ANCHOR_LOOK_BEHIND_NOT
:
2824 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
2832 EncloseNode
* en
= NENCLOSE(node
);
2834 if (IS_ENCLOSE_RECURSION(en
)) {
2835 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2836 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
2837 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
2838 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2840 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
2853 subexp_recursive_check(Node
* node
)
2857 switch (NTYPE(node
)) {
2861 r
|= subexp_recursive_check(NCAR(node
));
2862 } while (IS_NOT_NULL(node
= NCDR(node
)));
2866 r
= subexp_recursive_check(NQTFR(node
)->target
);
2871 AnchorNode
* an
= NANCHOR(node
);
2873 case ANCHOR_PREC_READ
:
2874 case ANCHOR_PREC_READ_NOT
:
2875 case ANCHOR_LOOK_BEHIND
:
2876 case ANCHOR_LOOK_BEHIND_NOT
:
2877 r
= subexp_recursive_check(an
->target
);
2884 r
= subexp_recursive_check(NCALL(node
)->target
);
2885 if (r
!= 0) SET_CALL_RECURSION(node
);
2889 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2891 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2892 return 1; /* recursion */
2894 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2895 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
2896 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2909 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2911 #define FOUND_CALLED_NODE 1
2923 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
2924 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
2925 else if (ret
< 0) return ret
;
2926 } while (IS_NOT_NULL(node
= NCDR(node
)));
2931 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
2932 if (NQTFR(node
)->upper
== 0) {
2933 if (r
== FOUND_CALLED_NODE
)
2934 NQTFR(node
)->is_refered
= 1;
2940 AnchorNode
* an
= NANCHOR(node
);
2942 case ANCHOR_PREC_READ
:
2943 case ANCHOR_PREC_READ_NOT
:
2944 case ANCHOR_LOOK_BEHIND
:
2945 case ANCHOR_LOOK_BEHIND_NOT
:
2946 r
= subexp_recursive_check_trav(an
->target
, env
);
2954 EncloseNode
* en
= NENCLOSE(node
);
2956 if (! IS_ENCLOSE_RECURSION(en
)) {
2957 if (IS_ENCLOSE_CALLED(en
)) {
2958 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2959 r
= subexp_recursive_check(en
->target
);
2960 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
2961 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2964 r
= subexp_recursive_check_trav(en
->target
, env
);
2965 if (IS_ENCLOSE_CALLED(en
))
2966 r
|= FOUND_CALLED_NODE
;
2978 setup_subexp_call(Node
* node
, ScanEnv
* env
)
2987 r
= setup_subexp_call(NCAR(node
), env
);
2988 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2993 r
= setup_subexp_call(NCAR(node
), env
);
2994 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2998 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3001 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3006 CallNode
* cn
= NCALL(node
);
3007 Node
** nodes
= SCANENV_MEM_NODES(env
);
3009 if (cn
->group_num
!= 0) {
3010 int gnum
= cn
->group_num
;
3012 #ifdef USE_NAMED_GROUP
3013 if (env
->num_named
> 0 &&
3014 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3015 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3016 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3019 if (gnum
> env
->num_mem
) {
3020 onig_scan_env_set_error_string(env
,
3021 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3022 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3025 #ifdef USE_NAMED_GROUP
3028 cn
->target
= nodes
[cn
->group_num
];
3029 if (IS_NULL(cn
->target
)) {
3030 onig_scan_env_set_error_string(env
,
3031 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3032 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3034 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3035 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3036 cn
->unset_addr_list
= env
->unset_addr_list
;
3038 #ifdef USE_NAMED_GROUP
3042 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3045 onig_scan_env_set_error_string(env
,
3046 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3047 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3050 onig_scan_env_set_error_string(env
,
3051 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3052 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3055 cn
->group_num
= refs
[0];
3065 AnchorNode
* an
= NANCHOR(node
);
3068 case ANCHOR_PREC_READ
:
3069 case ANCHOR_PREC_READ_NOT
:
3070 case ANCHOR_LOOK_BEHIND
:
3071 case ANCHOR_LOOK_BEHIND_NOT
:
3072 r
= setup_subexp_call(an
->target
, env
);
3086 /* divide different length alternatives in look-behind.
3087 (?<=A|B) ==> (?<=A)|(?<=B)
3088 (?<!A|B) ==> (?<!A)(?<!B)
3091 divide_look_behind_alternatives(Node
* node
)
3093 Node
*head
, *np
, *insert_node
;
3094 AnchorNode
* an
= NANCHOR(node
);
3095 int anc_type
= an
->type
;
3099 swap_node(node
, head
);
3101 NANCHOR(head
)->target
= np
;
3104 while ((np
= NCDR(np
)) != NULL_NODE
) {
3105 insert_node
= onig_node_new_anchor(anc_type
);
3106 CHECK_NULL_RETURN_MEMERR(insert_node
);
3107 NANCHOR(insert_node
)->target
= NCAR(np
);
3108 NCAR(np
) = insert_node
;
3111 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3114 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3115 } while ((np
= NCDR(np
)) != NULL_NODE
);
3121 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3124 AnchorNode
* an
= NANCHOR(node
);
3126 r
= get_char_length_tree(an
->target
, reg
, &len
);
3129 else if (r
== GET_CHAR_LEN_VARLEN
)
3130 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3131 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3132 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3133 r
= divide_look_behind_alternatives(node
);
3135 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3142 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3148 if (type
== NT_QTFR
) {
3149 QtfrNode
* qn
= NQTFR(node
);
3150 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3151 #ifdef USE_QTFR_PEEK_NEXT
3152 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3153 /* '\0': for UTF-16BE etc... */
3154 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3155 qn
->next_head_exact
= n
;
3158 /* automatic posseivation a*b ==> (?>a*)b */
3159 if (qn
->lower
<= 1) {
3160 int ttype
= NTYPE(qn
->target
);
3161 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3163 x
= get_head_value_node(qn
->target
, 0, reg
);
3164 if (IS_NOT_NULL(x
)) {
3165 y
= get_head_value_node(next_node
, 0, reg
);
3166 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3167 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3168 CHECK_NULL_RETURN_MEMERR(en
);
3169 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3170 swap_node(node
, en
);
3171 NENCLOSE(node
)->target
= en
;
3178 else if (type
== NT_ENCLOSE
) {
3179 EncloseNode
* en
= NENCLOSE(node
);
3180 if (en
->type
== ENCLOSE_MEMORY
) {
3190 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3192 UChar
*p
, *q
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3193 UChar
*sbuf
, *ebuf
, *sp
;
3194 int r
, i
, len
, sbuf_size
;
3195 StrNode
* sn
= NSTR(node
);
3198 sbuf_size
= (end
- sn
->s
) * 2;
3199 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3200 CHECK_NULL_RETURN_MEMERR(sbuf
);
3201 ebuf
= sbuf
+ sbuf_size
;
3206 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3208 for (i
= 0; i
< len
; i
++) {
3210 sbuf
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2);
3211 CHECK_NULL_RETURN_MEMERR(sbuf
);
3212 sp
= sbuf
+ sbuf_size
;
3214 ebuf
= sbuf
+ sbuf_size
;
3221 r
= onig_node_str_set(node
, sbuf
, sp
);
3232 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3238 node
= onig_node_new_str(s
, end
);
3239 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3241 r
= update_string_node_case_fold(reg
, node
);
3243 onig_node_free(node
);
3247 NSTRING_SET_AMBIG(node
);
3248 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3254 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3255 UChar
*p
, int slen
, UChar
*end
,
3256 regex_t
* reg
, Node
**rnode
)
3258 int r
, i
, j
, len
, varlen
;
3259 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3260 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3262 *rnode
= var_anode
= NULL_NODE
;
3265 for (i
= 0; i
< item_num
; i
++) {
3266 if (items
[i
].byte_len
!= slen
) {
3273 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3274 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3276 xnode
= onig_node_new_list(NULL
, NULL
);
3277 if (IS_NULL(xnode
)) goto mem_err
;
3278 NCAR(var_anode
) = xnode
;
3280 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3281 if (IS_NULL(anode
)) goto mem_err
;
3282 NCAR(xnode
) = anode
;
3285 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3286 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3289 snode
= onig_node_new_str(p
, p
+ slen
);
3290 if (IS_NULL(snode
)) goto mem_err
;
3292 NCAR(anode
) = snode
;
3294 for (i
= 0; i
< item_num
; i
++) {
3295 snode
= onig_node_new_str(NULL
, NULL
);
3296 if (IS_NULL(snode
)) goto mem_err
;
3298 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3299 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3305 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3306 if (r
!= 0) goto mem_err2
;
3309 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3314 if (items
[i
].byte_len
!= slen
) {
3316 UChar
*q
= p
+ items
[i
].byte_len
;
3319 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3325 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3326 if (IS_NULL(xnode
)) {
3328 onig_node_free(rem
);
3331 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3333 onig_node_free(xnode
);
3334 onig_node_free(rem
);
3344 NCDR(var_anode
) = an
;
3357 onig_node_free(snode
);
3360 onig_node_free(*rnode
);
3362 return ONIGERR_MEMORY
;
3366 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3368 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3370 int r
, n
, len
, alt_num
;
3371 UChar
*start
, *end
, *p
;
3372 Node
*top_root
, *root
, *snode
, *prev_node
;
3373 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3374 StrNode
* sn
= NSTR(node
);
3376 if (NSTRING_IS_AMBIG(node
)) return 0;
3380 if (start
>= end
) return 0;
3383 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3387 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3394 len
= enclen(reg
->enc
, p
, end
);
3397 if (IS_NULL(snode
)) {
3398 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3399 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3400 if (IS_NULL(root
)) {
3401 onig_node_free(prev_node
);
3406 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3407 if (IS_NULL(snode
)) goto mem_err
;
3408 if (IS_NOT_NULL(root
)) {
3409 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3410 onig_node_free(snode
);
3416 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3417 if (r
!= 0) goto err
;
3421 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3423 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3424 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3425 if (IS_NULL(root
)) {
3426 onig_node_free(prev_node
);
3431 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3432 if (r
< 0) goto mem_err
;
3434 if (IS_NULL(root
)) {
3435 top_root
= prev_node
;
3438 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3439 onig_node_free(prev_node
);
3444 root
= NCAR(prev_node
);
3447 if (IS_NOT_NULL(root
)) {
3448 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3449 onig_node_free(prev_node
);
3464 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3465 if (r
!= 0) goto mem_err
;
3467 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3468 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3469 if (IS_NULL(root
)) {
3470 onig_node_free(srem
);
3471 onig_node_free(prev_node
);
3476 if (IS_NULL(root
)) {
3480 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3481 onig_node_free(srem
);
3488 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3489 swap_node(node
, top_root
);
3490 onig_node_free(top_root
);
3497 onig_node_free(top_root
);
3502 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3504 #define CEC_THRES_NUM_BIG_REPEAT 512
3505 #define CEC_INFINITE_NUM 0x7fffffff
3507 #define CEC_IN_INFINITE_REPEAT (1<<0)
3508 #define CEC_IN_FINITE_REPEAT (1<<1)
3509 #define CEC_CONT_BIG_REPEAT (1<<2)
3512 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3521 Node
* prev
= NULL_NODE
;
3523 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3525 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3533 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3535 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3541 int child_state
= state
;
3543 QtfrNode
* qn
= NQTFR(node
);
3544 Node
* target
= qn
->target
;
3547 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3548 if (qn
->upper
> 1) {
3549 /* {0,1}, {1,1} are allowed */
3550 child_state
|= CEC_IN_FINITE_REPEAT
;
3552 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3553 if (env
->backrefed_mem
== 0) {
3554 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3555 EncloseNode
* en
= NENCLOSE(qn
->target
);
3556 if (en
->type
== ENCLOSE_MEMORY
) {
3557 if (NTYPE(en
->target
) == NT_QTFR
) {
3558 QtfrNode
* q
= NQTFR(en
->target
);
3559 if (IS_REPEAT_INFINITE(q
->upper
)
3560 && q
->greedy
== qn
->greedy
) {
3561 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3563 child_state
= state
;
3572 if (state
& CEC_IN_FINITE_REPEAT
) {
3573 qn
->comb_exp_check_num
= -1;
3576 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3577 var_num
= CEC_INFINITE_NUM
;
3578 child_state
|= CEC_IN_INFINITE_REPEAT
;
3581 var_num
= qn
->upper
- qn
->lower
;
3584 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3585 add_state
|= CEC_CONT_BIG_REPEAT
;
3587 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3588 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3589 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3590 if (qn
->comb_exp_check_num
== 0) {
3591 env
->num_comb_exp_check
++;
3592 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3593 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3594 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3599 r
= setup_comb_exp_check(target
, child_state
, env
);
3606 EncloseNode
* en
= NENCLOSE(node
);
3609 case ENCLOSE_MEMORY
:
3611 if (env
->curr_max_regnum
< en
->regnum
)
3612 env
->curr_max_regnum
= en
->regnum
;
3614 r
= setup_comb_exp_check(en
->target
, state
, env
);
3619 r
= setup_comb_exp_check(en
->target
, state
, env
);
3625 #ifdef USE_SUBEXP_CALL
3627 if (IS_CALL_RECURSION(NCALL(node
)))
3628 env
->has_recursion
= 1;
3630 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3642 #define IN_ALT (1<<0)
3643 #define IN_NOT (1<<1)
3644 #define IN_REPEAT (1<<2)
3645 #define IN_VAR_REPEAT (1<<3)
3647 /* setup_tree does the following work.
3648 1. check empty loop. (set qn->target_empty_info)
3649 2. expand ignore-case in char class.
3650 3. set memory status bit flags. (reg->mem_stats)
3651 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3652 5. find invalid patterns in look-behind.
3653 6. expand repeated string.
3656 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3665 Node
* prev
= NULL_NODE
;
3667 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3668 if (IS_NOT_NULL(prev
) && r
== 0) {
3669 r
= next_setup(prev
, NCAR(node
), reg
);
3672 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3678 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3679 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3686 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3687 r
= expand_case_fold_string(node
, reg
);
3695 #ifdef USE_SUBEXP_CALL
3704 Node
** nodes
= SCANENV_MEM_NODES(env
);
3705 BRefNode
* br
= NBREF(node
);
3707 for (i
= 0; i
< br
->back_num
; i
++) {
3708 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3709 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3710 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3711 #ifdef USE_BACKREF_WITH_LEVEL
3712 if (IS_BACKREF_NEST_LEVEL(br
)) {
3713 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3716 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3724 QtfrNode
* qn
= NQTFR(node
);
3725 Node
* target
= qn
->target
;
3727 if ((state
& IN_REPEAT
) != 0) {
3728 qn
->state
|= NST_IN_REPEAT
;
3731 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3732 r
= get_min_match_length(target
, &d
, env
);
3735 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3736 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3737 r
= quantifiers_memory_node_info(target
);
3740 qn
->target_empty_info
= r
;
3744 r
= get_max_match_length(target
, &d
, env
);
3745 if (r
== 0 && d
== 0) {
3746 /* ()* ==> ()?, ()+ ==> () */
3748 if (qn
->lower
> 1) qn
->lower
= 1;
3749 if (NTYPE(target
) == NT_STR
) {
3750 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
3758 if (qn
->lower
!= qn
->upper
)
3759 state
|= IN_VAR_REPEAT
;
3760 r
= setup_tree(target
, reg
, state
, env
);
3764 #define EXPAND_STRING_MAX_LENGTH 100
3765 if (NTYPE(target
) == NT_STR
) {
3766 if (!IS_REPEAT_INFINITE(qn
->lower
) && qn
->lower
== qn
->upper
&&
3767 qn
->lower
> 1 && qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3768 int len
= NSTRING_LEN(target
);
3769 StrNode
* sn
= NSTR(target
);
3771 if (len
* qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3772 int i
, n
= qn
->lower
;
3773 onig_node_conv_to_str_node(node
, NSTR(target
)->flag
);
3774 for (i
= 0; i
< n
; i
++) {
3775 r
= onig_node_str_cat(node
, sn
->s
, sn
->end
);
3778 onig_node_free(target
);
3779 break; /* break case NT_QTFR: */
3784 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3785 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
3786 if (NTYPE(target
) == NT_QTFR
) {
3787 QtfrNode
* tqn
= NQTFR(target
);
3788 if (IS_NOT_NULL(tqn
->head_exact
)) {
3789 qn
->head_exact
= tqn
->head_exact
;
3790 tqn
->head_exact
= NULL
;
3794 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
3803 EncloseNode
* en
= NENCLOSE(node
);
3806 case ENCLOSE_OPTION
:
3808 OnigOptionType options
= reg
->options
;
3809 reg
->options
= NENCLOSE(node
)->option
;
3810 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
3811 reg
->options
= options
;
3815 case ENCLOSE_MEMORY
:
3816 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
)) != 0) {
3817 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
3818 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3820 r
= setup_tree(en
->target
, reg
, state
, env
);
3823 case ENCLOSE_STOP_BACKTRACK
:
3825 Node
* target
= en
->target
;
3826 r
= setup_tree(target
, reg
, state
, env
);
3827 if (NTYPE(target
) == NT_QTFR
) {
3828 QtfrNode
* tqn
= NQTFR(target
);
3829 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
3830 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
3831 int qtype
= NTYPE(tqn
->target
);
3832 if (IS_NODE_TYPE_SIMPLE(qtype
))
3833 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
3844 AnchorNode
* an
= NANCHOR(node
);
3847 case ANCHOR_PREC_READ
:
3848 r
= setup_tree(an
->target
, reg
, state
, env
);
3850 case ANCHOR_PREC_READ_NOT
:
3851 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3854 /* allowed node types in look-behind */
3855 #define ALLOWED_TYPE_IN_LB \
3856 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3857 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3859 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3860 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3862 #define ALLOWED_ANCHOR_IN_LB \
3863 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3864 #define ALLOWED_ANCHOR_IN_LB_NOT \
3865 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3867 case ANCHOR_LOOK_BEHIND
:
3869 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3870 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
3871 if (r
< 0) return r
;
3872 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3873 r
= setup_look_behind(node
, reg
, env
);
3874 if (r
!= 0) return r
;
3875 r
= setup_tree(an
->target
, reg
, state
, env
);
3879 case ANCHOR_LOOK_BEHIND_NOT
:
3881 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3882 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
3883 if (r
< 0) return r
;
3884 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3885 r
= setup_look_behind(node
, reg
, env
);
3886 if (r
!= 0) return r
;
3887 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3901 /* set skip map for Boyer-Moor search */
3903 set_bm_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
3904 UChar skip
[], int** int_skip
)
3909 if (len
< ONIG_CHAR_TABLE_SIZE
) {
3910 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = len
;
3912 for (i
= 0; i
< len
- 1; i
++)
3913 skip
[s
[i
]] = len
- 1 - i
;
3916 if (IS_NULL(*int_skip
)) {
3917 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
3918 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
3920 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = len
;
3922 for (i
= 0; i
< len
- 1; i
++)
3923 (*int_skip
)[s
[i
]] = len
- 1 - i
;
3928 #define OPT_EXACT_MAXLEN 24
3931 OnigDistance min
; /* min byte length */
3932 OnigDistance max
; /* max byte length */
3938 OnigOptionType options
;
3939 OnigCaseFoldType case_fold_flag
;
3949 MinMaxLen mmd
; /* info position */
3955 UChar s
[OPT_EXACT_MAXLEN
];
3959 MinMaxLen mmd
; /* info position */
3962 int value
; /* weighted value */
3963 UChar map
[ONIG_CHAR_TABLE_SIZE
];
3970 OptExactInfo exb
; /* boundary */
3971 OptExactInfo exm
; /* middle */
3972 OptExactInfo expr
; /* prec read (?=...) */
3974 OptMapInfo map
; /* boundary */
3979 map_position_value(OnigEncoding enc
, int i
)
3981 static const short int ByteValTable
[] = {
3982 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
3983 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3984 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
3985 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
3986 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3987 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
3988 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3989 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
3992 if (i
< (int )(sizeof(ByteValTable
)/sizeof(ByteValTable
[0]))) {
3993 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
3996 return (int )ByteValTable
[i
];
3999 return 4; /* Take it easy. */
4003 distance_value(MinMaxLen
* mm
)
4005 /* 1000 / (min-max-dist + 1) */
4006 static const short int dist_vals
[] = {
4007 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4008 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4009 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4010 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4011 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4012 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4013 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4014 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4015 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4016 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4021 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4023 d
= mm
->max
- mm
->min
;
4024 if (d
< (int )(sizeof(dist_vals
)/sizeof(dist_vals
[0])))
4025 /* return dist_vals[d] * 16 / (mm->min + 12); */
4026 return (int )dist_vals
[d
];
4032 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4034 if (v2
<= 0) return -1;
4035 if (v1
<= 0) return 1;
4037 v1
*= distance_value(d1
);
4038 v2
*= distance_value(d2
);
4040 if (v2
> v1
) return 1;
4041 if (v2
< v1
) return -1;
4043 if (d2
->min
< d1
->min
) return 1;
4044 if (d2
->min
> d1
->min
) return -1;
4049 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4051 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4056 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4063 clear_mml(MinMaxLen
* mml
)
4065 mml
->min
= mml
->max
= 0;
4069 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4071 to
->min
= from
->min
;
4072 to
->max
= from
->max
;
4076 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4078 to
->min
= distance_add(to
->min
, from
->min
);
4079 to
->max
= distance_add(to
->max
, from
->max
);
4084 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4086 to
->min
= distance_add(to
->min
, len
);
4087 to
->max
= distance_add(to
->max
, len
);
4092 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4094 if (to
->min
> from
->min
) to
->min
= from
->min
;
4095 if (to
->max
< from
->max
) to
->max
= from
->max
;
4099 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4105 clear_opt_anc_info(OptAncInfo
* anc
)
4107 anc
->left_anchor
= 0;
4108 anc
->right_anchor
= 0;
4112 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4118 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4119 OnigDistance left_len
, OnigDistance right_len
)
4121 clear_opt_anc_info(to
);
4123 to
->left_anchor
= left
->left_anchor
;
4124 if (left_len
== 0) {
4125 to
->left_anchor
|= right
->left_anchor
;
4128 to
->right_anchor
= right
->right_anchor
;
4129 if (right_len
== 0) {
4130 to
->right_anchor
|= left
->right_anchor
;
4135 is_left_anchor(int anc
)
4137 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4138 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4139 anc
== ANCHOR_PREC_READ_NOT
)
4146 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4148 if ((to
->left_anchor
& anc
) != 0) return 1;
4150 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4154 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4156 if (is_left_anchor(anc
))
4157 to
->left_anchor
|= anc
;
4159 to
->right_anchor
|= anc
;
4163 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4165 if (is_left_anchor(anc
))
4166 to
->left_anchor
&= ~anc
;
4168 to
->right_anchor
&= ~anc
;
4172 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4174 to
->left_anchor
&= add
->left_anchor
;
4175 to
->right_anchor
&= add
->right_anchor
;
4179 is_full_opt_exact_info(OptExactInfo
* ex
)
4181 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4185 clear_opt_exact_info(OptExactInfo
* ex
)
4187 clear_mml(&ex
->mmd
);
4188 clear_opt_anc_info(&ex
->anc
);
4190 ex
->ignore_case
= 0;
4196 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4202 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4208 if (! to
->ignore_case
&& add
->ignore_case
) {
4209 if (to
->len
>= add
->len
) return ; /* avoid */
4211 to
->ignore_case
= 1;
4216 for (i
= to
->len
; p
< end
; ) {
4217 len
= enclen(enc
, p
, end
);
4218 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4219 for (j
= 0; j
< len
&& p
< end
; j
++)
4224 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4226 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4227 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4228 copy_opt_anc_info(&to
->anc
, &tanc
);
4232 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4233 int raw ARG_UNUSED
, OnigEncoding enc
)
4238 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4239 len
= enclen(enc
, p
, end
);
4240 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4241 for (j
= 0; j
< len
&& p
< end
; j
++)
4249 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4253 if (add
->len
== 0 || to
->len
== 0) {
4254 clear_opt_exact_info(to
);
4258 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4259 clear_opt_exact_info(to
);
4263 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4264 if (to
->s
[i
] != add
->s
[i
]) break;
4265 len
= enclen(env
->enc
, to
->s
+ i
, to
->s
+ to
->len
);
4267 for (j
= 1; j
< len
; j
++) {
4268 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4274 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4278 to
->ignore_case
|= add
->ignore_case
;
4280 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4281 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4285 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4296 copy_opt_exact_info(now
, alt
);
4299 else if (v1
<= 2 && v2
<= 2) {
4300 /* ByteValTable[x] is big value --> low price */
4301 v2
= map_position_value(enc
, now
->s
[0]);
4302 v1
= map_position_value(enc
, alt
->s
[0]);
4304 if (now
->len
> 1) v1
+= 5;
4305 if (alt
->len
> 1) v2
+= 5;
4308 if (now
->ignore_case
== 0) v1
*= 2;
4309 if (alt
->ignore_case
== 0) v2
*= 2;
4311 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4312 copy_opt_exact_info(now
, alt
);
4316 clear_opt_map_info(OptMapInfo
* map
)
4318 static const OptMapInfo clean_info
= {
4321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4323 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4340 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4344 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4350 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4352 if (map
->map
[c
] == 0) {
4354 map
->value
+= map_position_value(enc
, c
);
4359 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4360 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4362 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4363 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4366 add_char_opt_map_info(map
, p
[0], enc
);
4368 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4369 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4370 if (n
< 0) return n
;
4372 for (i
= 0; i
< n
; i
++) {
4373 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4374 add_char_opt_map_info(map
, buf
[0], enc
);
4381 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4383 static int z
= 1<<15; /* 32768: something big value */
4387 if (alt
->value
== 0) return ;
4388 if (now
->value
== 0) {
4389 copy_opt_map_info(now
, alt
);
4393 v1
= z
/ now
->value
;
4394 v2
= z
/ alt
->value
;
4395 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4396 copy_opt_map_info(now
, alt
);
4400 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4402 #define COMP_EM_BASE 20
4405 if (m
->value
<= 0) return -1;
4407 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
? 1 : 2);
4408 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4409 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4413 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4417 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4418 if (to
->value
== 0) return ;
4419 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4420 clear_opt_map_info(to
);
4424 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4427 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4432 val
+= map_position_value(enc
, i
);
4436 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4440 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4442 copy_mml(&(opt
->exb
.mmd
), mmd
);
4443 copy_mml(&(opt
->expr
.mmd
), mmd
);
4444 copy_mml(&(opt
->map
.mmd
), mmd
);
4448 clear_node_opt_info(NodeOptInfo
* opt
)
4450 clear_mml(&opt
->len
);
4451 clear_opt_anc_info(&opt
->anc
);
4452 clear_opt_exact_info(&opt
->exb
);
4453 clear_opt_exact_info(&opt
->exm
);
4454 clear_opt_exact_info(&opt
->expr
);
4455 clear_opt_map_info(&opt
->map
);
4459 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4465 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4467 int exb_reach
, exm_reach
;
4470 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4471 copy_opt_anc_info(&to
->anc
, &tanc
);
4473 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4474 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4475 to
->len
.max
, add
->len
.max
);
4476 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4479 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4480 if (add
->map
.mmd
.max
== 0)
4481 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4484 exb_reach
= to
->exb
.reach_end
;
4485 exm_reach
= to
->exm
.reach_end
;
4487 if (add
->len
.max
!= 0)
4488 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4490 if (add
->exb
.len
> 0) {
4492 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4493 clear_opt_exact_info(&add
->exb
);
4495 else if (exm_reach
) {
4496 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4497 clear_opt_exact_info(&add
->exb
);
4500 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4501 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4503 if (to
->expr
.len
> 0) {
4504 if (add
->len
.max
> 0) {
4505 if (to
->expr
.len
> (int )add
->len
.max
)
4506 to
->expr
.len
= add
->len
.max
;
4508 if (to
->expr
.mmd
.max
== 0)
4509 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4511 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4514 else if (add
->expr
.len
> 0) {
4515 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4518 select_opt_map_info(&to
->map
, &add
->map
);
4520 add_mml(&to
->len
, &add
->len
);
4524 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4526 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4527 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4528 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4529 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4530 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4532 alt_merge_mml(&to
->len
, &add
->len
);
4536 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4539 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4544 clear_node_opt_info(opt
);
4545 set_bound_node_opt_info(opt
, &env
->mmd
);
4555 copy_opt_env(&nenv
, env
);
4557 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
4559 add_mml(&nenv
.mmd
, &nopt
.len
);
4560 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
4562 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
4572 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
4574 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
4575 else alt_merge_node_opt_info(opt
, &nopt
, env
);
4577 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
4583 StrNode
* sn
= NSTR(node
);
4584 int slen
= sn
->end
- sn
->s
;
4585 int is_raw
= NSTRING_IS_RAW(node
);
4587 if (! NSTRING_IS_AMBIG(node
)) {
4588 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4589 NSTRING_IS_RAW(node
), env
->enc
);
4591 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
4593 set_mml(&opt
->len
, slen
, slen
);
4598 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
4599 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
4600 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * n
;
4603 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4605 opt
->exb
.ignore_case
= 1;
4608 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
4609 env
->enc
, env
->case_fold_flag
);
4616 set_mml(&opt
->len
, slen
, max
);
4619 if (opt
->exb
.len
== slen
)
4620 opt
->exb
.reach_end
= 1;
4627 CClassNode
* cc
= NCCLASS(node
);
4629 /* no need to check ignore case. (setted in setup_tree()) */
4631 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
4632 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4633 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4635 set_mml(&opt
->len
, min
, max
);
4638 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4639 z
= BITSET_AT(cc
->bs
, i
);
4640 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
4641 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4644 set_mml(&opt
->len
, 1, 1);
4653 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4658 switch (NCTYPE(node
)->ctype
) {
4659 case ONIGENC_CTYPE_WORD
:
4660 if (NCTYPE(node
)->not != 0) {
4661 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4662 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4663 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4668 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4669 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4670 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4678 min
= ONIGENC_MBC_MINLEN(env
->enc
);
4680 set_mml(&opt
->len
, min
, max
);
4686 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4687 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4688 set_mml(&opt
->len
, min
, max
);
4693 switch (NANCHOR(node
)->type
) {
4694 case ANCHOR_BEGIN_BUF
:
4695 case ANCHOR_BEGIN_POSITION
:
4696 case ANCHOR_BEGIN_LINE
:
4697 case ANCHOR_END_BUF
:
4698 case ANCHOR_SEMI_END_BUF
:
4699 case ANCHOR_END_LINE
:
4700 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
4703 case ANCHOR_PREC_READ
:
4707 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
4709 if (nopt
.exb
.len
> 0)
4710 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
4711 else if (nopt
.exm
.len
> 0)
4712 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
4714 opt
->expr
.reach_end
= 0;
4716 if (nopt
.map
.value
> 0)
4717 copy_opt_map_info(&opt
->map
, &nopt
.map
);
4722 case ANCHOR_PREC_READ_NOT
:
4723 case ANCHOR_LOOK_BEHIND
: /* Sorry, I can't make use of it. */
4724 case ANCHOR_LOOK_BEHIND_NOT
:
4733 OnigDistance min
, max
, tmin
, tmax
;
4734 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
4735 BRefNode
* br
= NBREF(node
);
4737 if (br
->state
& NST_RECURSION
) {
4738 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4741 backs
= BACKREFS_P(br
);
4742 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
4744 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
4746 for (i
= 1; i
< br
->back_num
; i
++) {
4747 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
4749 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
4751 if (min
> tmin
) min
= tmin
;
4752 if (max
< tmax
) max
= tmax
;
4754 if (r
== 0) set_mml(&opt
->len
, min
, max
);
4758 #ifdef USE_SUBEXP_CALL
4760 if (IS_CALL_RECURSION(NCALL(node
)))
4761 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4763 OnigOptionType save
= env
->options
;
4764 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
4765 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
4766 env
->options
= save
;
4774 OnigDistance min
, max
;
4776 QtfrNode
* qn
= NQTFR(node
);
4778 r
= optimize_node_left(qn
->target
, &nopt
, env
);
4781 if (qn
->lower
== 0 && IS_REPEAT_INFINITE(qn
->upper
)) {
4782 if (env
->mmd
.max
== 0 &&
4783 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
4784 if (IS_MULTILINE(env
->options
))
4785 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
4787 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
4791 if (qn
->lower
> 0) {
4792 copy_node_opt_info(opt
, &nopt
);
4793 if (nopt
.exb
.len
> 0) {
4794 if (nopt
.exb
.reach_end
) {
4795 for (i
= 2; i
< qn
->lower
&&
4796 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
4797 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
4799 if (i
< qn
->lower
) {
4800 opt
->exb
.reach_end
= 0;
4805 if (qn
->lower
!= qn
->upper
) {
4806 opt
->exb
.reach_end
= 0;
4807 opt
->exm
.reach_end
= 0;
4810 opt
->exm
.reach_end
= 0;
4814 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
4815 if (IS_REPEAT_INFINITE(qn
->upper
))
4816 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
4818 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
4820 set_mml(&opt
->len
, min
, max
);
4826 EncloseNode
* en
= NENCLOSE(node
);
4829 case ENCLOSE_OPTION
:
4831 OnigOptionType save
= env
->options
;
4833 env
->options
= en
->option
;
4834 r
= optimize_node_left(en
->target
, opt
, env
);
4835 env
->options
= save
;
4839 case ENCLOSE_MEMORY
:
4840 #ifdef USE_SUBEXP_CALL
4842 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
4843 OnigDistance min
, max
;
4846 max
= ONIG_INFINITE_DISTANCE
;
4847 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
4848 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
4849 set_mml(&opt
->len
, min
, max
);
4854 r
= optimize_node_left(en
->target
, opt
, env
);
4856 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
4857 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
4858 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
4863 case ENCLOSE_STOP_BACKTRACK
:
4864 r
= optimize_node_left(en
->target
, opt
, env
);
4872 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
4875 r
= ONIGERR_TYPE_BUG
;
4883 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
4887 if (e
->len
== 0) return 0;
4889 if (e
->ignore_case
) {
4890 reg
->exact
= (UChar
* )xmalloc(e
->len
);
4891 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4892 xmemcpy(reg
->exact
, e
->s
, e
->len
);
4893 reg
->exact_end
= reg
->exact
+ e
->len
;
4894 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
4899 reg
->exact
= str_dup(e
->s
, e
->s
+ e
->len
);
4900 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4901 reg
->exact_end
= reg
->exact
+ e
->len
;
4904 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
4906 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
4907 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
4908 reg
->map
, &(reg
->int_map
));
4911 reg
->optimize
= (allow_reverse
!= 0
4912 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
4915 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
4919 reg
->dmin
= e
->mmd
.min
;
4920 reg
->dmax
= e
->mmd
.max
;
4922 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4923 reg
->threshold_len
= reg
->dmin
+ (reg
->exact_end
- reg
->exact
);
4930 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
4934 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
4935 reg
->map
[i
] = m
->map
[i
];
4937 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
4938 reg
->dmin
= m
->mmd
.min
;
4939 reg
->dmax
= m
->mmd
.max
;
4941 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4942 reg
->threshold_len
= reg
->dmin
+ 1;
4947 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
4949 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
4950 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
4954 static void print_optimize_info(FILE* f
, regex_t
* reg
);
4958 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
4966 env
.options
= reg
->options
;
4967 env
.case_fold_flag
= reg
->case_fold_flag
;
4968 env
.scan_env
= scan_env
;
4969 clear_mml(&env
.mmd
);
4971 r
= optimize_node_left(node
, &opt
, &env
);
4974 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
4975 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
);
4977 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
);
4979 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
4980 reg
->anchor_dmin
= opt
.len
.min
;
4981 reg
->anchor_dmax
= opt
.len
.max
;
4984 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
4985 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
4986 if (opt
.map
.value
> 0 &&
4987 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
4991 r
= set_optimize_exact_info(reg
, &opt
.exb
);
4992 set_sub_anchor(reg
, &opt
.exb
.anc
);
4995 else if (opt
.map
.value
> 0) {
4997 set_optimize_map_info(reg
, &opt
.map
);
4998 set_sub_anchor(reg
, &opt
.map
.anc
);
5001 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5002 if (opt
.len
.max
== 0)
5003 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5006 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5007 print_optimize_info(stderr
, reg
);
5013 clear_optimize_info(regex_t
* reg
)
5015 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5017 reg
->anchor_dmin
= 0;
5018 reg
->anchor_dmax
= 0;
5019 reg
->sub_anchor
= 0;
5020 reg
->exact_end
= (UChar
* )NULL
;
5021 reg
->threshold_len
= 0;
5022 if (IS_NOT_NULL(reg
->exact
)) {
5024 reg
->exact
= (UChar
* )NULL
;
5030 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5031 const UChar
*s
, const UChar
*end
)
5033 fprintf(fp
, "\nPATTERN: /");
5035 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5041 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5043 fprintf(fp
, " 0x%04x ", (int )code
);
5046 fputc((int )code
, fp
);
5049 p
+= enclen(enc
, p
);
5054 fputc((int )*s
, fp
);
5063 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5065 if (a
== ONIG_INFINITE_DISTANCE
)
5068 fprintf(f
, "(%u)", a
);
5072 if (b
== ONIG_INFINITE_DISTANCE
)
5075 fprintf(f
, "(%u)", b
);
5079 print_anchor(FILE* f
, int anchor
)
5085 if (anchor
& ANCHOR_BEGIN_BUF
) {
5086 fprintf(f
, "begin-buf");
5089 if (anchor
& ANCHOR_BEGIN_LINE
) {
5090 if (q
) fprintf(f
, ", ");
5092 fprintf(f
, "begin-line");
5094 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5095 if (q
) fprintf(f
, ", ");
5097 fprintf(f
, "begin-pos");
5099 if (anchor
& ANCHOR_END_BUF
) {
5100 if (q
) fprintf(f
, ", ");
5102 fprintf(f
, "end-buf");
5104 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5105 if (q
) fprintf(f
, ", ");
5107 fprintf(f
, "semi-end-buf");
5109 if (anchor
& ANCHOR_END_LINE
) {
5110 if (q
) fprintf(f
, ", ");
5112 fprintf(f
, "end-line");
5114 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5115 if (q
) fprintf(f
, ", ");
5117 fprintf(f
, "anychar-star");
5119 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5120 if (q
) fprintf(f
, ", ");
5121 fprintf(f
, "anychar-star-pl");
5128 print_optimize_info(FILE* f
, regex_t
* reg
)
5130 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5131 "EXACT_IC", "MAP" };
5133 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5134 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5135 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5136 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5139 if (reg
->optimize
) {
5140 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5147 fprintf(f
, "exact: [");
5148 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5151 fprintf(f
, "]: length: %d\n", (reg
->exact_end
- reg
->exact
));
5153 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5156 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5157 if (reg
->map
[i
]) n
++;
5159 fprintf(f
, "map: n=%d\n", n
);
5163 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5164 if (reg
->map
[i
] != 0) {
5165 if (c
> 0) fputs(", ", f
);
5167 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5168 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5171 fprintf(f
, "%d", i
);
5178 #endif /* ONIG_DEBUG */
5182 onig_free_body(regex_t
* reg
)
5184 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5185 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5186 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5187 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5188 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5189 if (IS_NOT_NULL(reg
->chain
)) onig_free(reg
->chain
);
5191 #ifdef USE_NAMED_GROUP
5192 onig_names_free(reg
);
5197 onig_free(regex_t
* reg
)
5199 if (IS_NOT_NULL(reg
)) {
5200 onig_free_body(reg
);
5205 #define REGEX_TRANSFER(to,from) do {\
5206 (to)->state = ONIG_STATE_MODIFY;\
5207 onig_free_body(to);\
5208 xmemcpy(to, from, sizeof(regex_t));\
5213 onig_transfer(regex_t
* to
, regex_t
* from
)
5215 THREAD_ATOMIC_START
;
5216 REGEX_TRANSFER(to
, from
);
5220 #define REGEX_CHAIN_HEAD(reg) do {\
5221 while (IS_NOT_NULL((reg)->chain)) {\
5222 (reg) = (reg)->chain;\
5227 onig_chain_link_add(regex_t
* to
, regex_t
* add
)
5229 THREAD_ATOMIC_START
;
5230 REGEX_CHAIN_HEAD(to
);
5236 onig_chain_reduce(regex_t
* reg
)
5238 regex_t
*head
, *prev
;
5242 if (IS_NOT_NULL(head
)) {
5243 reg
->state
= ONIG_STATE_MODIFY
;
5244 while (IS_NOT_NULL(head
->chain
)) {
5248 prev
->chain
= (regex_t
* )NULL
;
5249 REGEX_TRANSFER(reg
, head
);
5255 onig_clone(regex_t
** to
, regex_t
* from
)
5260 #ifdef USE_MULTI_THREAD_SYSTEM
5261 if (ONIG_STATE(from
) >= ONIG_STATE_NORMAL
) {
5262 ONIG_STATE_INC(from
);
5263 if (IS_NOT_NULL(from
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
5264 onig_chain_reduce(from
);
5265 ONIG_STATE_INC(from
);
5270 while (ONIG_STATE(from
) < ONIG_STATE_NORMAL
) {
5271 if (++n
> THREAD_PASS_LIMIT_COUNT
)
5272 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
5275 ONIG_STATE_INC(from
);
5277 #endif /* USE_MULTI_THREAD_SYSTEM */
5279 r
= onig_alloc_init(®
, ONIG_OPTION_NONE
, ONIGENC_CASE_FOLD_DEFAULT
,
5280 from
->enc
, ONIG_SYNTAX_DEFAULT
);
5282 ONIG_STATE_DEC(from
);
5286 xmemcpy(reg
, from
, sizeof(onig_t
));
5287 reg
->chain
= (regex_t
* )NULL
;
5288 reg
->state
= ONIG_STATE_NORMAL
;
5291 reg
->p
= (UChar
* )xmalloc(reg
->alloc
);
5292 if (IS_NULL(reg
->p
)) goto mem_error
;
5293 xmemcpy(reg
->p
, from
->p
, reg
->alloc
);
5297 reg
->exact
= (UChar
* )xmalloc(from
->exact_end
- from
->exact
);
5298 if (IS_NULL(reg
->exact
)) goto mem_error
;
5299 reg
->exact_end
= reg
->exact
+ (from
->exact_end
- from
->exact
);
5300 xmemcpy(reg
->exact
, from
->exact
, reg
->exact_end
- reg
->exact
);
5303 if (from
->int_map
) {
5304 size
= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5305 reg
->int_map
= (int* )xmalloc(size
);
5306 if (IS_NULL(reg
->int_map
)) goto mem_error
;
5307 xmemcpy(reg
->int_map
, from
->int_map
, size
);
5310 if (from
->int_map_backward
) {
5311 size
= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5312 reg
->int_map_backward
= (int* )xmalloc(size
);
5313 if (IS_NULL(reg
->int_map_backward
)) goto mem_error
;
5314 xmemcpy(reg
->int_map_backward
, from
->int_map_backward
, size
);
5317 #ifdef USE_NAMED_GROUP
5318 reg
->name_table
= names_clone(from
); /* names_clone is not implemented */
5321 ONIG_STATE_DEC(from
);
5326 ONIG_STATE_DEC(from
);
5327 return ONIGERR_MEMORY
;
5332 static void print_compiled_byte_code_list
P_((FILE* f
, regex_t
* reg
));
5334 #ifdef ONIG_DEBUG_PARSE_TREE
5335 static void print_tree
P_((FILE* f
, Node
* node
));
5339 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5340 OnigErrorInfo
* einfo
)
5342 #define COMPILE_INIT_SIZE 20
5347 #ifdef USE_SUBEXP_CALL
5348 UnsetAddrList uslist
;
5351 reg
->state
= ONIG_STATE_COMPILING
;
5354 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5357 if (reg
->alloc
== 0) {
5358 init_size
= (pattern_end
- pattern
) * 2;
5359 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5360 r
= BBUF_INIT(reg
, init_size
);
5361 if (r
!= 0) goto end
;
5367 reg
->num_repeat
= 0;
5368 reg
->num_null_check
= 0;
5369 reg
->repeat_range_alloc
= 0;
5370 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5371 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5372 reg
->num_comb_exp_check
= 0;
5375 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5376 if (r
!= 0) goto err
;
5378 #ifdef USE_NAMED_GROUP
5379 /* mixed use named group and no-named group */
5380 if (scan_env
.num_named
> 0 &&
5381 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5382 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5383 if (scan_env
.num_named
!= scan_env
.num_mem
)
5384 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5386 r
= numbered_ref_check(root
);
5388 if (r
!= 0) goto err
;
5392 #ifdef USE_SUBEXP_CALL
5393 if (scan_env
.num_call
> 0) {
5394 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5395 if (r
!= 0) goto err
;
5396 scan_env
.unset_addr_list
= &uslist
;
5397 r
= setup_subexp_call(root
, &scan_env
);
5398 if (r
!= 0) goto err_unset
;
5399 r
= subexp_recursive_check_trav(root
, &scan_env
);
5400 if (r
< 0) goto err_unset
;
5401 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5402 if (r
!= 0) goto err_unset
;
5404 reg
->num_call
= scan_env
.num_call
;
5410 r
= setup_tree(root
, reg
, 0, &scan_env
);
5411 if (r
!= 0) goto err_unset
;
5413 #ifdef ONIG_DEBUG_PARSE_TREE
5414 print_tree(stderr
, root
);
5417 reg
->capture_history
= scan_env
.capture_history
;
5418 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5419 reg
->bt_mem_start
|= reg
->capture_history
;
5420 if (IS_FIND_CONDITION(reg
->options
))
5421 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5423 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5424 reg
->bt_mem_end
|= reg
->capture_history
;
5427 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5428 if (scan_env
.backrefed_mem
== 0
5429 #ifdef USE_SUBEXP_CALL
5430 || scan_env
.num_call
== 0
5433 setup_comb_exp_check(root
, 0, &scan_env
);
5434 #ifdef USE_SUBEXP_CALL
5435 if (scan_env
.has_recursion
!= 0) {
5436 scan_env
.num_comb_exp_check
= 0;
5440 if (scan_env
.comb_exp_max_regnum
> 0) {
5442 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5443 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5444 scan_env
.num_comb_exp_check
= 0;
5451 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5454 clear_optimize_info(reg
);
5455 #ifndef ONIG_DONT_OPTIMIZE
5456 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5457 if (r
!= 0) goto err_unset
;
5460 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5461 xfree(scan_env
.mem_nodes_dynamic
);
5462 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5465 r
= compile_tree(root
, reg
);
5467 r
= add_opcode(reg
, OP_END
);
5468 #ifdef USE_SUBEXP_CALL
5469 if (scan_env
.num_call
> 0) {
5470 r
= unset_addr_list_fix(&uslist
, reg
);
5471 unset_addr_list_end(&uslist
);
5476 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5477 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5479 if (reg
->bt_mem_start
!= 0)
5480 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5482 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5485 #ifdef USE_SUBEXP_CALL
5486 else if (scan_env
.num_call
> 0) {
5487 unset_addr_list_end(&uslist
);
5490 onig_node_free(root
);
5492 #ifdef ONIG_DEBUG_COMPILE
5493 #ifdef USE_NAMED_GROUP
5494 onig_print_names(stderr
, reg
);
5496 print_compiled_byte_code_list(stderr
, reg
);
5500 reg
->state
= ONIG_STATE_NORMAL
;
5504 #ifdef USE_SUBEXP_CALL
5505 if (scan_env
.num_call
> 0) {
5506 unset_addr_list_end(&uslist
);
5510 if (IS_NOT_NULL(scan_env
.error
)) {
5511 if (IS_NOT_NULL(einfo
)) {
5512 einfo
->enc
= scan_env
.enc
;
5513 einfo
->par
= scan_env
.error
;
5514 einfo
->par_end
= scan_env
.error_end
;
5518 onig_node_free(root
);
5519 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
))
5520 xfree(scan_env
.mem_nodes_dynamic
);
5524 #ifdef USE_RECOMPILE_API
5526 onig_recompile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5527 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5528 OnigErrorInfo
* einfo
)
5533 r
= onig_new(&new_reg
, pattern
, pattern_end
, option
, enc
, syntax
, einfo
);
5535 if (ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
5536 onig_transfer(reg
, new_reg
);
5539 onig_chain_link_add(reg
, new_reg
);
5545 static int onig_inited
= 0;
5548 onig_alloc_init(regex_t
** reg
, OnigOptionType option
,
5549 OnigCaseFoldType case_fold_flag
,
5550 OnigEncoding enc
, OnigSyntaxType
* syntax
)
5555 if (ONIGENC_IS_UNDEF(enc
))
5556 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
;
5558 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
5559 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
5560 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
5563 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
5564 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
5565 (*reg
)->state
= ONIG_STATE_MODIFY
;
5567 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
5568 option
|= syntax
->options
;
5569 option
&= ~ONIG_OPTION_SINGLELINE
;
5572 option
|= syntax
->options
;
5575 (*reg
)->options
= option
;
5576 (*reg
)->syntax
= syntax
;
5577 (*reg
)->optimize
= 0;
5578 (*reg
)->exact
= (UChar
* )NULL
;
5579 (*reg
)->int_map
= (int* )NULL
;
5580 (*reg
)->int_map_backward
= (int* )NULL
;
5581 (*reg
)->chain
= (regex_t
* )NULL
;
5583 (*reg
)->p
= (UChar
* )NULL
;
5586 (*reg
)->name_table
= (void* )NULL
;
5588 (*reg
)->case_fold_flag
= case_fold_flag
;
5593 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5594 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5595 OnigErrorInfo
* einfo
)
5599 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5601 r
= onig_alloc_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
,
5605 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
5616 if (onig_inited
!= 0)
5620 THREAD_ATOMIC_START
;
5625 /* onigenc_set_default_caseconv_table((UChar* )0); */
5627 #ifdef ONIG_DEBUG_STATISTICS
5628 onig_statistics_init();
5639 THREAD_ATOMIC_START
;
5641 #ifdef ONIG_DEBUG_STATISTICS
5642 onig_print_statistics(stderr
);
5645 #ifdef USE_SHARED_CCLASS_TABLE
5646 onig_free_shared_cclass_table();
5649 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5650 onig_free_node_list();
5661 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
5663 OnigCodePoint n
, *data
;
5664 OnigCodePoint low
, high
, x
;
5666 GET_CODE_POINT(n
, p
);
5667 data
= (OnigCodePoint
* )p
;
5670 for (low
= 0, high
= n
; low
< high
; ) {
5671 x
= (low
+ high
) >> 1;
5672 if (code
> data
[x
* 2 + 1])
5678 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
5682 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
5686 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
5687 if (IS_NULL(cc
->mbuf
)) {
5691 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
5695 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
5698 if (IS_NCCLASS_NOT(cc
))
5705 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
5709 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5713 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
5715 return onig_is_code_in_cc_len(len
, code
, cc
);
5721 /* arguments type */
5722 #define ARG_SPECIAL -1
5724 #define ARG_RELADDR 1
5725 #define ARG_ABSADDR 2
5726 #define ARG_LENGTH 3
5727 #define ARG_MEMNUM 4
5728 #define ARG_OPTION 5
5729 #define ARG_STATE_CHECK 6
5731 OnigOpInfoType OnigOpInfo
[] = {
5732 { OP_FINISH
, "finish", ARG_NON
},
5733 { OP_END
, "end", ARG_NON
},
5734 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
5735 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
5736 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
5737 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
5738 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
5739 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
5740 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
5741 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
5742 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
5743 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
5744 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
5745 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
5746 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
5747 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
5748 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
5749 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
5750 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
5751 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
5752 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
5753 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
5754 { OP_CCLASS_NODE
, "cclass-node", ARG_SPECIAL
},
5755 { OP_ANYCHAR
, "anychar", ARG_NON
},
5756 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
5757 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
5758 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
5759 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
5760 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
5761 { OP_WORD
, "word", ARG_NON
},
5762 { OP_NOT_WORD
, "not-word", ARG_NON
},
5763 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
5764 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
5765 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
5766 { OP_WORD_END
, "word-end", ARG_NON
},
5767 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
5768 { OP_END_BUF
, "end-buf", ARG_NON
},
5769 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
5770 { OP_END_LINE
, "end-line", ARG_NON
},
5771 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
5772 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
5773 { OP_BACKREF1
, "backref1", ARG_NON
},
5774 { OP_BACKREF2
, "backref2", ARG_NON
},
5775 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
5776 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
5777 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
5778 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
5779 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
5780 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
5781 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
5782 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
5783 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
5784 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
5785 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
5786 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
5787 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
5788 { OP_FAIL
, "fail", ARG_NON
},
5789 { OP_JUMP
, "jump", ARG_RELADDR
},
5790 { OP_PUSH
, "push", ARG_RELADDR
},
5791 { OP_POP
, "pop", ARG_NON
},
5792 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
5793 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
5794 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
5795 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
5796 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
5797 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
5798 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
5799 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
5800 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
5801 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
5802 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
5803 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
5804 { OP_PUSH_POS
, "push-pos", ARG_NON
},
5805 { OP_POP_POS
, "pop-pos", ARG_NON
},
5806 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
5807 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
5808 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
5809 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
5810 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
5811 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
5812 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
5813 { OP_CALL
, "call", ARG_ABSADDR
},
5814 { OP_RETURN
, "return", ARG_NON
},
5815 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
5816 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
5817 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
5818 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
5819 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
5820 "state-check-anychar-ml*", ARG_STATE_CHECK
},
5829 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5830 if (opcode
== OnigOpInfo
[i
].opcode
)
5831 return OnigOpInfo
[i
].name
;
5837 op2arg_type(int opcode
)
5841 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5842 if (opcode
== OnigOpInfo
[i
].opcode
)
5843 return OnigOpInfo
[i
].arg_type
;
5849 Indent(FILE* f
, int indent
)
5852 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
5856 p_string(FILE* f
, int len
, UChar
* s
)
5859 while (len
-- > 0) { fputc(*s
++, f
); }
5863 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
5865 int x
= len
* mb_len
;
5867 fprintf(f
, ":%d:", len
);
5868 while (x
-- > 0) { fputc(*s
++, f
); }
5872 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
** nextp
,
5879 StateCheckNumType scn
;
5883 fprintf(f
, "[%s", op2name(*bp
));
5884 arg_type
= op2arg_type(*bp
);
5885 if (arg_type
!= ARG_SPECIAL
) {
5891 GET_RELADDR_INC(addr
, bp
);
5892 fprintf(f
, ":(%d)", addr
);
5895 GET_ABSADDR_INC(addr
, bp
);
5896 fprintf(f
, ":(%d)", addr
);
5899 GET_LENGTH_INC(len
, bp
);
5900 fprintf(f
, ":%d", len
);
5903 mem
= *((MemNumType
* )bp
);
5905 fprintf(f
, ":%d", mem
);
5909 OnigOptionType option
= *((OnigOptionType
* )bp
);
5911 fprintf(f
, ":%d", option
);
5915 case ARG_STATE_CHECK
:
5916 scn
= *((StateCheckNumType
* )bp
);
5917 bp
+= SIZE_STATE_CHECK_NUM
;
5918 fprintf(f
, ":%d", scn
);
5925 case OP_ANYCHAR_STAR_PEEK_NEXT
:
5926 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
5927 p_string(f
, 1, bp
++); break;
5929 p_string(f
, 2, bp
); bp
+= 2; break;
5931 p_string(f
, 3, bp
); bp
+= 3; break;
5933 p_string(f
, 4, bp
); bp
+= 4; break;
5935 p_string(f
, 5, bp
); bp
+= 5; break;
5937 GET_LENGTH_INC(len
, bp
);
5938 p_len_string(f
, len
, 1, bp
);
5943 p_string(f
, 2, bp
); bp
+= 2; break;
5945 p_string(f
, 4, bp
); bp
+= 4; break;
5947 p_string(f
, 6, bp
); bp
+= 6; break;
5949 GET_LENGTH_INC(len
, bp
);
5950 p_len_string(f
, len
, 2, bp
);
5954 GET_LENGTH_INC(len
, bp
);
5955 p_len_string(f
, len
, 3, bp
);
5962 GET_LENGTH_INC(mb_len
, bp
);
5963 GET_LENGTH_INC(len
, bp
);
5964 fprintf(f
, ":%d:%d:", mb_len
, len
);
5966 while (n
-- > 0) { fputc(*bp
++, f
); }
5971 len
= enclen(enc
, bp
);
5972 p_string(f
, len
, bp
);
5976 GET_LENGTH_INC(len
, bp
);
5977 p_len_string(f
, len
, 1, bp
);
5982 n
= bitset_on_num((BitSetRef
)bp
);
5984 fprintf(f
, ":%d", n
);
5988 n
= bitset_on_num((BitSetRef
)bp
);
5990 fprintf(f
, ":%d", n
);
5994 case OP_CCLASS_MB_NOT
:
5995 GET_LENGTH_INC(len
, bp
);
5997 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6000 GET_CODE_POINT(code
, q
);
6002 fprintf(f
, ":%d:%d", (int )code
, len
);
6006 case OP_CCLASS_MIX_NOT
:
6007 n
= bitset_on_num((BitSetRef
)bp
);
6009 GET_LENGTH_INC(len
, bp
);
6011 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6014 GET_CODE_POINT(code
, q
);
6016 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6019 case OP_CCLASS_NODE
:
6023 GET_POINTER_INC(cc
, bp
);
6024 n
= bitset_on_num(cc
->bs
);
6025 fprintf(f
, ":%u:%d", (unsigned int )cc
, n
);
6029 case OP_BACKREFN_IC
:
6030 mem
= *((MemNumType
* )bp
);
6032 fprintf(f
, ":%d", mem
);
6035 case OP_BACKREF_MULTI_IC
:
6036 case OP_BACKREF_MULTI
:
6038 GET_LENGTH_INC(len
, bp
);
6039 for (i
= 0; i
< len
; i
++) {
6040 GET_MEMNUM_INC(mem
, bp
);
6041 if (i
> 0) fputs(", ", f
);
6042 fprintf(f
, "%d", mem
);
6046 case OP_BACKREF_WITH_LEVEL
:
6048 OnigOptionType option
;
6051 GET_OPTION_INC(option
, bp
);
6052 fprintf(f
, ":%d", option
);
6053 GET_LENGTH_INC(level
, bp
);
6054 fprintf(f
, ":%d", level
);
6057 GET_LENGTH_INC(len
, bp
);
6058 for (i
= 0; i
< len
; i
++) {
6059 GET_MEMNUM_INC(mem
, bp
);
6060 if (i
> 0) fputs(", ", f
);
6061 fprintf(f
, "%d", mem
);
6069 mem
= *((MemNumType
* )bp
);
6071 addr
= *((RelAddrType
* )bp
);
6073 fprintf(f
, ":%d:%d", mem
, addr
);
6077 case OP_PUSH_OR_JUMP_EXACT1
:
6078 case OP_PUSH_IF_PEEK_NEXT
:
6079 addr
= *((RelAddrType
* )bp
);
6081 fprintf(f
, ":(%d)", addr
);
6086 case OP_LOOK_BEHIND
:
6087 GET_LENGTH_INC(len
, bp
);
6088 fprintf(f
, ":%d", len
);
6091 case OP_PUSH_LOOK_BEHIND_NOT
:
6092 GET_RELADDR_INC(addr
, bp
);
6093 GET_LENGTH_INC(len
, bp
);
6094 fprintf(f
, ":%d:(%d)", len
, addr
);
6097 case OP_STATE_CHECK_PUSH
:
6098 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6099 scn
= *((StateCheckNumType
* )bp
);
6100 bp
+= SIZE_STATE_CHECK_NUM
;
6101 addr
= *((RelAddrType
* )bp
);
6103 fprintf(f
, ":%d:(%d)", scn
, addr
);
6107 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6112 if (nextp
) *nextp
= bp
;
6116 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6120 UChar
* end
= reg
->p
+ reg
->used
;
6122 fprintf(f
, "code length: %d\n", reg
->used
);
6133 onig_print_compiled_byte_code(f
, bp
, &bp
, reg
->enc
);
6140 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6147 if (IS_NULL(node
)) {
6148 fprintf(f
, "ERROR: null node!!!\n");
6156 if (NTYPE(node
) == NT_LIST
)
6157 fprintf(f
, "<list:%x>\n", (int )node
);
6159 fprintf(f
, "<alt:%x>\n", (int )node
);
6161 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6162 while (IS_NOT_NULL(node
= NCDR(node
))) {
6163 if (NTYPE(node
) != type
) {
6164 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6167 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6172 fprintf(f
, "<string%s:%x>",
6173 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (int )node
);
6174 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6175 if (*p
>= 0x20 && *p
< 0x7f)
6178 fprintf(f
, " 0x%02x", *p
);
6184 fprintf(f
, "<cclass:%x>", (int )node
);
6185 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs(" not", f
);
6186 if (NCCLASS(node
)->mbuf
) {
6187 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6188 for (i
= 0; i
< bbuf
->used
; i
++) {
6189 if (i
> 0) fprintf(f
, ",");
6190 fprintf(f
, "%0x", bbuf
->p
[i
]);
6196 fprintf(f
, "<ctype:%x> ", (int )node
);
6197 switch (NCTYPE(node
)->ctype
) {
6198 case ONIGENC_CTYPE_WORD
:
6199 if (NCTYPE(node
)->not != 0)
6200 fputs("not word", f
);
6206 fprintf(f
, "ERROR: undefined ctype.\n");
6212 fprintf(f
, "<anychar:%x>", (int )node
);
6216 fprintf(f
, "<anchor:%x> ", (int )node
);
6217 switch (NANCHOR(node
)->type
) {
6218 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6219 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6220 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6221 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6222 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6223 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6225 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6226 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6227 #ifdef USE_WORD_BEGIN_END
6228 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6229 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6231 case ANCHOR_PREC_READ
: fputs("prec read", f
); break;
6232 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); break;
6233 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); break;
6234 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); break;
6237 fprintf(f
, "ERROR: undefined anchor type.\n");
6245 BRefNode
* br
= NBREF(node
);
6247 fprintf(f
, "<backref:%x>", (int )node
);
6248 for (i
= 0; i
< br
->back_num
; i
++) {
6249 if (i
> 0) fputs(", ", f
);
6250 fprintf(f
, "%d", p
[i
]);
6255 #ifdef USE_SUBEXP_CALL
6258 CallNode
* cn
= NCALL(node
);
6259 fprintf(f
, "<call:%x>", (int )node
);
6260 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6266 fprintf(f
, "<quantifier:%x>{%d,%d}%s\n", (int )node
,
6267 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6268 (NQTFR(node
)->greedy
? "" : "?"));
6269 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6273 fprintf(f
, "<enclose:%x> ", (int )node
);
6274 switch (NENCLOSE(node
)->type
) {
6275 case ENCLOSE_OPTION
:
6276 fprintf(f
, "option:%d\n", NENCLOSE(node
)->option
);
6277 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6279 case ENCLOSE_MEMORY
:
6280 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6282 case ENCLOSE_STOP_BACKTRACK
:
6283 fprintf(f
, "stop-bt");
6290 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6294 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6298 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6303 #endif /* ONIG_DEBUG */
6305 #ifdef ONIG_DEBUG_PARSE_TREE
6307 print_tree(FILE* f
, Node
* node
)
6309 print_indent_tree(f
, node
, 0);