1 /**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
35 extern OnigCaseFoldType
36 onig_get_default_case_fold_flag(void)
38 return OnigDefaultCaseFoldFlag
;
42 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
44 OnigDefaultCaseFoldFlag
= case_fold_flag
;
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
55 str_dup(UChar
* s
, UChar
* end
)
57 ptrdiff_t len
= end
- s
;
60 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
71 swap_node(Node
* a
, Node
* b
)
74 c
= *a
; *a
= *b
; *b
= c
;
76 if (NTYPE(a
) == NT_STR
) {
77 StrNode
* sn
= NSTR(a
);
79 size_t len
= sn
->end
- sn
->s
;
81 sn
->end
= sn
->s
+ len
;
85 if (NTYPE(b
) == NT_STR
) {
86 StrNode
* sn
= NSTR(b
);
88 size_t len
= sn
->end
- sn
->s
;
90 sn
->end
= sn
->s
+ len
;
96 distance_add(OnigDistance d1
, OnigDistance d2
)
98 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
99 return ONIG_INFINITE_DISTANCE
;
101 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
102 else return ONIG_INFINITE_DISTANCE
;
107 distance_multiply(OnigDistance d
, int m
)
109 if (m
== 0) return 0;
111 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
114 return ONIG_INFINITE_DISTANCE
;
118 bitset_is_empty(BitSetRef bs
)
121 for (i
= 0; i
< BITSET_SIZE
; i
++) {
122 if (bs
[i
] != 0) return 0;
129 bitset_on_num(BitSetRef bs
)
134 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
135 if (BITSET_AT(bs
, i
)) n
++;
141 // Attempt to right size allocated buffers for a regex post compile
143 onig_reg_resize(regex_t
*reg
)
146 if (reg
->alloc
> reg
->used
) {
147 unsigned char *new_ptr
= xrealloc(reg
->p
, reg
->used
);
148 // Skip the right size optimization if memory allocation fails
150 reg
->alloc
= reg
->used
;
161 onig_bbuf_init(BBuf
* buf
, OnigDistance size
)
168 buf
->p
= (UChar
* )xmalloc(size
);
169 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
172 buf
->alloc
= (unsigned int )size
;
178 #ifdef USE_SUBEXP_CALL
181 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
185 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
186 CHECK_NULL_RETURN_MEMERR(p
);
188 uslist
->alloc
= size
;
194 unset_addr_list_end(UnsetAddrList
* uslist
)
196 if (IS_NOT_NULL(uslist
->us
))
201 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
206 if (uslist
->num
>= uslist
->alloc
) {
207 size
= uslist
->alloc
* 2;
208 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
);
209 CHECK_NULL_RETURN_MEMERR(p
);
210 uslist
->alloc
= size
;
214 uslist
->us
[uslist
->num
].offset
= offset
;
215 uslist
->us
[uslist
->num
].target
= node
;
219 #endif /* USE_SUBEXP_CALL */
223 add_opcode(regex_t
* reg
, int opcode
)
225 BBUF_ADD1(reg
, opcode
);
229 #ifdef USE_COMBINATION_EXPLOSION_CHECK
231 add_state_check_num(regex_t
* reg
, int num
)
233 StateCheckNumType n
= (StateCheckNumType
)num
;
235 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
241 add_rel_addr(regex_t
* reg
, int addr
)
243 RelAddrType ra
= (RelAddrType
)addr
;
245 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
250 add_abs_addr(regex_t
* reg
, int addr
)
252 AbsAddrType ra
= (AbsAddrType
)addr
;
254 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
259 add_length(regex_t
* reg
, OnigDistance len
)
261 LengthType l
= (LengthType
)len
;
263 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
268 add_mem_num(regex_t
* reg
, int num
)
270 MemNumType n
= (MemNumType
)num
;
272 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
278 add_pointer(regex_t
* reg
, void* addr
)
280 PointerType ptr
= (PointerType
)addr
;
282 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
288 add_option(regex_t
* reg
, OnigOptionType option
)
290 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
295 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
299 r
= add_opcode(reg
, opcode
);
301 r
= add_rel_addr(reg
, addr
);
306 add_bytes(regex_t
* reg
, UChar
* bytes
, OnigDistance len
)
308 BBUF_ADD(reg
, bytes
, len
);
313 add_bitset(regex_t
* reg
, BitSetRef bs
)
315 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
320 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
324 r
= add_opcode(reg
, opcode
);
326 r
= add_option(reg
, option
);
330 static int compile_length_tree(Node
* node
, regex_t
* reg
);
331 static int compile_tree(Node
* node
, regex_t
* reg
);
334 #define IS_NEED_STR_LEN_OP_EXACT(op) \
335 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
336 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
339 select_str_opcode(int mb_len
, OnigDistance byte_len
, int ignore_case
)
342 OnigDistance str_len
= (byte_len
+ mb_len
- 1) / mb_len
;
346 case 1: op
= OP_EXACT1_IC
; break;
347 default: op
= OP_EXACTN_IC
; break;
354 case 1: op
= OP_EXACT1
; break;
355 case 2: op
= OP_EXACT2
; break;
356 case 3: op
= OP_EXACT3
; break;
357 case 4: op
= OP_EXACT4
; break;
358 case 5: op
= OP_EXACT5
; break;
359 default: op
= OP_EXACTN
; break;
365 case 1: op
= OP_EXACTMB2N1
; break;
366 case 2: op
= OP_EXACTMB2N2
; break;
367 case 3: op
= OP_EXACTMB2N3
; break;
368 default: op
= OP_EXACTMB2N
; break;
385 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
388 int saved_num_null_check
= reg
->num_null_check
;
390 if (empty_info
!= 0) {
391 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
393 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
395 reg
->num_null_check
++;
398 r
= compile_tree(node
, reg
);
401 if (empty_info
!= 0) {
402 if (empty_info
== NQ_TARGET_IS_EMPTY
)
403 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
404 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
405 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
406 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
407 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
410 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
415 #ifdef USE_SUBEXP_CALL
417 compile_call(CallNode
* node
, regex_t
* reg
)
421 r
= add_opcode(reg
, OP_CALL
);
423 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
426 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
432 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
436 for (i
= 0; i
< n
; i
++) {
437 r
= compile_tree(node
, reg
);
444 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, OnigDistance byte_len
,
445 regex_t
* reg ARG_UNUSED
, int ignore_case
)
448 int op
= select_str_opcode(mb_len
, byte_len
, ignore_case
);
452 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
453 if (IS_NEED_STR_LEN_OP_EXACT(op
))
456 len
+= (int )byte_len
;
461 add_compile_string(UChar
* s
, int mb_len
, OnigDistance byte_len
,
462 regex_t
* reg
, int ignore_case
)
464 int op
= select_str_opcode(mb_len
, byte_len
, ignore_case
);
467 if (op
== OP_EXACTMBN
)
468 add_length(reg
, mb_len
);
470 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
471 if (op
== OP_EXACTN_IC
)
472 add_length(reg
, byte_len
);
474 add_length(reg
, byte_len
/ mb_len
);
477 add_bytes(reg
, s
, byte_len
);
483 compile_length_string_node(Node
* node
, regex_t
* reg
)
485 int rlen
, r
, len
, prev_len
, blen
, ambig
;
486 OnigEncoding enc
= reg
->enc
;
491 if (sn
->end
<= sn
->s
)
494 ambig
= NSTRING_IS_AMBIG(node
);
497 prev_len
= enclen(enc
, p
, sn
->end
);
502 for (; p
< sn
->end
; ) {
503 len
= enclen(enc
, p
, sn
->end
);
504 if (len
== prev_len
|| ambig
) {
508 r
= add_compile_string_length(prev
, prev_len
, blen
, reg
, ambig
);
516 r
= add_compile_string_length(prev
, prev_len
, blen
, reg
, ambig
);
522 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
524 if (sn
->end
<= sn
->s
)
527 return add_compile_string_length(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
531 compile_string_node(Node
* node
, regex_t
* reg
)
533 int r
, len
, prev_len
, blen
, ambig
;
534 OnigEncoding enc
= reg
->enc
;
535 UChar
*p
, *prev
, *end
;
539 if (sn
->end
<= sn
->s
)
543 ambig
= NSTRING_IS_AMBIG(node
);
546 prev_len
= enclen(enc
, p
, end
);
551 len
= enclen(enc
, p
, end
);
552 if (len
== prev_len
|| ambig
) {
556 r
= add_compile_string(prev
, prev_len
, blen
, reg
, ambig
);
566 return add_compile_string(prev
, prev_len
, blen
, reg
, ambig
);
570 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
572 if (sn
->end
<= sn
->s
)
575 return add_compile_string(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
579 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
581 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
582 add_length(reg
, mbuf
->used
);
583 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
586 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
588 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
589 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
590 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
592 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
594 /* padding for return value from compile_length_cclass_node() to be fix. */
595 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
596 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
602 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
606 if (IS_NULL(cc
->mbuf
)) {
607 len
= SIZE_OPCODE
+ SIZE_BITSET
;
610 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
614 len
= SIZE_OPCODE
+ SIZE_BITSET
;
616 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
617 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
619 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
627 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
631 if (IS_NULL(cc
->mbuf
)) {
632 if (IS_NCCLASS_NOT(cc
))
633 add_opcode(reg
, OP_CCLASS_NOT
);
635 add_opcode(reg
, OP_CCLASS
);
637 r
= add_bitset(reg
, cc
->bs
);
640 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
641 if (IS_NCCLASS_NOT(cc
))
642 add_opcode(reg
, OP_CCLASS_MB_NOT
);
644 add_opcode(reg
, OP_CCLASS_MB
);
646 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
649 if (IS_NCCLASS_NOT(cc
))
650 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
652 add_opcode(reg
, OP_CCLASS_MIX
);
654 r
= add_bitset(reg
, cc
->bs
);
656 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
664 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
666 #define REPEAT_RANGE_ALLOC 4
670 if (reg
->repeat_range_alloc
== 0) {
671 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
672 CHECK_NULL_RETURN_MEMERR(p
);
673 reg
->repeat_range
= p
;
674 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
676 else if (reg
->repeat_range_alloc
<= id
) {
678 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
679 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
680 sizeof(OnigRepeatRange
) * n
);
681 CHECK_NULL_RETURN_MEMERR(p
);
682 reg
->repeat_range
= p
;
683 reg
->repeat_range_alloc
= n
;
686 p
= reg
->repeat_range
;
690 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
695 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
699 int num_repeat
= reg
->num_repeat
;
701 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
703 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
706 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
709 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
712 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
716 #ifdef USE_SUBEXP_CALL
719 IS_QUANTIFIER_IN_REPEAT(qn
)) {
720 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
723 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
726 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
731 is_anychar_star_quantifier(QtfrNode
* qn
)
733 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
734 NTYPE(qn
->target
) == NT_CANY
)
740 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
741 #define CKN_ON (ckn > 0)
743 #ifdef USE_COMBINATION_EXPLOSION_CHECK
746 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
748 int len
, mod_tlen
, cklen
;
750 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
751 int empty_info
= qn
->target_empty_info
;
752 int tlen
= compile_length_tree(qn
->target
, reg
);
754 if (tlen
< 0) return tlen
;
756 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
758 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
761 if (NTYPE(qn
->target
) == NT_CANY
) {
762 if (qn
->greedy
&& infinite
) {
763 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
764 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
766 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
771 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
775 if (infinite
&& qn
->lower
<= 1) {
782 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
790 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
793 else if (qn
->upper
== 0) {
794 if (qn
->is_referred
!= 0) /* /(?<n>..){0}/ */
795 len
= SIZE_OP_JUMP
+ tlen
;
799 else if (qn
->upper
== 1 && qn
->greedy
) {
800 if (qn
->lower
== 0) {
802 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
805 len
= SIZE_OP_PUSH
+ tlen
;
812 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
813 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
816 len
= SIZE_OP_REPEAT_INC
817 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
819 len
+= SIZE_OP_STATE_CHECK
;
826 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
830 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
831 int empty_info
= qn
->target_empty_info
;
832 int tlen
= compile_length_tree(qn
->target
, reg
);
834 if (tlen
< 0) return tlen
;
836 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
838 if (is_anychar_star_quantifier(qn
)) {
839 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
841 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
842 if (IS_MULTILINE(reg
->options
))
843 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
845 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
848 r
= add_state_check_num(reg
, ckn
);
852 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
855 if (IS_MULTILINE(reg
->options
)) {
856 r
= add_opcode(reg
, (CKN_ON
?
857 OP_STATE_CHECK_ANYCHAR_ML_STAR
858 : OP_ANYCHAR_ML_STAR
));
861 r
= add_opcode(reg
, (CKN_ON
?
862 OP_STATE_CHECK_ANYCHAR_STAR
867 r
= add_state_check_num(reg
, ckn
);
874 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
878 if (infinite
&& qn
->lower
<= 1) {
880 if (qn
->lower
== 1) {
881 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
882 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
887 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
889 r
= add_state_check_num(reg
, ckn
);
891 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
894 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
897 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
899 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
900 -(mod_tlen
+ (int )SIZE_OP_JUMP
901 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
904 if (qn
->lower
== 0) {
905 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
908 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
911 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
913 r
= add_state_check_num(reg
, ckn
);
915 r
= add_rel_addr(reg
,
916 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
919 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
922 else if (qn
->upper
== 0) {
923 if (qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
924 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
926 r
= compile_tree(qn
->target
, reg
);
931 else if (qn
->upper
== 1 && qn
->greedy
) {
932 if (qn
->lower
== 0) {
934 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
936 r
= add_state_check_num(reg
, ckn
);
938 r
= add_rel_addr(reg
, tlen
);
941 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
946 r
= compile_tree(qn
->target
, reg
);
948 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
950 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
952 r
= add_state_check_num(reg
, ckn
);
954 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
957 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
961 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
963 r
= compile_tree(qn
->target
, reg
);
966 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
969 r
= add_opcode(reg
, OP_STATE_CHECK
);
971 r
= add_state_check_num(reg
, ckn
);
977 #else /* USE_COMBINATION_EXPLOSION_CHECK */
980 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
983 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
984 int empty_info
= qn
->target_empty_info
;
985 int tlen
= compile_length_tree(qn
->target
, reg
);
987 if (tlen
< 0) return tlen
;
990 if (NTYPE(qn
->target
) == NT_CANY
) {
991 if (qn
->greedy
&& infinite
) {
992 if (IS_NOT_NULL(qn
->next_head_exact
))
993 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
995 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
1000 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1005 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1006 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1010 len
= tlen
* qn
->lower
;
1014 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1015 if (IS_NOT_NULL(qn
->head_exact
))
1016 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1019 if (IS_NOT_NULL(qn
->next_head_exact
))
1020 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1022 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1025 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1027 else if (qn
->upper
== 0 && qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
1028 len
= SIZE_OP_JUMP
+ tlen
;
1030 else if (!infinite
&& qn
->greedy
&&
1031 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1032 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1033 len
= tlen
* qn
->lower
;
1034 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1036 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1037 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1040 len
= SIZE_OP_REPEAT_INC
1041 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1048 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1051 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1052 int empty_info
= qn
->target_empty_info
;
1053 int tlen
= compile_length_tree(qn
->target
, reg
);
1055 if (tlen
< 0) return tlen
;
1057 if (is_anychar_star_quantifier(qn
)) {
1058 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1060 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1061 if (IS_MULTILINE(reg
->options
))
1062 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1064 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1066 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1069 if (IS_MULTILINE(reg
->options
))
1070 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1072 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1076 if (empty_info
!= 0)
1077 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1082 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1083 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1085 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1086 if (IS_NOT_NULL(qn
->head_exact
))
1087 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1090 if (IS_NOT_NULL(qn
->next_head_exact
))
1091 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1093 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1096 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1101 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1106 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1107 if (IS_NOT_NULL(qn
->head_exact
)) {
1108 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1109 mod_tlen
+ SIZE_OP_JUMP
);
1111 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1112 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1114 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1115 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1119 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1120 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1121 mod_tlen
+ SIZE_OP_JUMP
);
1123 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1124 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1126 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1127 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1130 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1132 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1134 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1135 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1139 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1141 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1143 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1146 else if (qn
->upper
== 0 && qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
1147 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1149 r
= compile_tree(qn
->target
, reg
);
1151 else if (!infinite
&& qn
->greedy
&&
1152 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1153 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1154 int n
= qn
->upper
- qn
->lower
;
1156 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1159 for (i
= 0; i
< n
; i
++) {
1160 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1161 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1163 r
= compile_tree(qn
->target
, reg
);
1167 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1168 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1170 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1172 r
= compile_tree(qn
->target
, reg
);
1175 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1179 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1182 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1185 OnigOptionType prev
= reg
->options
;
1187 reg
->options
= node
->option
;
1188 tlen
= compile_length_tree(node
->target
, reg
);
1189 reg
->options
= prev
;
1191 if (tlen
< 0) return tlen
;
1193 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1194 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1195 + tlen
+ SIZE_OP_SET_OPTION
;
1202 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1205 OnigOptionType prev
= reg
->options
;
1207 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1208 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1210 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1212 r
= add_opcode(reg
, OP_FAIL
);
1216 reg
->options
= node
->option
;
1217 r
= compile_tree(node
->target
, reg
);
1218 reg
->options
= prev
;
1220 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1222 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1228 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1233 if (node
->type
== ENCLOSE_OPTION
)
1234 return compile_length_option_node(node
, reg
);
1237 tlen
= compile_length_tree(node
->target
, reg
);
1238 if (tlen
< 0) return tlen
;
1243 switch (node
->type
) {
1244 case ENCLOSE_MEMORY
:
1245 #ifdef USE_SUBEXP_CALL
1246 if (IS_ENCLOSE_CALLED(node
)) {
1247 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1248 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1249 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1250 len
+= (IS_ENCLOSE_RECURSION(node
)
1251 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1253 len
+= (IS_ENCLOSE_RECURSION(node
)
1254 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1256 else if (IS_ENCLOSE_RECURSION(node
)) {
1257 len
= SIZE_OP_MEMORY_START_PUSH
;
1258 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1259 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_REC
);
1264 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1265 len
= SIZE_OP_MEMORY_START_PUSH
;
1267 len
= SIZE_OP_MEMORY_START
;
1269 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1270 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1274 case ENCLOSE_STOP_BACKTRACK
:
1275 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1276 QtfrNode
* qn
= NQTFR(node
->target
);
1277 tlen
= compile_length_tree(qn
->target
, reg
);
1278 if (tlen
< 0) return tlen
;
1280 len
= tlen
* qn
->lower
1281 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1284 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1288 case ENCLOSE_CONDITION
:
1289 len
= SIZE_OP_CONDITION
;
1290 if (NTYPE(node
->target
) == NT_ALT
) {
1291 Node
* x
= node
->target
;
1293 tlen
= compile_length_tree(NCAR(x
), reg
); /* yes-node */
1294 if (tlen
< 0) return tlen
;
1295 len
+= tlen
+ SIZE_OP_JUMP
;
1296 if (NCDR(x
) == NULL
) return ONIGERR_PARSER_BUG
;
1298 tlen
= compile_length_tree(NCAR(x
), reg
); /* no-node */
1299 if (tlen
< 0) return tlen
;
1301 if (NCDR(x
) != NULL
) return ONIGERR_INVALID_CONDITION_PATTERN
;
1304 return ONIGERR_PARSER_BUG
;
1308 case ENCLOSE_ABSENT
:
1309 len
= SIZE_OP_PUSH_ABSENT_POS
+ SIZE_OP_ABSENT
+ tlen
+ SIZE_OP_ABSENT_END
;
1313 return ONIGERR_TYPE_BUG
;
1320 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1323 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1327 if (node
->type
== ENCLOSE_OPTION
)
1328 return compile_option_node(node
, reg
);
1330 switch (node
->type
) {
1331 case ENCLOSE_MEMORY
:
1332 #ifdef USE_SUBEXP_CALL
1333 if (IS_ENCLOSE_CALLED(node
)) {
1334 r
= add_opcode(reg
, OP_CALL
);
1336 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1337 node
->state
|= NST_ADDR_FIXED
;
1338 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1340 len
= compile_length_tree(node
->target
, reg
);
1341 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1342 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1343 len
+= (IS_ENCLOSE_RECURSION(node
)
1344 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1346 len
+= (IS_ENCLOSE_RECURSION(node
)
1347 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1349 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1353 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1354 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1356 r
= add_opcode(reg
, OP_MEMORY_START
);
1358 r
= add_mem_num(reg
, node
->regnum
);
1360 r
= compile_tree(node
->target
, reg
);
1362 #ifdef USE_SUBEXP_CALL
1363 if (IS_ENCLOSE_CALLED(node
)) {
1364 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1365 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1366 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1368 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1369 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1372 r
= add_mem_num(reg
, node
->regnum
);
1374 r
= add_opcode(reg
, OP_RETURN
);
1376 else if (IS_ENCLOSE_RECURSION(node
)) {
1377 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1378 r
= add_opcode(reg
, OP_MEMORY_END_PUSH_REC
);
1380 r
= add_opcode(reg
, OP_MEMORY_END_REC
);
1382 r
= add_mem_num(reg
, node
->regnum
);
1387 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1388 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1390 r
= add_opcode(reg
, OP_MEMORY_END
);
1392 r
= add_mem_num(reg
, node
->regnum
);
1396 case ENCLOSE_STOP_BACKTRACK
:
1397 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1398 QtfrNode
* qn
= NQTFR(node
->target
);
1399 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1402 len
= compile_length_tree(qn
->target
, reg
);
1403 if (len
< 0) return len
;
1405 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1407 r
= compile_tree(qn
->target
, reg
);
1409 r
= add_opcode(reg
, OP_POP
);
1411 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1412 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1415 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1417 r
= compile_tree(node
->target
, reg
);
1419 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1423 case ENCLOSE_CONDITION
:
1424 r
= add_opcode(reg
, OP_CONDITION
);
1426 r
= add_mem_num(reg
, node
->regnum
);
1429 if (NTYPE(node
->target
) == NT_ALT
) {
1430 Node
* x
= node
->target
;
1433 len
= compile_length_tree(NCAR(x
), reg
); /* yes-node */
1434 if (len
< 0) return len
;
1435 if (NCDR(x
) == NULL
) return ONIGERR_PARSER_BUG
;
1437 len2
= compile_length_tree(NCAR(x
), reg
); /* no-node */
1438 if (len2
< 0) return len2
;
1439 if (NCDR(x
) != NULL
) return ONIGERR_INVALID_CONDITION_PATTERN
;
1442 r
= add_rel_addr(reg
, len
+ SIZE_OP_JUMP
);
1444 r
= compile_tree(NCAR(x
), reg
); /* yes-node */
1446 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len2
);
1449 r
= compile_tree(NCAR(x
), reg
); /* no-node */
1452 return ONIGERR_PARSER_BUG
;
1456 case ENCLOSE_ABSENT
:
1457 len
= compile_length_tree(node
->target
, reg
);
1458 if (len
< 0) return len
;
1460 r
= add_opcode(reg
, OP_PUSH_ABSENT_POS
);
1462 r
= add_opcode_rel_addr(reg
, OP_ABSENT
, len
+ SIZE_OP_ABSENT_END
);
1464 r
= compile_tree(node
->target
, reg
);
1466 r
= add_opcode(reg
, OP_ABSENT_END
);
1470 return ONIGERR_TYPE_BUG
;
1478 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1484 tlen
= compile_length_tree(node
->target
, reg
);
1485 if (tlen
< 0) return tlen
;
1488 switch (node
->type
) {
1489 case ANCHOR_PREC_READ
:
1490 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1492 case ANCHOR_PREC_READ_NOT
:
1493 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1495 case ANCHOR_LOOK_BEHIND
:
1496 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1498 case ANCHOR_LOOK_BEHIND_NOT
:
1499 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1511 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1515 switch (node
->type
) {
1516 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1517 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1518 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1519 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1520 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1521 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1523 case ANCHOR_WORD_BOUND
:
1524 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_BOUND
);
1525 else r
= add_opcode(reg
, OP_WORD_BOUND
);
1527 case ANCHOR_NOT_WORD_BOUND
:
1528 if (node
->ascii_range
) r
= add_opcode(reg
, OP_NOT_ASCII_WORD_BOUND
);
1529 else r
= add_opcode(reg
, OP_NOT_WORD_BOUND
);
1531 #ifdef USE_WORD_BEGIN_END
1532 case ANCHOR_WORD_BEGIN
:
1533 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_BEGIN
);
1534 else r
= add_opcode(reg
, OP_WORD_BEGIN
);
1536 case ANCHOR_WORD_END
:
1537 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_END
);
1538 else r
= add_opcode(reg
, OP_WORD_END
);
1541 case ANCHOR_KEEP
: r
= add_opcode(reg
, OP_KEEP
); break;
1543 case ANCHOR_PREC_READ
:
1544 r
= add_opcode(reg
, OP_PUSH_POS
);
1546 r
= compile_tree(node
->target
, reg
);
1548 r
= add_opcode(reg
, OP_POP_POS
);
1551 case ANCHOR_PREC_READ_NOT
:
1552 len
= compile_length_tree(node
->target
, reg
);
1553 if (len
< 0) return len
;
1554 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1556 r
= compile_tree(node
->target
, reg
);
1558 r
= add_opcode(reg
, OP_FAIL_POS
);
1561 case ANCHOR_LOOK_BEHIND
:
1564 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1566 if (node
->char_len
< 0) {
1567 r
= get_char_length_tree(node
->target
, reg
, &n
);
1568 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1572 r
= add_length(reg
, n
);
1574 r
= compile_tree(node
->target
, reg
);
1578 case ANCHOR_LOOK_BEHIND_NOT
:
1581 len
= compile_length_tree(node
->target
, reg
);
1582 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1583 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1585 if (node
->char_len
< 0) {
1586 r
= get_char_length_tree(node
->target
, reg
, &n
);
1587 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1591 r
= add_length(reg
, n
);
1593 r
= compile_tree(node
->target
, reg
);
1595 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1600 return ONIGERR_TYPE_BUG
;
1608 compile_length_tree(Node
* node
, regex_t
* reg
)
1617 r
= compile_length_tree(NCAR(node
), reg
);
1618 if (r
< 0) return r
;
1620 } while (IS_NOT_NULL(node
= NCDR(node
)));
1629 r
= compile_length_tree(NCAR(node
), reg
);
1630 if (r
< 0) return r
;
1633 } while (IS_NOT_NULL(node
= NCDR(node
)));
1635 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1640 if (NSTRING_IS_RAW(node
))
1641 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1643 r
= compile_length_string_node(node
, reg
);
1647 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1657 BRefNode
* br
= NBREF(node
);
1659 #ifdef USE_BACKREF_WITH_LEVEL
1660 if (IS_BACKREF_NEST_LEVEL(br
)) {
1661 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1662 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1666 if (br
->back_num
== 1) {
1667 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1668 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1671 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1676 #ifdef USE_SUBEXP_CALL
1683 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1687 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1691 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1695 return ONIGERR_TYPE_BUG
;
1703 compile_tree(Node
* node
, regex_t
* reg
)
1705 int n
, type
, len
, pos
, r
= 0;
1711 r
= compile_tree(NCAR(node
), reg
);
1712 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1720 len
+= compile_length_tree(NCAR(x
), reg
);
1721 if (NCDR(x
) != NULL
) {
1722 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1724 } while (IS_NOT_NULL(x
= NCDR(x
)));
1725 pos
= reg
->used
+ len
; /* goal position */
1728 len
= compile_length_tree(NCAR(node
), reg
);
1729 if (IS_NOT_NULL(NCDR(node
))) {
1730 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1733 r
= compile_tree(NCAR(node
), reg
);
1735 if (IS_NOT_NULL(NCDR(node
))) {
1736 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1737 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1740 } while (IS_NOT_NULL(node
= NCDR(node
)));
1745 if (NSTRING_IS_RAW(node
))
1746 r
= compile_string_raw_node(NSTR(node
), reg
);
1748 r
= compile_string_node(node
, reg
);
1752 r
= compile_cclass_node(NCCLASS(node
), reg
);
1759 switch (NCTYPE(node
)->ctype
) {
1760 case ONIGENC_CTYPE_WORD
:
1761 if (NCTYPE(node
)->ascii_range
!= 0) {
1762 if (NCTYPE(node
)->not != 0) op
= OP_NOT_ASCII_WORD
;
1763 else op
= OP_ASCII_WORD
;
1766 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1771 return ONIGERR_TYPE_BUG
;
1774 r
= add_opcode(reg
, op
);
1779 if (IS_MULTILINE(reg
->options
))
1780 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1782 r
= add_opcode(reg
, OP_ANYCHAR
);
1787 BRefNode
* br
= NBREF(node
);
1789 #ifdef USE_BACKREF_WITH_LEVEL
1790 if (IS_BACKREF_NEST_LEVEL(br
)) {
1791 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1793 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1795 r
= add_length(reg
, br
->nest_level
);
1798 goto add_bacref_mems
;
1802 if (br
->back_num
== 1) {
1803 n
= br
->back_static
[0];
1804 if (IS_IGNORECASE(reg
->options
)) {
1805 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1807 r
= add_mem_num(reg
, n
);
1811 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1812 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1814 r
= add_opcode(reg
, OP_BACKREFN
);
1816 r
= add_mem_num(reg
, n
);
1825 if (IS_IGNORECASE(reg
->options
)) {
1826 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1829 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1833 #ifdef USE_BACKREF_WITH_LEVEL
1836 r
= add_length(reg
, br
->back_num
);
1839 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1840 r
= add_mem_num(reg
, p
[i
]);
1847 #ifdef USE_SUBEXP_CALL
1849 r
= compile_call(NCALL(node
), reg
);
1854 r
= compile_quantifier_node(NQTFR(node
), reg
);
1858 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1862 r
= compile_anchor_node(NANCHOR(node
), reg
);
1867 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1875 #ifdef USE_NAMED_GROUP
1878 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1881 Node
* node
= *plink
;
1883 switch (NTYPE(node
)) {
1887 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1888 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1893 Node
** ptarget
= &(NQTFR(node
)->target
);
1894 Node
* old
= *ptarget
;
1895 r
= noname_disable_map(ptarget
, map
, counter
);
1896 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1897 onig_reduce_nested_quantifier(node
, *ptarget
);
1904 EncloseNode
* en
= NENCLOSE(node
);
1905 if (en
->type
== ENCLOSE_MEMORY
) {
1906 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1908 map
[en
->regnum
].new_val
= *counter
;
1909 en
->regnum
= *counter
;
1911 else if (en
->regnum
!= 0) {
1912 *plink
= en
->target
;
1913 en
->target
= NULL_NODE
;
1914 onig_node_free(node
);
1915 r
= noname_disable_map(plink
, map
, counter
);
1919 r
= noname_disable_map(&(en
->target
), map
, counter
);
1924 if (NANCHOR(node
)->target
)
1925 r
= noname_disable_map(&(NANCHOR(node
)->target
), map
, counter
);
1936 renumber_node_backref(Node
* node
, GroupNumRemap
* map
, const int num_mem
)
1938 int i
, pos
, n
, old_num
;
1940 BRefNode
* bn
= NBREF(node
);
1942 if (! IS_BACKREF_NAME_REF(bn
))
1943 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1945 old_num
= bn
->back_num
;
1946 if (IS_NULL(bn
->back_dynamic
))
1947 backs
= bn
->back_static
;
1949 backs
= bn
->back_dynamic
;
1951 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1952 if (backs
[i
] > num_mem
) return ONIGERR_INVALID_BACKREF
;
1953 n
= map
[backs
[i
]].new_val
;
1965 renumber_by_map(Node
* node
, GroupNumRemap
* map
, const int num_mem
)
1969 switch (NTYPE(node
)) {
1973 r
= renumber_by_map(NCAR(node
), map
, num_mem
);
1974 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1977 r
= renumber_by_map(NQTFR(node
)->target
, map
, num_mem
);
1981 EncloseNode
* en
= NENCLOSE(node
);
1982 if (en
->type
== ENCLOSE_CONDITION
) {
1983 if (en
->regnum
> num_mem
) return ONIGERR_INVALID_BACKREF
;
1984 en
->regnum
= map
[en
->regnum
].new_val
;
1986 r
= renumber_by_map(en
->target
, map
, num_mem
);
1991 r
= renumber_node_backref(node
, map
, num_mem
);
1995 if (NANCHOR(node
)->target
)
1996 r
= renumber_by_map(NANCHOR(node
)->target
, map
, num_mem
);
2007 numbered_ref_check(Node
* node
)
2011 switch (NTYPE(node
)) {
2015 r
= numbered_ref_check(NCAR(node
));
2016 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2019 r
= numbered_ref_check(NQTFR(node
)->target
);
2022 r
= numbered_ref_check(NENCLOSE(node
)->target
);
2026 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
2027 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
2031 if (NANCHOR(node
)->target
)
2032 r
= numbered_ref_check(NANCHOR(node
)->target
);
2043 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
2045 int r
, i
, pos
, counter
;
2049 map
= (GroupNumRemap
* )xalloca(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
2050 CHECK_NULL_RETURN_MEMERR(map
);
2051 for (i
= 1; i
<= env
->num_mem
; i
++) {
2055 r
= noname_disable_map(root
, map
, &counter
);
2056 if (r
!= 0) return r
;
2058 r
= renumber_by_map(*root
, map
, env
->num_mem
);
2059 if (r
!= 0) return r
;
2061 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
2062 if (map
[i
].new_val
> 0) {
2063 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
2068 loc
= env
->capture_history
;
2069 BIT_STATUS_CLEAR(env
->capture_history
);
2070 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
2071 if (BIT_STATUS_AT(loc
, i
)) {
2072 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
2076 env
->num_mem
= env
->num_named
;
2077 reg
->num_mem
= env
->num_named
;
2079 return onig_renumber_name_table(reg
, map
);
2081 #endif /* USE_NAMED_GROUP */
2083 #ifdef USE_SUBEXP_CALL
2085 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
2091 for (i
= 0; i
< uslist
->num
; i
++) {
2092 en
= NENCLOSE(uslist
->us
[i
].target
);
2093 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
2094 addr
= en
->call_addr
;
2095 offset
= uslist
->us
[i
].offset
;
2097 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
2103 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2105 quantifiers_memory_node_info(Node
* node
)
2109 switch (NTYPE(node
)) {
2115 v
= quantifiers_memory_node_info(NCAR(node
));
2117 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
2121 # ifdef USE_SUBEXP_CALL
2123 if (IS_CALL_RECURSION(NCALL(node
))) {
2124 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
2127 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
2133 QtfrNode
* qn
= NQTFR(node
);
2134 if (qn
->upper
!= 0) {
2135 r
= quantifiers_memory_node_info(qn
->target
);
2142 EncloseNode
* en
= NENCLOSE(node
);
2144 case ENCLOSE_MEMORY
:
2145 return NQ_TARGET_IS_EMPTY_MEM
;
2148 case ENCLOSE_OPTION
:
2149 case ENCLOSE_STOP_BACKTRACK
:
2150 case ENCLOSE_CONDITION
:
2151 case ENCLOSE_ABSENT
:
2152 r
= quantifiers_memory_node_info(en
->target
);
2172 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2175 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2181 switch (NTYPE(node
)) {
2186 Node
** nodes
= SCANENV_MEM_NODES(env
);
2187 BRefNode
* br
= NBREF(node
);
2188 if (br
->state
& NST_RECURSION
) break;
2190 backs
= BACKREFS_P(br
);
2191 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2192 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2194 for (i
= 1; i
< br
->back_num
; i
++) {
2195 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2196 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2198 if (*min
> tmin
) *min
= tmin
;
2203 #ifdef USE_SUBEXP_CALL
2205 if (IS_CALL_RECURSION(NCALL(node
))) {
2206 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2207 if (IS_ENCLOSE_MIN_FIXED(en
))
2211 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2217 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2218 if (r
== 0) *min
+= tmin
;
2219 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2228 r
= get_min_match_length(x
, &tmin
, env
);
2230 if (y
== node
) *min
= tmin
;
2231 else if (*min
> tmin
) *min
= tmin
;
2232 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2238 StrNode
* sn
= NSTR(node
);
2239 *min
= sn
->end
- sn
->s
;
2254 QtfrNode
* qn
= NQTFR(node
);
2256 if (qn
->lower
> 0) {
2257 r
= get_min_match_length(qn
->target
, min
, env
);
2259 *min
= distance_multiply(*min
, qn
->lower
);
2266 EncloseNode
* en
= NENCLOSE(node
);
2268 case ENCLOSE_MEMORY
:
2269 if (IS_ENCLOSE_MIN_FIXED(en
))
2272 if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2273 *min
= 0; /* recursive */
2275 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2276 r
= get_min_match_length(en
->target
, min
, env
);
2277 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2280 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2286 case ENCLOSE_OPTION
:
2287 case ENCLOSE_STOP_BACKTRACK
:
2288 case ENCLOSE_CONDITION
:
2289 r
= get_min_match_length(en
->target
, min
, env
);
2292 case ENCLOSE_ABSENT
:
2307 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2313 switch (NTYPE(node
)) {
2316 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2318 *max
= distance_add(*max
, tmax
);
2319 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2324 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2325 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2326 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2331 StrNode
* sn
= NSTR(node
);
2332 *max
= sn
->end
- sn
->s
;
2337 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2342 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2349 Node
** nodes
= SCANENV_MEM_NODES(env
);
2350 BRefNode
* br
= NBREF(node
);
2351 if (br
->state
& NST_RECURSION
) {
2352 *max
= ONIG_INFINITE_DISTANCE
;
2355 backs
= BACKREFS_P(br
);
2356 for (i
= 0; i
< br
->back_num
; i
++) {
2357 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2358 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2360 if (*max
< tmax
) *max
= tmax
;
2365 #ifdef USE_SUBEXP_CALL
2367 if (! IS_CALL_RECURSION(NCALL(node
)))
2368 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2370 *max
= ONIG_INFINITE_DISTANCE
;
2376 QtfrNode
* qn
= NQTFR(node
);
2378 if (qn
->upper
!= 0) {
2379 r
= get_max_match_length(qn
->target
, max
, env
);
2380 if (r
== 0 && *max
!= 0) {
2381 if (! IS_REPEAT_INFINITE(qn
->upper
))
2382 *max
= distance_multiply(*max
, qn
->upper
);
2384 *max
= ONIG_INFINITE_DISTANCE
;
2392 EncloseNode
* en
= NENCLOSE(node
);
2394 case ENCLOSE_MEMORY
:
2395 if (IS_ENCLOSE_MAX_FIXED(en
))
2398 if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2399 *max
= ONIG_INFINITE_DISTANCE
;
2401 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2402 r
= get_max_match_length(en
->target
, max
, env
);
2403 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2406 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2412 case ENCLOSE_OPTION
:
2413 case ENCLOSE_STOP_BACKTRACK
:
2414 case ENCLOSE_CONDITION
:
2415 r
= get_max_match_length(en
->target
, max
, env
);
2418 case ENCLOSE_ABSENT
:
2432 #define GET_CHAR_LEN_VARLEN -1
2433 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2435 /* fixed size pattern node only */
2437 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2444 switch (NTYPE(node
)) {
2447 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2449 *len
= (int )distance_add(*len
, tlen
);
2450 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2458 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2459 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2460 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2469 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2471 r
= GET_CHAR_LEN_VARLEN
;
2481 StrNode
* sn
= NSTR(node
);
2483 while (s
< sn
->end
) {
2484 s
+= enclen(reg
->enc
, s
, sn
->end
);
2492 QtfrNode
* qn
= NQTFR(node
);
2493 if (qn
->lower
== qn
->upper
) {
2494 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2496 *len
= (int )distance_multiply(tlen
, qn
->lower
);
2499 r
= GET_CHAR_LEN_VARLEN
;
2503 #ifdef USE_SUBEXP_CALL
2505 if (! IS_CALL_RECURSION(NCALL(node
)))
2506 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2508 r
= GET_CHAR_LEN_VARLEN
;
2523 EncloseNode
* en
= NENCLOSE(node
);
2525 case ENCLOSE_MEMORY
:
2526 #ifdef USE_SUBEXP_CALL
2527 if (IS_ENCLOSE_CLEN_FIXED(en
))
2528 *len
= en
->char_len
;
2530 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2532 en
->char_len
= *len
;
2533 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2538 case ENCLOSE_OPTION
:
2539 case ENCLOSE_STOP_BACKTRACK
:
2540 case ENCLOSE_CONDITION
:
2541 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2543 case ENCLOSE_ABSENT
:
2554 r
= GET_CHAR_LEN_VARLEN
;
2562 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2564 return get_char_length_tree1(node
, reg
, len
, 0);
2567 /* x is not included y ==> 1 : 0 */
2569 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2584 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2585 NCTYPE(y
)->not != NCTYPE(x
)->not &&
2586 NCTYPE(y
)->ascii_range
== NCTYPE(x
)->ascii_range
)
2596 tmp
= x
; x
= y
; y
= tmp
;
2613 CClassNode
* xc
= NCCLASS(x
);
2616 switch (NCTYPE(y
)->ctype
) {
2617 case ONIGENC_CTYPE_WORD
:
2618 if (NCTYPE(y
)->not == 0) {
2619 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2620 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2621 if (BITSET_AT(xc
->bs
, i
)) {
2622 if (NCTYPE(y
)->ascii_range
) {
2623 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2626 if (ONIGENC_IS_CODE_WORD(reg
->enc
, i
)) return 0;
2635 if (IS_NOT_NULL(xc
->mbuf
)) return 0;
2636 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2638 if (NCTYPE(y
)->ascii_range
)
2639 is_word
= IS_CODE_SB_WORD(reg
->enc
, i
);
2641 is_word
= ONIGENC_IS_CODE_WORD(reg
->enc
, i
);
2643 if (!IS_NCCLASS_NOT(xc
)) {
2644 if (BITSET_AT(xc
->bs
, i
))
2648 if (! BITSET_AT(xc
->bs
, i
))
2665 CClassNode
* yc
= NCCLASS(y
);
2667 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2668 v
= BITSET_AT(xc
->bs
, i
);
2669 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2670 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2671 v
= BITSET_AT(yc
->bs
, i
);
2672 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2673 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2677 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2678 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2696 StrNode
* xs
= NSTR(x
);
2697 if (NSTRING_LEN(x
) == 0)
2702 switch (NCTYPE(y
)->ctype
) {
2703 case ONIGENC_CTYPE_WORD
:
2704 if (NCTYPE(y
)->ascii_range
) {
2705 if (ONIGENC_IS_MBC_ASCII_WORD(reg
->enc
, xs
->s
, xs
->end
))
2706 return NCTYPE(y
)->not;
2708 return !(NCTYPE(y
)->not);
2711 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2712 return NCTYPE(y
)->not;
2714 return !(NCTYPE(y
)->not);
2724 CClassNode
* cc
= NCCLASS(y
);
2726 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2727 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2728 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2735 StrNode
* ys
= NSTR(y
);
2736 len
= NSTRING_LEN(x
);
2737 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2738 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2743 for (i
= 0, p
= ys
->s
, q
= xs
->s
; (OnigDistance
)i
< len
; i
++, p
++, q
++) {
2744 if (*p
!= *q
) return 1;
2764 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2766 Node
* n
= NULL_NODE
;
2768 switch (NTYPE(node
)) {
2772 #ifdef USE_SUBEXP_CALL
2785 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2790 StrNode
* sn
= NSTR(node
);
2792 if (sn
->end
<= sn
->s
)
2796 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2806 QtfrNode
* qn
= NQTFR(node
);
2807 if (qn
->lower
> 0) {
2808 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2809 if (IS_NOT_NULL(qn
->head_exact
))
2813 n
= get_head_value_node(qn
->target
, exact
, reg
);
2820 EncloseNode
* en
= NENCLOSE(node
);
2822 case ENCLOSE_OPTION
:
2824 OnigOptionType options
= reg
->options
;
2826 reg
->options
= NENCLOSE(node
)->option
;
2827 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2828 reg
->options
= options
;
2832 case ENCLOSE_MEMORY
:
2833 case ENCLOSE_STOP_BACKTRACK
:
2834 case ENCLOSE_CONDITION
:
2835 n
= get_head_value_node(en
->target
, exact
, reg
);
2838 case ENCLOSE_ABSENT
:
2845 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2846 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2857 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2862 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2869 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2871 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2875 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2881 EncloseNode
* en
= NENCLOSE(node
);
2882 if ((en
->type
& enclose_mask
) == 0)
2885 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2890 type
= NANCHOR(node
)->type
;
2891 if ((type
& anchor_mask
) == 0)
2894 if (NANCHOR(node
)->target
)
2895 r
= check_type_tree(NANCHOR(node
)->target
,
2896 type_mask
, enclose_mask
, anchor_mask
);
2905 #ifdef USE_SUBEXP_CALL
2907 # define RECURSION_EXIST 1
2908 # define RECURSION_INFINITE 2
2911 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2926 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2927 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2930 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2931 if (ret
!= 0) return ret
;
2932 if (min
!= 0) head
= 0;
2934 } while (IS_NOT_NULL(x
= NCDR(x
)));
2941 r
= RECURSION_EXIST
;
2943 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2944 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2946 } while (IS_NOT_NULL(node
= NCDR(node
)));
2951 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2952 if (r
== RECURSION_EXIST
) {
2953 if (NQTFR(node
)->lower
== 0) r
= 0;
2959 AnchorNode
* an
= NANCHOR(node
);
2961 case ANCHOR_PREC_READ
:
2962 case ANCHOR_PREC_READ_NOT
:
2963 case ANCHOR_LOOK_BEHIND
:
2964 case ANCHOR_LOOK_BEHIND_NOT
:
2965 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2972 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2976 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2978 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2979 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2981 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2982 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2983 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2995 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
3005 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
3006 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3010 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
3015 AnchorNode
* an
= NANCHOR(node
);
3017 case ANCHOR_PREC_READ
:
3018 case ANCHOR_PREC_READ_NOT
:
3019 case ANCHOR_LOOK_BEHIND
:
3020 case ANCHOR_LOOK_BEHIND_NOT
:
3021 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
3029 EncloseNode
* en
= NENCLOSE(node
);
3031 if (IS_ENCLOSE_RECURSION(en
)) {
3032 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
3033 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
3034 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
3035 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
3037 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
3050 subexp_recursive_check(Node
* node
)
3054 switch (NTYPE(node
)) {
3058 r
|= subexp_recursive_check(NCAR(node
));
3059 } while (IS_NOT_NULL(node
= NCDR(node
)));
3063 r
= subexp_recursive_check(NQTFR(node
)->target
);
3068 AnchorNode
* an
= NANCHOR(node
);
3070 case ANCHOR_PREC_READ
:
3071 case ANCHOR_PREC_READ_NOT
:
3072 case ANCHOR_LOOK_BEHIND
:
3073 case ANCHOR_LOOK_BEHIND_NOT
:
3074 r
= subexp_recursive_check(an
->target
);
3081 r
= subexp_recursive_check(NCALL(node
)->target
);
3082 if (r
!= 0) SET_CALL_RECURSION(node
);
3086 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
3088 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
3089 return 1; /* recursion */
3091 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
3092 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
3093 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
3106 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
3108 # define FOUND_CALLED_NODE 1
3120 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
3121 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
3122 else if (ret
< 0) return ret
;
3123 } while (IS_NOT_NULL(node
= NCDR(node
)));
3128 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
3129 if (NQTFR(node
)->upper
== 0) {
3130 if (r
== FOUND_CALLED_NODE
)
3131 NQTFR(node
)->is_referred
= 1;
3137 AnchorNode
* an
= NANCHOR(node
);
3139 case ANCHOR_PREC_READ
:
3140 case ANCHOR_PREC_READ_NOT
:
3141 case ANCHOR_LOOK_BEHIND
:
3142 case ANCHOR_LOOK_BEHIND_NOT
:
3143 r
= subexp_recursive_check_trav(an
->target
, env
);
3151 EncloseNode
* en
= NENCLOSE(node
);
3153 if (! IS_ENCLOSE_RECURSION(en
)) {
3154 if (IS_ENCLOSE_CALLED(en
)) {
3155 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
3156 r
= subexp_recursive_check(en
->target
);
3157 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
3158 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
3161 r
= subexp_recursive_check_trav(en
->target
, env
);
3162 if (IS_ENCLOSE_CALLED(en
))
3163 r
|= FOUND_CALLED_NODE
;
3175 setup_subexp_call(Node
* node
, ScanEnv
* env
)
3184 r
= setup_subexp_call(NCAR(node
), env
);
3185 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3190 r
= setup_subexp_call(NCAR(node
), env
);
3191 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3195 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3198 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3203 CallNode
* cn
= NCALL(node
);
3204 Node
** nodes
= SCANENV_MEM_NODES(env
);
3206 if (cn
->group_num
!= 0) {
3207 int gnum
= cn
->group_num
;
3209 # ifdef USE_NAMED_GROUP
3210 if (env
->num_named
> 0 &&
3211 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3212 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3213 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3216 if (gnum
> env
->num_mem
) {
3217 onig_scan_env_set_error_string(env
,
3218 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3219 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3222 # ifdef USE_NAMED_GROUP
3225 cn
->target
= nodes
[cn
->group_num
];
3226 if (IS_NULL(cn
->target
)) {
3227 onig_scan_env_set_error_string(env
,
3228 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3229 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3231 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3232 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3233 cn
->unset_addr_list
= env
->unset_addr_list
;
3235 # ifdef USE_NAMED_GROUP
3236 # ifdef USE_PERL_SUBEXP_CALL
3237 else if (cn
->name
== cn
->name_end
) {
3244 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3247 onig_scan_env_set_error_string(env
,
3248 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3249 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3252 ! IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
)) {
3253 onig_scan_env_set_error_string(env
,
3254 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3255 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3258 cn
->group_num
= refs
[0];
3268 AnchorNode
* an
= NANCHOR(node
);
3271 case ANCHOR_PREC_READ
:
3272 case ANCHOR_PREC_READ_NOT
:
3273 case ANCHOR_LOOK_BEHIND
:
3274 case ANCHOR_LOOK_BEHIND_NOT
:
3275 r
= setup_subexp_call(an
->target
, env
);
3289 /* divide different length alternatives in look-behind.
3290 (?<=A|B) ==> (?<=A)|(?<=B)
3291 (?<!A|B) ==> (?<!A)(?<!B)
3294 divide_look_behind_alternatives(Node
* node
)
3296 Node
*head
, *np
, *insert_node
;
3297 AnchorNode
* an
= NANCHOR(node
);
3298 int anc_type
= an
->type
;
3302 swap_node(node
, head
);
3304 NANCHOR(head
)->target
= np
;
3307 while ((np
= NCDR(np
)) != NULL_NODE
) {
3308 insert_node
= onig_node_new_anchor(anc_type
);
3309 CHECK_NULL_RETURN_MEMERR(insert_node
);
3310 NANCHOR(insert_node
)->target
= NCAR(np
);
3311 NCAR(np
) = insert_node
;
3314 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3317 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3318 } while ((np
= NCDR(np
)) != NULL_NODE
);
3324 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3327 AnchorNode
* an
= NANCHOR(node
);
3329 r
= get_char_length_tree(an
->target
, reg
, &len
);
3332 else if (r
== GET_CHAR_LEN_VARLEN
)
3333 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3334 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3335 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3336 r
= divide_look_behind_alternatives(node
);
3338 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3345 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3351 if (type
== NT_QTFR
) {
3352 QtfrNode
* qn
= NQTFR(node
);
3353 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3354 #ifdef USE_QTFR_PEEK_NEXT
3355 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3356 /* '\0': for UTF-16BE etc... */
3357 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3358 qn
->next_head_exact
= n
;
3361 /* automatic possessification a*b ==> (?>a*)b */
3362 if (qn
->lower
<= 1) {
3363 int ttype
= NTYPE(qn
->target
);
3364 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3366 x
= get_head_value_node(qn
->target
, 0, reg
);
3367 if (IS_NOT_NULL(x
)) {
3368 y
= get_head_value_node(next_node
, 0, reg
);
3369 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3370 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3371 CHECK_NULL_RETURN_MEMERR(en
);
3372 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3373 swap_node(node
, en
);
3374 NENCLOSE(node
)->target
= en
;
3381 else if (type
== NT_ENCLOSE
) {
3382 EncloseNode
* en
= NENCLOSE(node
);
3383 if (en
->type
== ENCLOSE_MEMORY
) {
3393 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3395 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3396 UChar
*sbuf
, *ebuf
, *sp
;
3398 OnigDistance sbuf_size
;
3399 StrNode
* sn
= NSTR(node
);
3402 sbuf_size
= (end
- sn
->s
) * 2;
3403 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3404 CHECK_NULL_RETURN_MEMERR(sbuf
);
3405 ebuf
= sbuf
+ sbuf_size
;
3410 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3411 for (i
= 0; i
< len
; i
++) {
3413 UChar
* p
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2);
3416 return ONIGERR_MEMORY
;
3419 sp
= sbuf
+ sbuf_size
;
3421 ebuf
= sbuf
+ sbuf_size
;
3428 r
= onig_node_str_set(node
, sbuf
, sp
);
3435 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3441 node
= onig_node_new_str(s
, end
);
3442 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3444 r
= update_string_node_case_fold(reg
, node
);
3446 onig_node_free(node
);
3450 NSTRING_SET_AMBIG(node
);
3451 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3457 is_case_fold_variable_len(int item_num
, OnigCaseFoldCodeItem items
[],
3462 for (i
= 0; i
< item_num
; i
++) {
3463 if (items
[i
].byte_len
!= slen
) {
3466 if (items
[i
].code_len
!= 1) {
3474 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3475 UChar
*p
, int slen
, UChar
*end
,
3476 regex_t
* reg
, Node
**rnode
)
3478 int r
, i
, j
, len
, varlen
;
3479 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3480 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3482 *rnode
= var_anode
= NULL_NODE
;
3485 for (i
= 0; i
< item_num
; i
++) {
3486 if (items
[i
].byte_len
!= slen
) {
3493 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3494 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3496 xnode
= onig_node_new_list(NULL
, NULL
);
3497 if (IS_NULL(xnode
)) goto mem_err
;
3498 NCAR(var_anode
) = xnode
;
3500 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3501 if (IS_NULL(anode
)) goto mem_err
;
3502 NCAR(xnode
) = anode
;
3505 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3506 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3509 snode
= onig_node_new_str(p
, p
+ slen
);
3510 if (IS_NULL(snode
)) goto mem_err
;
3512 NCAR(anode
) = snode
;
3514 for (i
= 0; i
< item_num
; i
++) {
3515 snode
= onig_node_new_str(NULL
, NULL
);
3516 if (IS_NULL(snode
)) goto mem_err
;
3518 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3519 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3525 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3526 if (r
!= 0) goto mem_err2
;
3529 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3534 if (items
[i
].byte_len
!= slen
) {
3536 UChar
*q
= p
+ items
[i
].byte_len
;
3539 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3545 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3546 if (IS_NULL(xnode
)) {
3548 onig_node_free(rem
);
3551 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3553 onig_node_free(xnode
);
3554 onig_node_free(rem
);
3564 NCDR(var_anode
) = an
;
3577 onig_node_free(snode
);
3580 onig_node_free(*rnode
);
3582 return ONIGERR_MEMORY
;
3586 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3588 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3590 int r
, n
, len
, alt_num
;
3592 UChar
*start
, *end
, *p
;
3593 Node
*top_root
, *root
, *snode
, *prev_node
;
3594 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3595 StrNode
* sn
= NSTR(node
);
3597 if (NSTRING_IS_AMBIG(node
)) return 0;
3601 if (start
>= end
) return 0;
3604 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3608 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3615 len
= enclen(reg
->enc
, p
, end
);
3617 varlen
= is_case_fold_variable_len(n
, items
, len
);
3618 if (n
== 0 || varlen
== 0) {
3619 if (IS_NULL(snode
)) {
3620 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3621 onig_node_free(top_root
);
3622 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3623 if (IS_NULL(root
)) {
3624 onig_node_free(prev_node
);
3629 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3630 if (IS_NULL(snode
)) goto mem_err
;
3631 if (IS_NOT_NULL(root
)) {
3632 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3633 onig_node_free(snode
);
3639 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3640 if (r
!= 0) goto err
;
3644 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3646 if (IS_NOT_NULL(snode
)) {
3647 r
= update_string_node_case_fold(reg
, snode
);
3649 NSTRING_SET_AMBIG(snode
);
3652 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3653 onig_node_free(top_root
);
3654 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3655 if (IS_NULL(root
)) {
3656 onig_node_free(prev_node
);
3661 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3662 if (r
< 0) goto mem_err
;
3664 if (IS_NULL(root
)) {
3665 top_root
= prev_node
;
3668 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3669 onig_node_free(prev_node
);
3674 root
= NCAR(prev_node
);
3677 if (IS_NOT_NULL(root
)) {
3678 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3679 onig_node_free(prev_node
);
3690 if (IS_NOT_NULL(snode
)) {
3691 r
= update_string_node_case_fold(reg
, snode
);
3693 NSTRING_SET_AMBIG(snode
);
3700 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3701 if (r
!= 0) goto mem_err
;
3703 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3704 onig_node_free(top_root
);
3705 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3706 if (IS_NULL(root
)) {
3707 onig_node_free(srem
);
3708 onig_node_free(prev_node
);
3713 if (IS_NULL(root
)) {
3717 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3718 onig_node_free(srem
);
3725 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3726 swap_node(node
, top_root
);
3727 onig_node_free(top_root
);
3734 onig_node_free(top_root
);
3739 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3741 # define CEC_THRES_NUM_BIG_REPEAT 512
3742 # define CEC_INFINITE_NUM 0x7fffffff
3744 # define CEC_IN_INFINITE_REPEAT (1<<0)
3745 # define CEC_IN_FINITE_REPEAT (1<<1)
3746 # define CEC_CONT_BIG_REPEAT (1<<2)
3749 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3759 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3760 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3768 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3770 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3776 int child_state
= state
;
3778 QtfrNode
* qn
= NQTFR(node
);
3779 Node
* target
= qn
->target
;
3782 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3783 if (qn
->upper
> 1) {
3784 /* {0,1}, {1,1} are allowed */
3785 child_state
|= CEC_IN_FINITE_REPEAT
;
3787 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3788 if (env
->backrefed_mem
== 0) {
3789 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3790 EncloseNode
* en
= NENCLOSE(qn
->target
);
3791 if (en
->type
== ENCLOSE_MEMORY
) {
3792 if (NTYPE(en
->target
) == NT_QTFR
) {
3793 QtfrNode
* q
= NQTFR(en
->target
);
3794 if (IS_REPEAT_INFINITE(q
->upper
)
3795 && q
->greedy
== qn
->greedy
) {
3796 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3798 child_state
= state
;
3807 if (state
& CEC_IN_FINITE_REPEAT
) {
3808 qn
->comb_exp_check_num
= -1;
3811 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3812 var_num
= CEC_INFINITE_NUM
;
3813 child_state
|= CEC_IN_INFINITE_REPEAT
;
3816 var_num
= qn
->upper
- qn
->lower
;
3819 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3820 add_state
|= CEC_CONT_BIG_REPEAT
;
3822 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3823 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3824 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3825 if (qn
->comb_exp_check_num
== 0) {
3826 env
->num_comb_exp_check
++;
3827 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3828 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3829 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3834 r
= setup_comb_exp_check(target
, child_state
, env
);
3841 EncloseNode
* en
= NENCLOSE(node
);
3844 case ENCLOSE_MEMORY
:
3846 if (env
->curr_max_regnum
< en
->regnum
)
3847 env
->curr_max_regnum
= en
->regnum
;
3849 r
= setup_comb_exp_check(en
->target
, state
, env
);
3854 r
= setup_comb_exp_check(en
->target
, state
, env
);
3860 # ifdef USE_SUBEXP_CALL
3862 if (IS_CALL_RECURSION(NCALL(node
)))
3863 env
->has_recursion
= 1;
3865 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3877 #define IN_ALT (1<<0)
3878 #define IN_NOT (1<<1)
3879 #define IN_REPEAT (1<<2)
3880 #define IN_VAR_REPEAT (1<<3)
3881 #define IN_CALL (1<<4)
3882 #define IN_RECCALL (1<<5)
3884 /* setup_tree does the following work.
3885 1. check empty loop. (set qn->target_empty_info)
3886 2. expand ignore-case in char class.
3887 3. set memory status bit flags. (reg->mem_stats)
3888 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3889 5. find invalid patterns in look-behind.
3890 6. expand repeated string.
3893 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3903 Node
* prev
= NULL_NODE
;
3905 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3906 if (IS_NOT_NULL(prev
) && r
== 0) {
3907 r
= next_setup(prev
, NCAR(node
), reg
);
3910 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3916 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3917 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3924 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3925 r
= expand_case_fold_string(node
, reg
);
3933 #ifdef USE_SUBEXP_CALL
3942 Node
** nodes
= SCANENV_MEM_NODES(env
);
3943 BRefNode
* br
= NBREF(node
);
3945 for (i
= 0; i
< br
->back_num
; i
++) {
3946 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3947 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3948 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3949 #ifdef USE_BACKREF_WITH_LEVEL
3950 if (IS_BACKREF_NEST_LEVEL(br
)) {
3951 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3954 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3962 QtfrNode
* qn
= NQTFR(node
);
3963 Node
* target
= qn
->target
;
3965 if ((state
& IN_REPEAT
) != 0) {
3966 qn
->state
|= NST_IN_REPEAT
;
3969 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3970 r
= get_min_match_length(target
, &d
, env
);
3973 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3974 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3975 r
= quantifiers_memory_node_info(target
);
3978 qn
->target_empty_info
= r
;
3982 r
= get_max_match_length(target
, &d
, env
);
3983 if (r
== 0 && d
== 0) {
3984 /* ()* ==> ()?, ()+ ==> () */
3986 if (qn
->lower
> 1) qn
->lower
= 1;
3987 if (NTYPE(target
) == NT_STR
) {
3988 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
3996 if (qn
->lower
!= qn
->upper
)
3997 state
|= IN_VAR_REPEAT
;
3998 r
= setup_tree(target
, reg
, state
, env
);
4002 #define EXPAND_STRING_MAX_LENGTH 100
4003 if (NTYPE(target
) == NT_STR
) {
4004 if (qn
->lower
> 1) {
4005 int i
, n
= qn
->lower
;
4006 OnigDistance len
= NSTRING_LEN(target
);
4007 StrNode
* sn
= NSTR(target
);
4010 np
= onig_node_new_str(sn
->s
, sn
->end
);
4011 if (IS_NULL(np
)) return ONIGERR_MEMORY
;
4012 NSTR(np
)->flag
= sn
->flag
;
4014 for (i
= 1; i
< n
&& (i
+1) * len
<= EXPAND_STRING_MAX_LENGTH
; i
++) {
4015 r
= onig_node_str_cat(np
, sn
->s
, sn
->end
);
4021 if (i
< qn
->upper
|| IS_REPEAT_INFINITE(qn
->upper
)) {
4025 if (! IS_REPEAT_INFINITE(qn
->upper
))
4028 np1
= onig_node_new_list(np
, NULL
);
4031 return ONIGERR_MEMORY
;
4033 swap_node(np1
, node
);
4034 np2
= onig_node_list_add(node
, np1
);
4036 onig_node_free(np1
);
4037 return ONIGERR_MEMORY
;
4041 swap_node(np
, node
);
4044 break; /* break case NT_QTFR: */
4048 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4049 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
4050 if (NTYPE(target
) == NT_QTFR
) {
4051 QtfrNode
* tqn
= NQTFR(target
);
4052 if (IS_NOT_NULL(tqn
->head_exact
)) {
4053 qn
->head_exact
= tqn
->head_exact
;
4054 tqn
->head_exact
= NULL
;
4058 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
4067 EncloseNode
* en
= NENCLOSE(node
);
4070 case ENCLOSE_OPTION
:
4072 OnigOptionType options
= reg
->options
;
4073 reg
->options
= NENCLOSE(node
)->option
;
4074 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4075 reg
->options
= options
;
4079 case ENCLOSE_MEMORY
:
4080 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
| IN_CALL
)) != 0) {
4081 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
4082 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4084 if (IS_ENCLOSE_CALLED(en
))
4086 if (IS_ENCLOSE_RECURSION(en
))
4087 state
|= IN_RECCALL
;
4088 else if ((state
& IN_RECCALL
) != 0)
4089 SET_CALL_RECURSION(node
);
4090 r
= setup_tree(en
->target
, reg
, state
, env
);
4093 case ENCLOSE_STOP_BACKTRACK
:
4095 Node
* target
= en
->target
;
4096 r
= setup_tree(target
, reg
, state
, env
);
4097 if (NTYPE(target
) == NT_QTFR
) {
4098 QtfrNode
* tqn
= NQTFR(target
);
4099 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
4100 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
4101 int qtype
= NTYPE(tqn
->target
);
4102 if (IS_NODE_TYPE_SIMPLE(qtype
))
4103 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
4109 case ENCLOSE_CONDITION
:
4110 #ifdef USE_NAMED_GROUP
4111 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node
)) &&
4112 env
->num_named
> 0 &&
4113 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
4114 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
4115 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
4118 if (NENCLOSE(node
)->regnum
> env
->num_mem
)
4119 return ONIGERR_INVALID_BACKREF
;
4120 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4123 case ENCLOSE_ABSENT
:
4124 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4132 AnchorNode
* an
= NANCHOR(node
);
4135 case ANCHOR_PREC_READ
:
4136 r
= setup_tree(an
->target
, reg
, state
, env
);
4138 case ANCHOR_PREC_READ_NOT
:
4139 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
4142 /* allowed node types in look-behind */
4143 #define ALLOWED_TYPE_IN_LB \
4144 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4145 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4147 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4148 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4150 #define ALLOWED_ANCHOR_IN_LB \
4151 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4152 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4153 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4154 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4155 #define ALLOWED_ANCHOR_IN_LB_NOT \
4156 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4157 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4158 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4159 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4161 case ANCHOR_LOOK_BEHIND
:
4163 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
4164 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
4165 if (r
< 0) return r
;
4166 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4167 if (NTYPE(node
) != NT_ANCHOR
) goto restart
;
4168 r
= setup_tree(an
->target
, reg
, state
, env
);
4169 if (r
!= 0) return r
;
4170 r
= setup_look_behind(node
, reg
, env
);
4174 case ANCHOR_LOOK_BEHIND_NOT
:
4176 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
4177 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
4178 if (r
< 0) return r
;
4179 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4180 if (NTYPE(node
) != NT_ANCHOR
) goto restart
;
4181 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
4182 if (r
!= 0) return r
;
4183 r
= setup_look_behind(node
, reg
, env
);
4197 #ifndef USE_SUNDAY_QUICK_SEARCH
4198 /* set skip map for Boyer-Moore search */
4200 set_bm_skip(UChar
* s
, UChar
* end
, regex_t
* reg
,
4201 UChar skip
[], int** int_skip
, int ignore_case
)
4203 OnigDistance i
, len
;
4204 int clen
, flen
, n
, j
, k
;
4205 UChar
*p
, buf
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
][ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4206 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4207 OnigEncoding enc
= reg
->enc
;
4210 if (len
< ONIG_CHAR_TABLE_SIZE
) {
4211 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)len
;
4214 for (i
= 0; i
< len
- 1; i
+= clen
) {
4217 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4219 clen
= enclen(enc
, p
, end
);
4221 clen
= (int )(end
- p
);
4223 for (j
= 0; j
< n
; j
++) {
4224 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4225 return 1; /* different length isn't supported. */
4226 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4228 return 1; /* different length isn't supported. */
4230 for (j
= 0; j
< clen
; j
++) {
4231 skip
[s
[i
+ j
]] = (UChar
)(len
- 1 - i
- j
);
4232 for (k
= 0; k
< n
; k
++) {
4233 skip
[buf
[k
][j
]] = (UChar
)(len
- 1 - i
- j
);
4239 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4240 /* This should not happen. */
4241 return ONIGERR_TYPE_BUG
;
4243 if (IS_NULL(*int_skip
)) {
4244 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
4245 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
4247 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = (int )len
;
4250 for (i
= 0; i
< len
- 1; i
+= clen
) {
4253 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4255 clen
= enclen(enc
, p
, end
);
4257 clen
= (int )(end
- p
);
4259 for (j
= 0; j
< n
; j
++) {
4260 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4261 return 1; /* different length isn't supported. */
4262 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4264 return 1; /* different length isn't supported. */
4266 for (j
= 0; j
< clen
; j
++) {
4267 (*int_skip
)[s
[i
+ j
]] = (int )(len
- 1 - i
- j
);
4268 for (k
= 0; k
< n
; k
++) {
4269 (*int_skip
)[buf
[k
][j
]] = (int )(len
- 1 - i
- j
);
4278 #else /* USE_SUNDAY_QUICK_SEARCH */
4280 /* set skip map for Sunday's quick search */
4282 set_bm_skip(UChar
* s
, UChar
* end
, regex_t
* reg
,
4283 UChar skip
[], int** int_skip
, int ignore_case
)
4285 OnigDistance i
, len
;
4286 int clen
, flen
, n
, j
, k
;
4287 UChar
*p
, buf
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
][ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4288 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4289 OnigEncoding enc
= reg
->enc
;
4292 if (len
< ONIG_CHAR_TABLE_SIZE
) {
4293 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)(len
+ 1);
4296 for (i
= 0; i
< len
; i
+= clen
) {
4299 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4301 clen
= enclen(enc
, p
, end
);
4303 clen
= (int )(end
- p
);
4305 for (j
= 0; j
< n
; j
++) {
4306 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4307 return 1; /* different length isn't supported. */
4308 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4310 return 1; /* different length isn't supported. */
4312 for (j
= 0; j
< clen
; j
++) {
4313 skip
[s
[i
+ j
]] = (UChar
)(len
- i
- j
);
4314 for (k
= 0; k
< n
; k
++) {
4315 skip
[buf
[k
][j
]] = (UChar
)(len
- i
- j
);
4321 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4322 /* This should not happen. */
4323 return ONIGERR_TYPE_BUG
;
4325 if (IS_NULL(*int_skip
)) {
4326 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
4327 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
4329 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = (int )(len
+ 1);
4332 for (i
= 0; i
< len
; i
+= clen
) {
4335 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4337 clen
= enclen(enc
, p
, end
);
4339 clen
= (int )(end
- p
);
4341 for (j
= 0; j
< n
; j
++) {
4342 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4343 return 1; /* different length isn't supported. */
4344 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4346 return 1; /* different length isn't supported. */
4348 for (j
= 0; j
< clen
; j
++) {
4349 (*int_skip
)[s
[i
+ j
]] = (int )(len
- i
- j
);
4350 for (k
= 0; k
< n
; k
++) {
4351 (*int_skip
)[buf
[k
][j
]] = (int )(len
- i
- j
);
4359 #endif /* USE_SUNDAY_QUICK_SEARCH */
4362 OnigDistance min
; /* min byte length */
4363 OnigDistance max
; /* max byte length */
4369 OnigOptionType options
;
4370 OnigCaseFoldType case_fold_flag
;
4380 MinMaxLen mmd
; /* info position */
4384 int ignore_case
; /* -1: unset, 0: case sensitive, 1: ignore case */
4386 UChar s
[OPT_EXACT_MAXLEN
];
4390 MinMaxLen mmd
; /* info position */
4393 int value
; /* weighted value */
4394 UChar map
[ONIG_CHAR_TABLE_SIZE
];
4401 OptExactInfo exb
; /* boundary */
4402 OptExactInfo exm
; /* middle */
4403 OptExactInfo expr
; /* prec read (?=...) */
4405 OptMapInfo map
; /* boundary */
4410 map_position_value(OnigEncoding enc
, int i
)
4412 static const short int ByteValTable
[] = {
4413 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4414 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4415 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4416 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4417 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4418 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4419 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4420 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4423 if (i
< numberof(ByteValTable
)) {
4424 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4427 return (int )ByteValTable
[i
];
4430 return 4; /* Take it easy. */
4434 distance_value(MinMaxLen
* mm
)
4436 /* 1000 / (min-max-dist + 1) */
4437 static const short int dist_vals
[] = {
4438 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4439 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4440 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4441 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4442 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4443 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4444 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4445 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4446 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4447 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4452 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4454 d
= mm
->max
- mm
->min
;
4455 if (d
< numberof(dist_vals
))
4456 /* return dist_vals[d] * 16 / (mm->min + 12); */
4457 return (int )dist_vals
[d
];
4463 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4465 if (v2
<= 0) return -1;
4466 if (v1
<= 0) return 1;
4468 v1
*= distance_value(d1
);
4469 v2
*= distance_value(d2
);
4471 if (v2
> v1
) return 1;
4472 if (v2
< v1
) return -1;
4474 if (d2
->min
< d1
->min
) return 1;
4475 if (d2
->min
> d1
->min
) return -1;
4480 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4482 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4487 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4494 clear_mml(MinMaxLen
* mml
)
4496 mml
->min
= mml
->max
= 0;
4500 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4502 to
->min
= from
->min
;
4503 to
->max
= from
->max
;
4507 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4509 to
->min
= distance_add(to
->min
, from
->min
);
4510 to
->max
= distance_add(to
->max
, from
->max
);
4515 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4517 to
->min
= distance_add(to
->min
, len
);
4518 to
->max
= distance_add(to
->max
, len
);
4523 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4525 if (to
->min
> from
->min
) to
->min
= from
->min
;
4526 if (to
->max
< from
->max
) to
->max
= from
->max
;
4530 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4536 clear_opt_anc_info(OptAncInfo
* anc
)
4538 anc
->left_anchor
= 0;
4539 anc
->right_anchor
= 0;
4543 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4549 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4550 OnigDistance left_len
, OnigDistance right_len
)
4552 clear_opt_anc_info(to
);
4554 to
->left_anchor
= left
->left_anchor
;
4555 if (left_len
== 0) {
4556 to
->left_anchor
|= right
->left_anchor
;
4559 to
->right_anchor
= right
->right_anchor
;
4560 if (right_len
== 0) {
4561 to
->right_anchor
|= left
->right_anchor
;
4564 to
->right_anchor
|= (left
->right_anchor
& ANCHOR_PREC_READ_NOT
);
4569 is_left_anchor(int anc
)
4571 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4572 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4573 anc
== ANCHOR_PREC_READ_NOT
)
4580 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4582 if ((to
->left_anchor
& anc
) != 0) return 1;
4584 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4588 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4590 if (is_left_anchor(anc
))
4591 to
->left_anchor
|= anc
;
4593 to
->right_anchor
|= anc
;
4597 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4599 if (is_left_anchor(anc
))
4600 to
->left_anchor
&= ~anc
;
4602 to
->right_anchor
&= ~anc
;
4606 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4608 to
->left_anchor
&= add
->left_anchor
;
4609 to
->right_anchor
&= add
->right_anchor
;
4613 is_full_opt_exact_info(OptExactInfo
* ex
)
4615 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4619 clear_opt_exact_info(OptExactInfo
* ex
)
4621 clear_mml(&ex
->mmd
);
4622 clear_opt_anc_info(&ex
->anc
);
4624 ex
->ignore_case
= -1; /* unset */
4630 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4636 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4642 if (to
->ignore_case
< 0)
4643 to
->ignore_case
= add
->ignore_case
;
4644 else if (to
->ignore_case
!= add
->ignore_case
)
4645 return ; /* avoid */
4649 for (i
= to
->len
; p
< end
; ) {
4650 len
= enclen(enc
, p
, end
);
4651 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4652 for (j
= 0; j
< len
&& p
< end
; j
++)
4657 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4659 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4660 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4661 copy_opt_anc_info(&to
->anc
, &tanc
);
4665 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4666 int raw ARG_UNUSED
, OnigEncoding enc
)
4671 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4672 len
= enclen(enc
, p
, end
);
4673 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4674 for (j
= 0; j
< len
&& p
< end
; j
++)
4682 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4686 if (add
->len
== 0 || to
->len
== 0) {
4687 clear_opt_exact_info(to
);
4691 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4692 clear_opt_exact_info(to
);
4696 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4697 if (to
->s
[i
] != add
->s
[i
]) break;
4698 len
= enclen(env
->enc
, to
->s
+ i
, to
->s
+ to
->len
);
4700 for (j
= 1; j
< len
; j
++) {
4701 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4707 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4711 if (to
->ignore_case
< 0)
4712 to
->ignore_case
= add
->ignore_case
;
4713 else if (add
->ignore_case
>= 0)
4714 to
->ignore_case
|= add
->ignore_case
;
4716 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4717 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4721 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4732 copy_opt_exact_info(now
, alt
);
4735 else if (v1
<= 2 && v2
<= 2) {
4736 /* ByteValTable[x] is big value --> low price */
4737 v2
= map_position_value(enc
, now
->s
[0]);
4738 v1
= map_position_value(enc
, alt
->s
[0]);
4740 if (now
->len
> 1) v1
+= 5;
4741 if (alt
->len
> 1) v2
+= 5;
4744 if (now
->ignore_case
<= 0) v1
*= 2;
4745 if (alt
->ignore_case
<= 0) v2
*= 2;
4747 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4748 copy_opt_exact_info(now
, alt
);
4752 clear_opt_map_info(OptMapInfo
* map
)
4754 static const OptMapInfo clean_info
= {
4757 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4758 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4759 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4760 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4761 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4762 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4765 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4766 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4767 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4768 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4769 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4770 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4776 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4780 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4786 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4788 if (map
->map
[c
] == 0) {
4790 map
->value
+= map_position_value(enc
, c
);
4795 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4796 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4798 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4799 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4802 add_char_opt_map_info(map
, p
[0], enc
);
4804 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4805 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4806 if (n
< 0) return n
;
4808 for (i
= 0; i
< n
; i
++) {
4809 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4810 add_char_opt_map_info(map
, buf
[0], enc
);
4817 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4819 const int z
= 1<<15; /* 32768: something big value */
4823 if (alt
->value
== 0) return ;
4824 if (now
->value
== 0) {
4825 copy_opt_map_info(now
, alt
);
4829 v1
= z
/ now
->value
;
4830 v2
= z
/ alt
->value
;
4831 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4832 copy_opt_map_info(now
, alt
);
4836 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4838 #define COMP_EM_BASE 20
4841 if (m
->value
<= 0) return -1;
4843 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
> 0 ? 1 : 2);
4844 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4845 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4849 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4853 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4854 if (to
->value
== 0) return ;
4855 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4856 clear_opt_map_info(to
);
4860 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4863 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4868 val
+= map_position_value(enc
, i
);
4872 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4876 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4878 copy_mml(&(opt
->exb
.mmd
), mmd
);
4879 copy_mml(&(opt
->expr
.mmd
), mmd
);
4880 copy_mml(&(opt
->map
.mmd
), mmd
);
4884 clear_node_opt_info(NodeOptInfo
* opt
)
4886 clear_mml(&opt
->len
);
4887 clear_opt_anc_info(&opt
->anc
);
4888 clear_opt_exact_info(&opt
->exb
);
4889 clear_opt_exact_info(&opt
->exm
);
4890 clear_opt_exact_info(&opt
->expr
);
4891 clear_opt_map_info(&opt
->map
);
4895 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4901 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4903 int exb_reach
, exm_reach
;
4906 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4907 copy_opt_anc_info(&to
->anc
, &tanc
);
4909 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4910 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4911 to
->len
.max
, add
->len
.max
);
4912 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4915 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4916 if (add
->map
.mmd
.max
== 0)
4917 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4920 exb_reach
= to
->exb
.reach_end
;
4921 exm_reach
= to
->exm
.reach_end
;
4923 if (add
->len
.max
!= 0)
4924 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4926 if (add
->exb
.len
> 0) {
4928 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4929 clear_opt_exact_info(&add
->exb
);
4931 else if (exm_reach
) {
4932 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4933 clear_opt_exact_info(&add
->exb
);
4936 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4937 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4939 if (to
->expr
.len
> 0) {
4940 if (add
->len
.max
> 0) {
4941 if (to
->expr
.len
> (int )add
->len
.max
)
4942 to
->expr
.len
= (int )add
->len
.max
;
4944 if (to
->expr
.mmd
.max
== 0)
4945 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4947 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4950 else if (add
->expr
.len
> 0) {
4951 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4954 select_opt_map_info(&to
->map
, &add
->map
);
4956 add_mml(&to
->len
, &add
->len
);
4960 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4962 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4963 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4964 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4965 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4966 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4968 alt_merge_mml(&to
->len
, &add
->len
);
4972 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4975 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4980 clear_node_opt_info(opt
);
4981 set_bound_node_opt_info(opt
, &env
->mmd
);
4991 copy_opt_env(&nenv
, env
);
4993 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
4995 add_mml(&nenv
.mmd
, &nopt
.len
);
4996 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
4998 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
5008 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
5010 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
5011 else alt_merge_node_opt_info(opt
, &nopt
, env
);
5013 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
5019 StrNode
* sn
= NSTR(node
);
5020 OnigDistance slen
= sn
->end
- sn
->s
;
5021 int is_raw
= NSTRING_IS_RAW(node
);
5023 if (! NSTRING_IS_AMBIG(node
)) {
5024 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
5026 opt
->exb
.ignore_case
= 0;
5028 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
5030 set_mml(&opt
->len
, slen
, slen
);
5035 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
5036 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
5037 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * (OnigDistance
)n
;
5040 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
5042 opt
->exb
.ignore_case
= 1;
5045 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
5046 env
->enc
, env
->case_fold_flag
);
5053 set_mml(&opt
->len
, slen
, max
);
5056 if ((OnigDistance
)opt
->exb
.len
== slen
)
5057 opt
->exb
.reach_end
= 1;
5064 CClassNode
* cc
= NCCLASS(node
);
5066 /* no need to check ignore case. (set in setup_tree()) */
5068 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
5069 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
5070 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5072 set_mml(&opt
->len
, min
, max
);
5075 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
5076 z
= BITSET_AT(cc
->bs
, i
);
5077 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
5078 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5081 set_mml(&opt
->len
, 1, 1);
5091 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5096 maxcode
= NCTYPE(node
)->ascii_range
? 0x80 : SINGLE_BYTE_SIZE
;
5097 switch (NCTYPE(node
)->ctype
) {
5098 case ONIGENC_CTYPE_WORD
:
5099 if (NCTYPE(node
)->not != 0) {
5100 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
5101 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
) || i
>= maxcode
) {
5102 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5107 for (i
= 0; i
< maxcode
; i
++) {
5108 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
5109 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5117 min
= ONIGENC_MBC_MINLEN(env
->enc
);
5119 set_mml(&opt
->len
, min
, max
);
5125 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
5126 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5127 set_mml(&opt
->len
, min
, max
);
5132 switch (NANCHOR(node
)->type
) {
5133 case ANCHOR_BEGIN_BUF
:
5134 case ANCHOR_BEGIN_POSITION
:
5135 case ANCHOR_BEGIN_LINE
:
5136 case ANCHOR_END_BUF
:
5137 case ANCHOR_SEMI_END_BUF
:
5138 case ANCHOR_END_LINE
:
5139 case ANCHOR_LOOK_BEHIND
: /* just for (?<=x).* */
5140 case ANCHOR_PREC_READ_NOT
: /* just for (?!x).* */
5141 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
5144 case ANCHOR_PREC_READ
:
5148 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
5150 if (nopt
.exb
.len
> 0)
5151 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
5152 else if (nopt
.exm
.len
> 0)
5153 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
5155 opt
->expr
.reach_end
= 0;
5157 if (nopt
.map
.value
> 0)
5158 copy_opt_map_info(&opt
->map
, &nopt
.map
);
5163 case ANCHOR_LOOK_BEHIND_NOT
:
5172 OnigDistance min
, max
, tmin
, tmax
;
5173 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
5174 BRefNode
* br
= NBREF(node
);
5176 if (br
->state
& NST_RECURSION
) {
5177 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5180 backs
= BACKREFS_P(br
);
5181 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
5183 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
5185 for (i
= 1; i
< br
->back_num
; i
++) {
5186 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
5188 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
5190 if (min
> tmin
) min
= tmin
;
5191 if (max
< tmax
) max
= tmax
;
5193 if (r
== 0) set_mml(&opt
->len
, min
, max
);
5197 #ifdef USE_SUBEXP_CALL
5199 if (IS_CALL_RECURSION(NCALL(node
)))
5200 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5202 OnigOptionType save
= env
->options
;
5203 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
5204 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
5205 env
->options
= save
;
5213 OnigDistance min
, max
;
5215 QtfrNode
* qn
= NQTFR(node
);
5217 r
= optimize_node_left(qn
->target
, &nopt
, env
);
5220 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn
->upper
)) {
5221 if (env
->mmd
.max
== 0 &&
5222 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
5223 if (IS_MULTILINE(env
->options
))
5224 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5225 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
5227 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
5231 if (qn
->lower
> 0) {
5232 copy_node_opt_info(opt
, &nopt
);
5233 if (nopt
.exb
.len
> 0) {
5234 if (nopt
.exb
.reach_end
) {
5235 for (i
= 2; i
<= qn
->lower
&&
5236 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
5237 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
5239 if (i
< qn
->lower
) {
5240 opt
->exb
.reach_end
= 0;
5245 if (qn
->lower
!= qn
->upper
) {
5246 opt
->exb
.reach_end
= 0;
5247 opt
->exm
.reach_end
= 0;
5250 opt
->exm
.reach_end
= 0;
5254 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
5255 if (IS_REPEAT_INFINITE(qn
->upper
))
5256 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
5258 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
5260 set_mml(&opt
->len
, min
, max
);
5266 EncloseNode
* en
= NENCLOSE(node
);
5269 case ENCLOSE_OPTION
:
5271 OnigOptionType save
= env
->options
;
5273 env
->options
= en
->option
;
5274 r
= optimize_node_left(en
->target
, opt
, env
);
5275 env
->options
= save
;
5279 case ENCLOSE_MEMORY
:
5280 #ifdef USE_SUBEXP_CALL
5282 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
5283 OnigDistance min
, max
;
5286 max
= ONIG_INFINITE_DISTANCE
;
5287 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
5288 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
5289 set_mml(&opt
->len
, min
, max
);
5294 r
= optimize_node_left(en
->target
, opt
, env
);
5296 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
5297 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
5298 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
5303 case ENCLOSE_STOP_BACKTRACK
:
5304 case ENCLOSE_CONDITION
:
5305 r
= optimize_node_left(en
->target
, opt
, env
);
5308 case ENCLOSE_ABSENT
:
5309 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5317 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
5320 r
= ONIGERR_TYPE_BUG
;
5328 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
5333 if (e
->len
== 0) return 0;
5335 reg
->exact
= (UChar
* )xmalloc(e
->len
);
5336 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
5337 xmemcpy(reg
->exact
, e
->s
, e
->len
);
5338 reg
->exact_end
= reg
->exact
+ e
->len
;
5341 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
5343 if (e
->ignore_case
> 0) {
5344 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
5345 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
,
5346 reg
->map
, &(reg
->int_map
), 1);
5348 reg
->optimize
= (allow_reverse
!= 0
5349 ? ONIG_OPTIMIZE_EXACT_BM_IC
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
);
5352 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
5356 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
5360 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
5361 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
,
5362 reg
->map
, &(reg
->int_map
), 0);
5364 reg
->optimize
= (allow_reverse
!= 0
5365 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
5368 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
5372 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
5376 reg
->dmin
= e
->mmd
.min
;
5377 reg
->dmax
= e
->mmd
.max
;
5379 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
5380 reg
->threshold_len
= (int )(reg
->dmin
+ (reg
->exact_end
- reg
->exact
));
5387 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
5391 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5392 reg
->map
[i
] = m
->map
[i
];
5394 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
5395 reg
->dmin
= m
->mmd
.min
;
5396 reg
->dmax
= m
->mmd
.max
;
5398 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
5399 reg
->threshold_len
= (int )(reg
->dmin
+ 1);
5404 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
5406 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
5407 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
5410 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5411 static void print_optimize_info(FILE* f
, regex_t
* reg
);
5415 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
5423 env
.options
= reg
->options
;
5424 env
.case_fold_flag
= reg
->case_fold_flag
;
5425 env
.scan_env
= scan_env
;
5426 clear_mml(&env
.mmd
);
5428 r
= optimize_node_left(node
, &opt
, &env
);
5431 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
5432 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
|
5433 ANCHOR_LOOK_BEHIND
);
5435 if ((opt
.anc
.left_anchor
& (ANCHOR_LOOK_BEHIND
| ANCHOR_PREC_READ_NOT
)) != 0)
5436 reg
->anchor
&= ~ANCHOR_ANYCHAR_STAR_ML
;
5438 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
|
5439 ANCHOR_PREC_READ_NOT
);
5441 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
5442 reg
->anchor_dmin
= opt
.len
.min
;
5443 reg
->anchor_dmax
= opt
.len
.max
;
5446 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
5447 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
5448 if (opt
.map
.value
> 0 &&
5449 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
5453 r
= set_optimize_exact_info(reg
, &opt
.exb
);
5454 set_sub_anchor(reg
, &opt
.exb
.anc
);
5457 else if (opt
.map
.value
> 0) {
5459 set_optimize_map_info(reg
, &opt
.map
);
5460 set_sub_anchor(reg
, &opt
.map
.anc
);
5463 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5464 if (opt
.len
.max
== 0)
5465 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5468 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5469 print_optimize_info(stderr
, reg
);
5475 clear_optimize_info(regex_t
* reg
)
5477 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5479 reg
->anchor_dmin
= 0;
5480 reg
->anchor_dmax
= 0;
5481 reg
->sub_anchor
= 0;
5482 reg
->exact_end
= (UChar
* )NULL
;
5483 reg
->threshold_len
= 0;
5484 if (IS_NOT_NULL(reg
->exact
)) {
5486 reg
->exact
= (UChar
* )NULL
;
5492 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5493 const UChar
*s
, const UChar
*end
)
5495 fprintf(fp
, "\nPATTERN: /");
5497 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5503 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5505 fprintf(fp
, " 0x%04x ", (int )code
);
5508 fputc((int )code
, fp
);
5511 p
+= enclen(enc
, p
, end
);
5516 fputc((int )*s
, fp
);
5521 fprintf(fp
, "/ (%s)\n", enc
->name
);
5523 #endif /* ONIG_DEBUG */
5525 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5527 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5529 if (a
== ONIG_INFINITE_DISTANCE
)
5532 fprintf(f
, "(%"PRIuPTR
")", a
);
5536 if (b
== ONIG_INFINITE_DISTANCE
)
5539 fprintf(f
, "(%"PRIuPTR
")", b
);
5543 print_anchor(FILE* f
, int anchor
)
5549 if (anchor
& ANCHOR_BEGIN_BUF
) {
5550 fprintf(f
, "begin-buf");
5553 if (anchor
& ANCHOR_BEGIN_LINE
) {
5554 if (q
) fprintf(f
, ", ");
5556 fprintf(f
, "begin-line");
5558 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5559 if (q
) fprintf(f
, ", ");
5561 fprintf(f
, "begin-pos");
5563 if (anchor
& ANCHOR_END_BUF
) {
5564 if (q
) fprintf(f
, ", ");
5566 fprintf(f
, "end-buf");
5568 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5569 if (q
) fprintf(f
, ", ");
5571 fprintf(f
, "semi-end-buf");
5573 if (anchor
& ANCHOR_END_LINE
) {
5574 if (q
) fprintf(f
, ", ");
5576 fprintf(f
, "end-line");
5578 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5579 if (q
) fprintf(f
, ", ");
5581 fprintf(f
, "anychar-star");
5583 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5584 if (q
) fprintf(f
, ", ");
5585 fprintf(f
, "anychar-star-ml");
5592 print_optimize_info(FILE* f
, regex_t
* reg
)
5594 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5596 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5598 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5599 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5600 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5601 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5604 if (reg
->optimize
) {
5605 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5612 fprintf(f
, "exact: [");
5613 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5616 fprintf(f
, "]: length: %"PRIdPTR
"\n", (reg
->exact_end
- reg
->exact
));
5618 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5621 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5622 if (reg
->map
[i
]) n
++;
5624 fprintf(f
, "map: n=%d\n", n
);
5628 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5629 if (reg
->map
[i
] != 0) {
5630 if (c
> 0) fputs(", ", f
);
5632 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5633 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5636 fprintf(f
, "%d", i
);
5643 #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5647 onig_free_body(regex_t
* reg
)
5649 if (IS_NOT_NULL(reg
)) {
5650 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5651 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5652 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5653 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5654 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5655 if (IS_NOT_NULL(reg
->chain
)) onig_free(reg
->chain
);
5657 #ifdef USE_NAMED_GROUP
5658 onig_names_free(reg
);
5664 onig_free(regex_t
* reg
)
5666 if (IS_NOT_NULL(reg
)) {
5667 onig_free_body(reg
);
5674 onig_memsize(const regex_t
*reg
)
5676 size_t size
= sizeof(regex_t
);
5677 if (IS_NULL(reg
)) return 0;
5678 if (IS_NOT_NULL(reg
->p
)) size
+= reg
->alloc
;
5679 if (IS_NOT_NULL(reg
->exact
)) size
+= reg
->exact_end
- reg
->exact
;
5680 if (IS_NOT_NULL(reg
->int_map
)) size
+= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5681 if (IS_NOT_NULL(reg
->int_map_backward
)) size
+= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5682 if (IS_NOT_NULL(reg
->repeat_range
)) size
+= reg
->repeat_range_alloc
* sizeof(OnigRepeatRange
);
5683 if (IS_NOT_NULL(reg
->chain
)) size
+= onig_memsize(reg
->chain
);
5689 onig_region_memsize(const OnigRegion
*regs
)
5691 size_t size
= sizeof(*regs
);
5692 if (IS_NULL(regs
)) return 0;
5693 size
+= regs
->allocated
* (sizeof(*regs
->beg
) + sizeof(*regs
->end
));
5698 #define REGEX_TRANSFER(to,from) do {\
5699 onig_free_body(to);\
5700 xmemcpy(to, from, sizeof(regex_t));\
5706 onig_transfer(regex_t
* to
, regex_t
* from
)
5708 REGEX_TRANSFER(to
, from
);
5712 #ifdef ONIG_DEBUG_COMPILE
5713 static void print_compiled_byte_code_list(FILE* f
, regex_t
* reg
);
5715 #ifdef ONIG_DEBUG_PARSE_TREE
5716 static void print_tree(FILE* f
, Node
* node
);
5721 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5722 OnigErrorInfo
* einfo
)
5724 return onig_compile_ruby(reg
, pattern
, pattern_end
, einfo
, NULL
, 0);
5730 onig_compile_ruby(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5731 OnigErrorInfo
* einfo
, const char *sourcefile
, int sourceline
)
5734 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5735 OnigErrorInfo
* einfo
)
5738 #define COMPILE_INIT_SIZE 20
5741 OnigDistance init_size
;
5743 ScanEnv scan_env
= {0};
5744 #ifdef USE_SUBEXP_CALL
5745 UnsetAddrList uslist
;
5748 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5751 scan_env
.sourcefile
= sourcefile
;
5752 scan_env
.sourceline
= sourceline
;
5756 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5759 if (reg
->alloc
== 0) {
5760 init_size
= (pattern_end
- pattern
) * 2;
5761 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5762 r
= BBUF_INIT(reg
, init_size
);
5763 if (r
!= 0) goto end
;
5769 reg
->num_repeat
= 0;
5770 reg
->num_null_check
= 0;
5771 reg
->repeat_range_alloc
= 0;
5772 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5773 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5774 reg
->num_comb_exp_check
= 0;
5777 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5778 if (r
!= 0) goto err
;
5780 #ifdef ONIG_DEBUG_PARSE_TREE
5782 fprintf(stderr
, "ORIGINAL PARSE TREE:\n");
5783 print_tree(stderr
, root
);
5787 #ifdef USE_NAMED_GROUP
5788 /* mixed use named group and no-named group */
5789 if (scan_env
.num_named
> 0 &&
5790 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5791 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5792 if (scan_env
.num_named
!= scan_env
.num_mem
)
5793 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5795 r
= numbered_ref_check(root
);
5797 if (r
!= 0) goto err
;
5801 #ifdef USE_SUBEXP_CALL
5802 if (scan_env
.num_call
> 0) {
5803 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5804 if (r
!= 0) goto err
;
5805 scan_env
.unset_addr_list
= &uslist
;
5806 r
= setup_subexp_call(root
, &scan_env
);
5807 if (r
!= 0) goto err_unset
;
5808 r
= subexp_recursive_check_trav(root
, &scan_env
);
5809 if (r
< 0) goto err_unset
;
5810 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5811 if (r
!= 0) goto err_unset
;
5813 reg
->num_call
= scan_env
.num_call
;
5819 r
= setup_tree(root
, reg
, 0, &scan_env
);
5820 if (r
!= 0) goto err_unset
;
5822 #ifdef ONIG_DEBUG_PARSE_TREE
5823 print_tree(stderr
, root
);
5826 reg
->capture_history
= scan_env
.capture_history
;
5827 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5828 reg
->bt_mem_start
|= reg
->capture_history
;
5829 if (IS_FIND_CONDITION(reg
->options
))
5830 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5832 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5833 reg
->bt_mem_end
|= reg
->capture_history
;
5836 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5837 if (scan_env
.backrefed_mem
== 0
5838 # ifdef USE_SUBEXP_CALL
5839 || scan_env
.num_call
== 0
5842 setup_comb_exp_check(root
, 0, &scan_env
);
5843 # ifdef USE_SUBEXP_CALL
5844 if (scan_env
.has_recursion
!= 0) {
5845 scan_env
.num_comb_exp_check
= 0;
5849 if (scan_env
.comb_exp_max_regnum
> 0) {
5851 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5852 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5853 scan_env
.num_comb_exp_check
= 0;
5860 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5863 clear_optimize_info(reg
);
5864 #ifndef ONIG_DONT_OPTIMIZE
5865 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5866 if (r
!= 0) goto err_unset
;
5869 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5870 xfree(scan_env
.mem_nodes_dynamic
);
5871 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5874 r
= compile_tree(root
, reg
);
5876 r
= add_opcode(reg
, OP_END
);
5877 #ifdef USE_SUBEXP_CALL
5878 if (scan_env
.num_call
> 0) {
5879 r
= unset_addr_list_fix(&uslist
, reg
);
5880 unset_addr_list_end(&uslist
);
5885 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5886 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5888 if (reg
->bt_mem_start
!= 0)
5889 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5891 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5894 #ifdef USE_SUBEXP_CALL
5895 else if (scan_env
.num_call
> 0) {
5896 unset_addr_list_end(&uslist
);
5899 onig_node_free(root
);
5901 #ifdef ONIG_DEBUG_COMPILE
5902 # ifdef USE_NAMED_GROUP
5903 onig_print_names(stderr
, reg
);
5905 print_compiled_byte_code_list(stderr
, reg
);
5909 onig_reg_resize(reg
);
5913 #ifdef USE_SUBEXP_CALL
5914 if (scan_env
.num_call
> 0) {
5915 unset_addr_list_end(&uslist
);
5919 if (IS_NOT_NULL(scan_env
.error
)) {
5920 if (IS_NOT_NULL(einfo
)) {
5921 einfo
->enc
= scan_env
.enc
;
5922 einfo
->par
= scan_env
.error
;
5923 einfo
->par_end
= scan_env
.error_end
;
5927 onig_node_free(root
);
5928 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
))
5929 xfree(scan_env
.mem_nodes_dynamic
);
5933 static int onig_inited
= 0;
5936 onig_reg_init(regex_t
* reg
, OnigOptionType option
,
5937 OnigCaseFoldType case_fold_flag
,
5938 OnigEncoding enc
, const OnigSyntaxType
* syntax
)
5944 return ONIGERR_INVALID_ARGUMENT
;
5946 if (ONIGENC_IS_UNDEF(enc
))
5947 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
;
5949 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
5950 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
5951 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
5954 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
5955 option
|= syntax
->options
;
5956 option
&= ~ONIG_OPTION_SINGLELINE
;
5959 option
|= syntax
->options
;
5962 (reg
)->options
= option
;
5963 (reg
)->syntax
= syntax
;
5964 (reg
)->optimize
= 0;
5965 (reg
)->exact
= (UChar
* )NULL
;
5966 (reg
)->int_map
= (int* )NULL
;
5967 (reg
)->int_map_backward
= (int* )NULL
;
5968 (reg
)->chain
= (regex_t
* )NULL
;
5970 (reg
)->p
= (UChar
* )NULL
;
5973 (reg
)->name_table
= (void* )NULL
;
5975 (reg
)->case_fold_flag
= case_fold_flag
;
5980 onig_new_without_alloc(regex_t
* reg
, const UChar
* pattern
,
5981 const UChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
,
5982 const OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
5986 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5989 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
5994 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5995 OnigOptionType option
, OnigEncoding enc
, const OnigSyntaxType
* syntax
,
5996 OnigErrorInfo
* einfo
)
6000 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
6001 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
6003 r
= onig_reg_init(*reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
6006 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
6016 onig_initialize(OnigEncoding encodings
[] ARG_UNUSED
, int n ARG_UNUSED
)
6024 if (onig_inited
!= 0)
6029 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6030 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF
| _CRTDBG_LEAK_CHECK_DF
);
6034 /* onigenc_set_default_caseconv_table((UChar* )0); */
6036 #ifdef ONIG_DEBUG_STATISTICS
6037 onig_statistics_init();
6044 static OnigEndCallListItemType
* EndCallTop
;
6046 extern void onig_add_end_call(void (*func
)(void))
6048 OnigEndCallListItemType
* item
;
6050 item
= (OnigEndCallListItemType
* )xmalloc(sizeof(*item
));
6051 if (item
== 0) return ;
6053 item
->next
= EndCallTop
;
6060 exec_end_call_list(void)
6062 OnigEndCallListItemType
* prev
;
6065 while (EndCallTop
!= 0) {
6066 func
= EndCallTop
->func
;
6070 EndCallTop
= EndCallTop
->next
;
6078 exec_end_call_list();
6080 #ifdef ONIG_DEBUG_STATISTICS
6081 onig_print_statistics(stderr
);
6084 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6085 _CrtDumpMemoryLeaks();
6094 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
6096 OnigCodePoint n
, *data
;
6097 OnigCodePoint low
, high
, x
;
6099 GET_CODE_POINT(n
, p
);
6100 data
= (OnigCodePoint
* )p
;
6103 for (low
= 0, high
= n
; low
< high
; ) {
6104 x
= (low
+ high
) >> 1;
6105 if (code
> data
[x
* 2 + 1])
6111 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
6115 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
6119 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
6120 if (IS_NULL(cc
->mbuf
)) {
6124 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
6128 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
6131 if (IS_NCCLASS_NOT(cc
))
6138 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
6142 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
6146 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
6148 return onig_is_code_in_cc_len(len
, code
, cc
);
6154 /* arguments type */
6155 # define ARG_SPECIAL -1
6157 # define ARG_RELADDR 1
6158 # define ARG_ABSADDR 2
6159 # define ARG_LENGTH 3
6160 # define ARG_MEMNUM 4
6161 # define ARG_OPTION 5
6162 # define ARG_STATE_CHECK 6
6164 OnigOpInfoType OnigOpInfo
[] = {
6165 { OP_FINISH
, "finish", ARG_NON
},
6166 { OP_END
, "end", ARG_NON
},
6167 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
6168 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
6169 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
6170 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
6171 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
6172 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
6173 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
6174 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
6175 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
6176 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
6177 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
6178 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
6179 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
6180 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
6181 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
6182 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
6183 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
6184 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
6185 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
6186 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
6187 { OP_ANYCHAR
, "anychar", ARG_NON
},
6188 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
6189 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
6190 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
6191 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
6192 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
6193 { OP_WORD
, "word", ARG_NON
},
6194 { OP_NOT_WORD
, "not-word", ARG_NON
},
6195 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
6196 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
6197 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
6198 { OP_WORD_END
, "word-end", ARG_NON
},
6199 { OP_ASCII_WORD
, "ascii-word", ARG_NON
},
6200 { OP_NOT_ASCII_WORD
, "not-ascii-word", ARG_NON
},
6201 { OP_ASCII_WORD_BOUND
, "ascii-word-bound", ARG_NON
},
6202 { OP_NOT_ASCII_WORD_BOUND
,"not-ascii-word-bound", ARG_NON
},
6203 { OP_ASCII_WORD_BEGIN
, "ascii-word-begin", ARG_NON
},
6204 { OP_ASCII_WORD_END
, "ascii-word-end", ARG_NON
},
6205 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
6206 { OP_END_BUF
, "end-buf", ARG_NON
},
6207 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
6208 { OP_END_LINE
, "end-line", ARG_NON
},
6209 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
6210 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
6211 { OP_BACKREF1
, "backref1", ARG_NON
},
6212 { OP_BACKREF2
, "backref2", ARG_NON
},
6213 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
6214 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
6215 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
6216 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
6217 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
6218 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
6219 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
6220 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
6221 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
6222 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
6223 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
6224 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
6225 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
6226 { OP_KEEP
, "keep", ARG_NON
},
6227 { OP_FAIL
, "fail", ARG_NON
},
6228 { OP_JUMP
, "jump", ARG_RELADDR
},
6229 { OP_PUSH
, "push", ARG_RELADDR
},
6230 { OP_POP
, "pop", ARG_NON
},
6231 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
6232 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
6233 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
6234 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
6235 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
6236 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
6237 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
6238 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
6239 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
6240 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
6241 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
6242 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
6243 { OP_PUSH_POS
, "push-pos", ARG_NON
},
6244 { OP_POP_POS
, "pop-pos", ARG_NON
},
6245 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
6246 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
6247 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
6248 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
6249 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
6250 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
6251 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
6252 { OP_PUSH_ABSENT_POS
, "push-absent-pos", ARG_NON
},
6253 { OP_ABSENT
, "absent", ARG_RELADDR
},
6254 { OP_ABSENT_END
, "absent-end", ARG_NON
},
6255 { OP_CALL
, "call", ARG_ABSADDR
},
6256 { OP_RETURN
, "return", ARG_NON
},
6257 { OP_CONDITION
, "condition", ARG_SPECIAL
},
6258 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
6259 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
6260 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
6261 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
6262 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
6263 "state-check-anychar-ml*", ARG_STATE_CHECK
},
6272 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
6273 if (opcode
== OnigOpInfo
[i
].opcode
)
6274 return OnigOpInfo
[i
].name
;
6280 op2arg_type(int opcode
)
6284 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
6285 if (opcode
== OnigOpInfo
[i
].opcode
)
6286 return OnigOpInfo
[i
].arg_type
;
6291 # ifdef ONIG_DEBUG_PARSE_TREE
6293 Indent(FILE* f
, int indent
)
6296 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
6298 # endif /* ONIG_DEBUG_PARSE_TREE */
6301 p_string(FILE* f
, ptrdiff_t len
, UChar
* s
)
6304 while (len
-- > 0) { fputc(*s
++, f
); }
6308 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
6310 int x
= len
* mb_len
;
6312 fprintf(f
, ":%d:", len
);
6313 while (x
-- > 0) { fputc(*s
++, f
); }
6317 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
* bpend
, UChar
** nextp
,
6324 StateCheckNumType scn
;
6328 fprintf(f
, "[%s", op2name(*bp
));
6329 arg_type
= op2arg_type(*bp
);
6330 if (arg_type
!= ARG_SPECIAL
) {
6336 GET_RELADDR_INC(addr
, bp
);
6337 fprintf(f
, ":(%s%d)", (addr
>= 0) ? "+" : "", addr
);
6340 GET_ABSADDR_INC(addr
, bp
);
6341 fprintf(f
, ":(%d)", addr
);
6344 GET_LENGTH_INC(len
, bp
);
6345 fprintf(f
, ":%d", len
);
6348 mem
= *((MemNumType
* )bp
);
6350 fprintf(f
, ":%d", mem
);
6354 OnigOptionType option
= *((OnigOptionType
* )bp
);
6356 fprintf(f
, ":%d", option
);
6360 case ARG_STATE_CHECK
:
6361 scn
= *((StateCheckNumType
* )bp
);
6362 bp
+= SIZE_STATE_CHECK_NUM
;
6363 fprintf(f
, ":%d", scn
);
6370 case OP_ANYCHAR_STAR_PEEK_NEXT
:
6371 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
6372 p_string(f
, 1, bp
++); break;
6374 p_string(f
, 2, bp
); bp
+= 2; break;
6376 p_string(f
, 3, bp
); bp
+= 3; break;
6378 p_string(f
, 4, bp
); bp
+= 4; break;
6380 p_string(f
, 5, bp
); bp
+= 5; break;
6382 GET_LENGTH_INC(len
, bp
);
6383 p_len_string(f
, len
, 1, bp
);
6388 p_string(f
, 2, bp
); bp
+= 2; break;
6390 p_string(f
, 4, bp
); bp
+= 4; break;
6392 p_string(f
, 6, bp
); bp
+= 6; break;
6394 GET_LENGTH_INC(len
, bp
);
6395 p_len_string(f
, len
, 2, bp
);
6399 GET_LENGTH_INC(len
, bp
);
6400 p_len_string(f
, len
, 3, bp
);
6407 GET_LENGTH_INC(mb_len
, bp
);
6408 GET_LENGTH_INC(len
, bp
);
6409 fprintf(f
, ":%d:%d:", mb_len
, len
);
6411 while (n
-- > 0) { fputc(*bp
++, f
); }
6416 len
= enclen(enc
, bp
, bpend
);
6417 p_string(f
, len
, bp
);
6421 GET_LENGTH_INC(len
, bp
);
6422 p_len_string(f
, len
, 1, bp
);
6427 n
= bitset_on_num((BitSetRef
)bp
);
6429 fprintf(f
, ":%d", n
);
6433 n
= bitset_on_num((BitSetRef
)bp
);
6435 fprintf(f
, ":%d", n
);
6439 case OP_CCLASS_MB_NOT
:
6440 GET_LENGTH_INC(len
, bp
);
6442 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6445 GET_CODE_POINT(code
, q
);
6447 fprintf(f
, ":%d:%d", (int )code
, len
);
6451 case OP_CCLASS_MIX_NOT
:
6452 n
= bitset_on_num((BitSetRef
)bp
);
6454 GET_LENGTH_INC(len
, bp
);
6456 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6459 GET_CODE_POINT(code
, q
);
6461 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6464 case OP_BACKREFN_IC
:
6465 mem
= *((MemNumType
* )bp
);
6467 fprintf(f
, ":%d", mem
);
6470 case OP_BACKREF_MULTI_IC
:
6471 case OP_BACKREF_MULTI
:
6473 GET_LENGTH_INC(len
, bp
);
6474 for (i
= 0; i
< len
; i
++) {
6475 GET_MEMNUM_INC(mem
, bp
);
6476 if (i
> 0) fputs(", ", f
);
6477 fprintf(f
, "%d", mem
);
6481 case OP_BACKREF_WITH_LEVEL
:
6483 OnigOptionType option
;
6486 GET_OPTION_INC(option
, bp
);
6487 fprintf(f
, ":%d", option
);
6488 GET_LENGTH_INC(level
, bp
);
6489 fprintf(f
, ":%d", level
);
6492 GET_LENGTH_INC(len
, bp
);
6493 for (i
= 0; i
< len
; i
++) {
6494 GET_MEMNUM_INC(mem
, bp
);
6495 if (i
> 0) fputs(", ", f
);
6496 fprintf(f
, "%d", mem
);
6504 mem
= *((MemNumType
* )bp
);
6506 addr
= *((RelAddrType
* )bp
);
6508 fprintf(f
, ":%d:%d", mem
, addr
);
6512 case OP_PUSH_OR_JUMP_EXACT1
:
6513 case OP_PUSH_IF_PEEK_NEXT
:
6514 addr
= *((RelAddrType
* )bp
);
6516 fprintf(f
, ":(%s%d)", (addr
>= 0) ? "+" : "", addr
);
6521 case OP_LOOK_BEHIND
:
6522 GET_LENGTH_INC(len
, bp
);
6523 fprintf(f
, ":%d", len
);
6526 case OP_PUSH_LOOK_BEHIND_NOT
:
6527 GET_RELADDR_INC(addr
, bp
);
6528 GET_LENGTH_INC(len
, bp
);
6529 fprintf(f
, ":%d:(%s%d)", len
, (addr
>= 0) ? "+" : "", addr
);
6532 case OP_STATE_CHECK_PUSH
:
6533 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6534 scn
= *((StateCheckNumType
* )bp
);
6535 bp
+= SIZE_STATE_CHECK_NUM
;
6536 addr
= *((RelAddrType
* )bp
);
6538 fprintf(f
, ":%d:(%s%d)", scn
, (addr
>= 0) ? "+" : "", addr
);
6542 GET_MEMNUM_INC(mem
, bp
);
6543 GET_RELADDR_INC(addr
, bp
);
6544 fprintf(f
, ":%d:(%s%d)", mem
, (addr
>= 0) ? "+" : "", addr
);
6548 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6553 if (nextp
) *nextp
= bp
;
6556 # ifdef ONIG_DEBUG_COMPILE
6558 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6562 UChar
* end
= reg
->p
+ reg
->used
;
6564 fprintf(f
, "code length: %d", reg
->used
);
6570 fprintf(f
, "\n%ld:", bp
- reg
->p
);
6572 fprintf(f
, " %ld:", bp
- reg
->p
);
6573 onig_print_compiled_byte_code(f
, bp
, end
, &bp
, reg
->enc
);
6578 # endif /* ONIG_DEBUG_COMPILE */
6580 # ifdef ONIG_DEBUG_PARSE_TREE
6582 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6584 int i
, type
, container_p
= 0;
6589 if (IS_NULL(node
)) {
6590 fprintf(f
, "ERROR: null node!!!\n");
6598 if (NTYPE(node
) == NT_LIST
)
6599 fprintf(f
, "<list:%"PRIxPTR
">\n", (intptr_t )node
);
6601 fprintf(f
, "<alt:%"PRIxPTR
">\n", (intptr_t )node
);
6603 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6604 while (IS_NOT_NULL(node
= NCDR(node
))) {
6605 if (NTYPE(node
) != type
) {
6606 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6609 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6614 fprintf(f
, "<string%s:%"PRIxPTR
">",
6615 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (intptr_t )node
);
6616 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6617 if (*p
>= 0x20 && *p
< 0x7f)
6620 fprintf(f
, " 0x%02x", *p
);
6626 fprintf(f
, "<cclass:%"PRIxPTR
">", (intptr_t )node
);
6627 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs("not ", f
);
6628 if (NCCLASS(node
)->mbuf
) {
6629 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6630 OnigCodePoint
* data
= (OnigCodePoint
* )bbuf
->p
;
6631 OnigCodePoint
* end
= (OnigCodePoint
* )(bbuf
->p
+ bbuf
->used
);
6632 fprintf(f
, "%d", *data
++);
6633 for (; data
< end
; data
+=2) {
6635 fprintf(f
, "%04x-%04x", data
[0], data
[1]);
6641 fprintf(f
, "<ctype:%"PRIxPTR
"> ", (intptr_t )node
);
6642 switch (NCTYPE(node
)->ctype
) {
6643 case ONIGENC_CTYPE_WORD
:
6644 if (NCTYPE(node
)->not != 0)
6645 fputs("not word", f
);
6651 fprintf(f
, "ERROR: undefined ctype.\n");
6657 fprintf(f
, "<anychar:%"PRIxPTR
">", (intptr_t )node
);
6661 fprintf(f
, "<anchor:%"PRIxPTR
"> ", (intptr_t )node
);
6662 switch (NANCHOR(node
)->type
) {
6663 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6664 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6665 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6666 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6667 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6668 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6670 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6671 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6672 # ifdef USE_WORD_BEGIN_END
6673 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6674 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6676 case ANCHOR_PREC_READ
: fputs("prec read", f
); container_p
= TRUE
; break;
6677 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); container_p
= TRUE
; break;
6678 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); container_p
= TRUE
; break;
6679 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); container_p
= TRUE
; break;
6680 case ANCHOR_KEEP
: fputs("keep",f
); break;
6683 fprintf(f
, "ERROR: undefined anchor type.\n");
6691 BRefNode
* br
= NBREF(node
);
6693 fprintf(f
, "<backref:%"PRIxPTR
">", (intptr_t )node
);
6694 for (i
= 0; i
< br
->back_num
; i
++) {
6695 if (i
> 0) fputs(", ", f
);
6696 fprintf(f
, "%d", p
[i
]);
6701 # ifdef USE_SUBEXP_CALL
6704 CallNode
* cn
= NCALL(node
);
6705 fprintf(f
, "<call:%"PRIxPTR
">", (intptr_t )node
);
6706 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6712 fprintf(f
, "<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node
,
6713 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6714 (NQTFR(node
)->greedy
? "" : "?"));
6715 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6719 fprintf(f
, "<enclose:%"PRIxPTR
"> ", (intptr_t )node
);
6720 switch (NENCLOSE(node
)->type
) {
6721 case ENCLOSE_OPTION
:
6722 fprintf(f
, "option:%d", NENCLOSE(node
)->option
);
6724 case ENCLOSE_MEMORY
:
6725 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6727 case ENCLOSE_STOP_BACKTRACK
:
6728 fprintf(f
, "stop-bt");
6730 case ENCLOSE_CONDITION
:
6731 fprintf(f
, "condition:%d", NENCLOSE(node
)->regnum
);
6733 case ENCLOSE_ABSENT
:
6734 fprintf(f
, "absent");
6741 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6745 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6749 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6753 if (container_p
) print_indent_tree(f
, NANCHOR(node
)->target
, indent
+ add
);
6759 print_tree(FILE* f
, Node
* node
)
6761 print_indent_tree(f
, node
, 0);
6763 # endif /* ONIG_DEBUG_PARSE_TREE */
6764 #endif /* ONIG_DEBUG */