1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode
* node
);
44 history_tree_clear(OnigCaptureTreeNode
* node
)
48 if (IS_NOT_NULL(node
)) {
49 for (i
= 0; i
< node
->num_childs
; i
++) {
50 if (IS_NOT_NULL(node
->childs
[i
])) {
51 history_tree_free(node
->childs
[i
]);
54 for (i
= 0; i
< node
->allocated
; i
++) {
55 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
58 node
->beg
= ONIG_REGION_NOTPOS
;
59 node
->end
= ONIG_REGION_NOTPOS
;
65 history_tree_free(OnigCaptureTreeNode
* node
)
67 history_tree_clear(node
);
72 history_root_free(OnigRegion
* r
)
74 if (IS_NOT_NULL(r
->history_root
)) {
75 history_tree_free(r
->history_root
);
76 r
->history_root
= (OnigCaptureTreeNode
* )0;
80 static OnigCaptureTreeNode
*
81 history_node_new(void)
83 OnigCaptureTreeNode
* node
;
85 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
86 CHECK_NULL_RETURN(node
);
87 node
->childs
= (OnigCaptureTreeNode
** )0;
91 node
->beg
= ONIG_REGION_NOTPOS
;
92 node
->end
= ONIG_REGION_NOTPOS
;
98 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
102 if (parent
->num_childs
>= parent
->allocated
) {
105 if (IS_NULL(parent
->childs
)) {
106 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
108 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
111 n
= parent
->allocated
* 2;
113 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
114 sizeof(OnigCaptureTreeNode
*) * n
);
116 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
117 for (i
= parent
->allocated
; i
< n
; i
++) {
118 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
120 parent
->allocated
= n
;
123 parent
->childs
[parent
->num_childs
] = child
;
124 parent
->num_childs
++;
128 static OnigCaptureTreeNode
*
129 history_tree_clone(OnigCaptureTreeNode
* node
)
132 OnigCaptureTreeNode
*clone
, *child
;
134 clone
= history_node_new();
135 CHECK_NULL_RETURN(clone
);
137 clone
->beg
= node
->beg
;
138 clone
->end
= node
->end
;
139 for (i
= 0; i
< node
->num_childs
; i
++) {
140 child
= history_tree_clone(node
->childs
[i
]);
141 if (IS_NULL(child
)) {
142 history_tree_free(clone
);
143 return (OnigCaptureTreeNode
* )0;
145 history_tree_add_child(clone
, child
);
151 extern OnigCaptureTreeNode
*
152 onig_get_capture_tree(OnigRegion
* region
)
154 return region
->history_root
;
156 #endif /* USE_CAPTURE_HISTORY */
159 onig_region_clear(OnigRegion
* region
)
163 for (i
= 0; i
< region
->num_regs
; i
++) {
164 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region
);
172 onig_region_resize(OnigRegion
* region
, int n
)
174 region
->num_regs
= n
;
176 if (n
< ONIG_NREGION
)
179 if (region
->allocated
== 0) {
180 region
->beg
= (int* )xmalloc(n
* sizeof(int));
181 if (region
->beg
== 0)
182 return ONIGERR_MEMORY
;
184 region
->end
= (int* )xmalloc(n
* sizeof(int));
185 if (region
->end
== 0) {
187 return ONIGERR_MEMORY
;
190 region
->allocated
= n
;
192 else if (region
->allocated
< n
) {
195 region
->allocated
= 0;
196 tmp
= (int* )xrealloc(region
->beg
, n
* sizeof(int));
200 return ONIGERR_MEMORY
;
203 tmp
= (int* )xrealloc(region
->end
, n
* sizeof(int));
206 return ONIGERR_MEMORY
;
210 if (region
->beg
== 0 || region
->end
== 0)
211 return ONIGERR_MEMORY
;
213 region
->allocated
= n
;
220 onig_region_resize_clear(OnigRegion
* region
, int n
)
224 r
= onig_region_resize(region
, n
);
225 if (r
!= 0) return r
;
226 onig_region_clear(region
);
231 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
233 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
235 if (at
>= region
->allocated
) {
236 int r
= onig_region_resize(region
, at
+ 1);
240 region
->beg
[at
] = beg
;
241 region
->end
[at
] = end
;
246 onig_region_init(OnigRegion
* region
)
248 region
->num_regs
= 0;
249 region
->allocated
= 0;
250 region
->beg
= (int* )0;
251 region
->end
= (int* )0;
252 region
->history_root
= (OnigCaptureTreeNode
* )0;
256 onig_region_new(void)
260 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
267 onig_region_free(OnigRegion
* r
, int free_self
)
270 if (r
->allocated
> 0) {
271 if (r
->beg
) xfree(r
->beg
);
272 if (r
->end
) xfree(r
->end
);
275 #ifdef USE_CAPTURE_HISTORY
276 history_root_free(r
);
278 if (free_self
) xfree(r
);
283 onig_region_copy(OnigRegion
* to
, OnigRegion
* from
)
285 #define RREGC_SIZE (sizeof(int) * from->num_regs)
288 if (to
== from
) return;
290 onig_region_resize(to
, from
->num_regs
);
291 for (i
= 0; i
< from
->num_regs
; i
++) {
292 to
->beg
[i
] = from
->beg
[i
];
293 to
->end
[i
] = from
->end
[i
];
295 to
->num_regs
= from
->num_regs
;
297 #ifdef USE_CAPTURE_HISTORY
298 history_root_free(to
);
300 if (IS_NOT_NULL(from
->history_root
)) {
301 to
->history_root
= history_tree_clone(from
->history_root
);
308 #define INVALID_STACK_INDEX -1
311 /* used by normal-POP */
312 #define STK_ALT 0x0001
313 #define STK_LOOK_BEHIND_NOT 0x0002
314 #define STK_POS_NOT 0x0003
315 /* handled by normal-POP */
316 #define STK_MEM_START 0x0100
317 #define STK_MEM_END 0x8200
318 #define STK_REPEAT_INC 0x0300
319 #define STK_STATE_CHECK_MARK 0x1000
320 /* avoided by normal-POP */
321 #define STK_NULL_CHECK_START 0x3000
322 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
323 #define STK_MEM_END_MARK 0x8400
324 #define STK_POS 0x0500 /* used when POP-POS */
325 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
326 #define STK_REPEAT 0x0700
327 #define STK_CALL_FRAME 0x0800
328 #define STK_RETURN 0x0900
329 #define STK_VOID 0x0a00 /* for fill a blank */
331 /* stack type check mask */
332 #define STK_MASK_POP_USED 0x00ff
333 #define STK_MASK_TO_VOID_TARGET 0x10ff
334 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
336 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
337 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
338 (msa).stack_p = (void* )0;\
339 (msa).options = (arg_option);\
340 (msa).region = (arg_region);\
341 (msa).start = (arg_start);\
342 (msa).best_len = ONIG_MISMATCH;\
345 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
346 (msa).stack_p = (void* )0;\
347 (msa).options = (arg_option);\
348 (msa).region = (arg_region);\
349 (msa).start = (arg_start);\
353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
355 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
357 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
358 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
359 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
360 offset = ((offset) * (state_num)) >> 3;\
361 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
362 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
363 (msa).state_check_buff = (void* )xmalloc(size);\
364 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
367 (msa).state_check_buff = (void* )xalloca(size);\
368 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
369 (size_t )(size - (offset))); \
370 (msa).state_check_buff_size = size;\
373 (msa).state_check_buff = (void* )0;\
374 (msa).state_check_buff_size = 0;\
378 (msa).state_check_buff = (void* )0;\
379 (msa).state_check_buff_size = 0;\
383 #define MATCH_ARG_FREE(msa) do {\
384 if ((msa).stack_p) xfree((msa).stack_p);\
385 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
386 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
390 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
395 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
397 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
398 stk_alloc = (OnigStackType* )(msa->stack_p);\
399 stk_base = stk_alloc;\
401 stk_end = stk_base + msa->stack_n;\
404 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
405 + sizeof(OnigStackType) * (stack_num));\
406 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
407 stk_base = stk_alloc;\
409 stk_end = stk_base + (stack_num);\
413 #define STACK_SAVE do{\
414 if (stk_base != stk_alloc) {\
415 msa->stack_p = stk_base;\
416 msa->stack_n = stk_end - stk_base;\
420 static unsigned int MatchStackLimitSize
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
423 onig_get_match_stack_limit_size(void)
425 return MatchStackLimitSize
;
429 onig_set_match_stack_limit_size(unsigned int size
)
431 MatchStackLimitSize
= size
;
436 stack_double(OnigStackType
** arg_stk_base
, OnigStackType
** arg_stk_end
,
437 OnigStackType
** arg_stk
, OnigStackType
* stk_alloc
, OnigMatchArg
* msa
)
440 OnigStackType
*x
, *stk_base
, *stk_end
, *stk
;
442 stk_base
= *arg_stk_base
;
443 stk_end
= *arg_stk_end
;
446 n
= stk_end
- stk_base
;
447 if (stk_base
== stk_alloc
&& IS_NULL(msa
->stack_p
)) {
448 x
= (OnigStackType
* )xmalloc(sizeof(OnigStackType
) * n
* 2);
451 return ONIGERR_MEMORY
;
453 xmemcpy(x
, stk_base
, n
* sizeof(OnigStackType
));
458 if (MatchStackLimitSize
!= 0 && n
> MatchStackLimitSize
) {
459 if ((unsigned int )(stk_end
- stk_base
) == MatchStackLimitSize
)
460 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
462 n
= MatchStackLimitSize
;
464 x
= (OnigStackType
* )xrealloc(stk_base
, sizeof(OnigStackType
) * n
);
467 return ONIGERR_MEMORY
;
470 *arg_stk
= x
+ (stk
- stk_base
);
472 *arg_stk_end
= x
+ n
;
476 #define STACK_ENSURE(n) do {\
477 if (stk_end - stk < (n)) {\
478 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
479 if (r != 0) { STACK_SAVE; return r; } \
483 #define STACK_AT(index) (stk_base + (index))
484 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
486 #define STACK_PUSH_TYPE(stack_type) do {\
488 stk->type = (stack_type);\
492 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
494 #ifdef USE_COMBINATION_EXPLOSION_CHECK
495 #define STATE_CHECK_POS(s,snum) \
496 (((s) - str) * num_comb_exp_check + ((snum) - 1))
497 #define STATE_CHECK_VAL(v,snum) do {\
498 if (state_check_buff != NULL) {\
499 int x = STATE_CHECK_POS(s,snum);\
500 (v) = state_check_buff[x/8] & (1<<(x%8));\
506 #define ELSE_IF_STATE_CHECK_MARK(stk) \
507 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
508 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
509 state_check_buff[x/8] |= (1<<(x%8)); \
512 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
514 stk->type = (stack_type);\
515 stk->u.state.pcode = (pat);\
516 stk->u.state.pstr = (s);\
517 stk->u.state.pstr_prev = (sprev);\
518 stk->u.state.state_check = 0;\
522 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
523 stk->type = (stack_type);\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.state_check = 0;\
529 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
531 stk->type = STK_ALT;\
532 stk->u.state.pcode = (pat);\
533 stk->u.state.pstr = (s);\
534 stk->u.state.pstr_prev = (sprev);\
535 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
539 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
540 if (state_check_buff != NULL) {\
542 stk->type = STK_STATE_CHECK_MARK;\
543 stk->u.state.pstr = (s);\
544 stk->u.state.state_check = (snum);\
549 #else /* USE_COMBINATION_EXPLOSION_CHECK */
551 #define ELSE_IF_STATE_CHECK_MARK(stk)
553 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
557 stk->u.state.pstr = (s);\
558 stk->u.state.pstr_prev = (sprev);\
562 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
563 stk->type = (stack_type);\
564 stk->u.state.pcode = (pat);\
567 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
569 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
570 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
571 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
572 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
573 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
574 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
576 #define STACK_PUSH_REPEAT(id, pat) do {\
578 stk->type = STK_REPEAT;\
579 stk->u.repeat.num = (id);\
580 stk->u.repeat.pcode = (pat);\
581 stk->u.repeat.count = 0;\
585 #define STACK_PUSH_REPEAT_INC(sindex) do {\
587 stk->type = STK_REPEAT_INC;\
588 stk->u.repeat_inc.si = (sindex);\
592 #define STACK_PUSH_MEM_START(mnum, s) do {\
594 stk->type = STK_MEM_START;\
595 stk->u.mem.num = (mnum);\
596 stk->u.mem.pstr = (s);\
597 stk->u.mem.start = mem_start_stk[mnum];\
598 stk->u.mem.end = mem_end_stk[mnum];\
599 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
600 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
604 #define STACK_PUSH_MEM_END(mnum, s) do {\
606 stk->type = STK_MEM_END;\
607 stk->u.mem.num = (mnum);\
608 stk->u.mem.pstr = (s);\
609 stk->u.mem.start = mem_start_stk[mnum];\
610 stk->u.mem.end = mem_end_stk[mnum];\
611 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
615 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
617 stk->type = STK_MEM_END_MARK;\
618 stk->u.mem.num = (mnum);\
622 #define STACK_GET_MEM_START(mnum, k) do {\
625 while (k > stk_base) {\
627 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
628 && k->u.mem.num == (mnum)) {\
631 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
632 if (level == 0) break;\
638 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
641 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
642 if (level == 0) (start) = k->u.mem.pstr;\
645 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
648 (end) = k->u.mem.pstr;\
656 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
658 stk->type = STK_NULL_CHECK_START;\
659 stk->u.null_check.num = (cnum);\
660 stk->u.null_check.pstr = (s);\
664 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
666 stk->type = STK_NULL_CHECK_END;\
667 stk->u.null_check.num = (cnum);\
671 #define STACK_PUSH_CALL_FRAME(pat) do {\
673 stk->type = STK_CALL_FRAME;\
674 stk->u.call_frame.ret_addr = (pat);\
678 #define STACK_PUSH_RETURN do {\
680 stk->type = STK_RETURN;\
686 #define STACK_BASE_CHECK(p, at) \
687 if ((p) < stk_base) {\
688 fprintf(stderr, "at %s\n", at);\
692 #define STACK_BASE_CHECK(p, at)
695 #define STACK_POP_ONE do {\
697 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
700 #define STACK_POP do {\
701 switch (pop_level) {\
702 case STACK_POP_LEVEL_FREE:\
705 STACK_BASE_CHECK(stk, "STACK_POP"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 ELSE_IF_STATE_CHECK_MARK(stk);\
710 case STACK_POP_LEVEL_MEM_START:\
713 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
714 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
715 else if (stk->type == STK_MEM_START) {\
716 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
717 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
719 ELSE_IF_STATE_CHECK_MARK(stk);\
725 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
726 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
727 else if (stk->type == STK_MEM_START) {\
728 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
729 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
731 else if (stk->type == STK_REPEAT_INC) {\
732 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
734 else if (stk->type == STK_MEM_END) {\
735 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
736 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
738 ELSE_IF_STATE_CHECK_MARK(stk);\
744 #define STACK_POP_TIL_POS_NOT do {\
747 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
748 if (stk->type == STK_POS_NOT) break;\
749 else if (stk->type == STK_MEM_START) {\
750 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
751 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
753 else if (stk->type == STK_REPEAT_INC) {\
754 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
756 else if (stk->type == STK_MEM_END) {\
757 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
758 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
760 ELSE_IF_STATE_CHECK_MARK(stk);\
764 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
767 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
768 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
769 else if (stk->type == STK_MEM_START) {\
770 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
771 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
773 else if (stk->type == STK_REPEAT_INC) {\
774 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
776 else if (stk->type == STK_MEM_END) {\
777 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
778 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
780 ELSE_IF_STATE_CHECK_MARK(stk);\
784 #define STACK_POS_END(k) do {\
788 STACK_BASE_CHECK(k, "STACK_POS_END"); \
789 if (IS_TO_VOID_TARGET(k)) {\
792 else if (k->type == STK_POS) {\
799 #define STACK_STOP_BT_END do {\
800 OnigStackType *k = stk;\
803 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
804 if (IS_TO_VOID_TARGET(k)) {\
807 else if (k->type == STK_STOP_BT) {\
814 #define STACK_NULL_CHECK(isnull,id,s) do {\
815 OnigStackType* k = stk;\
818 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
819 if (k->type == STK_NULL_CHECK_START) {\
820 if (k->u.null_check.num == (id)) {\
821 (isnull) = (k->u.null_check.pstr == (s));\
828 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
830 OnigStackType* k = stk;\
833 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
834 if (k->type == STK_NULL_CHECK_START) {\
835 if (k->u.null_check.num == (id)) {\
837 (isnull) = (k->u.null_check.pstr == (s));\
843 else if (k->type == STK_NULL_CHECK_END) {\
849 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
850 OnigStackType* k = stk;\
853 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
854 if (k->type == STK_NULL_CHECK_START) {\
855 if (k->u.null_check.num == (id)) {\
856 if (k->u.null_check.pstr != (s)) {\
864 if (k->type == STK_MEM_START) {\
865 if (k->u.mem.end == INVALID_STACK_INDEX) {\
866 (isnull) = 0; break;\
868 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
869 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
871 endp = (UChar* )k->u.mem.end;\
872 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
873 (isnull) = 0; break;\
875 else if (endp != s) {\
876 (isnull) = -1; /* empty, but position changed */ \
888 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
890 OnigStackType* k = stk;\
893 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
894 if (k->type == STK_NULL_CHECK_START) {\
895 if (k->u.null_check.num == (id)) {\
897 if (k->u.null_check.pstr != (s)) {\
905 if (k->type == STK_MEM_START) {\
906 if (k->u.mem.end == INVALID_STACK_INDEX) {\
907 (isnull) = 0; break;\
909 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
910 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
912 endp = (UChar* )k->u.mem.end;\
913 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
914 (isnull) = 0; break;\
916 else if (endp != s) {\
917 (isnull) = -1; /* empty, but position changed */ \
930 else if (k->type == STK_NULL_CHECK_END) {\
931 if (k->u.null_check.num == (id)) level++;\
936 #define STACK_GET_REPEAT(id, k) do {\
941 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
942 if (k->type == STK_REPEAT) {\
944 if (k->u.repeat.num == (id)) {\
949 else if (k->type == STK_CALL_FRAME) level--;\
950 else if (k->type == STK_RETURN) level++;\
954 #define STACK_RETURN(addr) do {\
956 OnigStackType* k = stk;\
959 STACK_BASE_CHECK(k, "STACK_RETURN"); \
960 if (k->type == STK_CALL_FRAME) {\
962 (addr) = k->u.call_frame.ret_addr;\
967 else if (k->type == STK_RETURN)\
973 #define STRING_CMP(s1,s2,len) do {\
975 if (*s1++ != *s2++) goto fail;\
979 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
980 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
984 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
985 UChar
* s1
, UChar
** ps2
, int mblen
)
987 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
988 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
989 UChar
*p1
, *p2
, *end1
, *s2
, *end2
;
996 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, end1
, buf1
);
997 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, end2
, buf2
);
998 if (len1
!= len2
) return 0;
1001 while (len1
-- > 0) {
1002 if (*p1
!= *p2
) return 0;
1012 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1014 while (len-- > 0) {\
1015 if (*s1++ != *s2++) {\
1016 is_fail = 1; break;\
1021 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1022 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1029 #define IS_EMPTY_STR (str == end)
1030 #define ON_STR_BEGIN(s) ((s) == str)
1031 #define ON_STR_END(s) ((s) == end)
1032 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1033 #define DATA_ENSURE_CHECK1 (s < right_range)
1034 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1035 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1037 #define DATA_ENSURE_CHECK1 (s < end)
1038 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1039 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1040 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1043 #ifdef USE_CAPTURE_HISTORY
1045 make_capture_history_tree(OnigCaptureTreeNode
* node
, OnigStackType
** kp
,
1046 OnigStackType
* stk_top
, UChar
* str
, regex_t
* reg
)
1049 OnigCaptureTreeNode
* child
;
1050 OnigStackType
* k
= *kp
;
1052 while (k
< stk_top
) {
1053 if (k
->type
== STK_MEM_START
) {
1055 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
1056 BIT_STATUS_AT(reg
->capture_history
, n
) != 0) {
1057 child
= history_node_new();
1058 CHECK_NULL_RETURN_MEMERR(child
);
1060 child
->beg
= (int )(k
->u
.mem
.pstr
- str
);
1061 r
= history_tree_add_child(node
, child
);
1062 if (r
!= 0) return r
;
1064 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
1065 if (r
!= 0) return r
;
1068 child
->end
= (int )(k
->u
.mem
.pstr
- str
);
1071 else if (k
->type
== STK_MEM_END
) {
1072 if (k
->u
.mem
.num
== node
->group
) {
1073 node
->end
= (int )(k
->u
.mem
.pstr
- str
);
1081 return 1; /* 1: root node ending. */
1085 #ifdef USE_BACKREF_WITH_LEVEL
1086 static int mem_is_in_memp(int mem
, int num
, UChar
* memp
)
1091 for (i
= 0; i
< num
; i
++) {
1092 GET_MEMNUM_INC(m
, memp
);
1093 if (mem
== (int )m
) return 1;
1098 static int backref_match_at_nested_level(regex_t
* reg
1099 , OnigStackType
* top
, OnigStackType
* stk_base
1100 , int ignore_case
, int case_fold_flag
1101 , int nest
, int mem_num
, UChar
* memp
, UChar
** s
, const UChar
* send
)
1103 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
1110 while (k
>= stk_base
) {
1111 if (k
->type
== STK_CALL_FRAME
) {
1114 else if (k
->type
== STK_RETURN
) {
1117 else if (level
== nest
) {
1118 if (k
->type
== STK_MEM_START
) {
1119 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1120 pstart
= k
->u
.mem
.pstr
;
1121 if (pend
!= NULL_UCHARP
) {
1122 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
1126 if (ignore_case
!= 0) {
1127 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
1128 pstart
, &ss
, (int )(pend
- pstart
)) == 0)
1129 return 0; /* or goto next_mem; */
1133 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
1142 else if (k
->type
== STK_MEM_END
) {
1143 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1144 pend
= k
->u
.mem
.pstr
;
1153 #endif /* USE_BACKREF_WITH_LEVEL */
1156 #ifdef ONIG_DEBUG_STATISTICS
1158 #define USE_TIMEOFDAY
1160 #ifdef USE_TIMEOFDAY
1161 #ifdef HAVE_SYS_TIME_H
1162 #include <sys/time.h>
1164 #ifdef HAVE_UNISTD_H
1167 static struct timeval ts
, te
;
1168 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1169 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1170 (((te).tv_sec - (ts).tv_sec)*1000000))
1172 #ifdef HAVE_SYS_TIMES_H
1173 #include <sys/times.h>
1175 static struct tms ts
, te
;
1176 #define GETTIME(t) times(&(t))
1177 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1180 static int OpCounter
[256];
1181 static int OpPrevCounter
[256];
1182 static unsigned long OpTime
[256];
1183 static int OpCurr
= OP_FINISH
;
1184 static int OpPrevTarget
= OP_FAIL
;
1185 static int MaxStackDepth
= 0;
1187 #define MOP_IN(opcode) do {\
1188 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1190 OpCounter[opcode]++;\
1194 #define MOP_OUT do {\
1196 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1200 onig_statistics_init(void)
1203 for (i
= 0; i
< 256; i
++) {
1204 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
1210 onig_print_statistics(FILE* f
)
1213 fprintf(f
, " count prev time\n");
1214 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
1215 fprintf(f
, "%8d: %8d: %10ld: %s\n",
1216 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OnigOpInfo
[i
].name
);
1218 fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
1221 #define STACK_INC do {\
1223 if (stk - stk_base > MaxStackDepth) \
1224 MaxStackDepth = stk - stk_base;\
1228 #define STACK_INC stk++
1230 #define MOP_IN(opcode)
1235 /* matching region of POSIX API */
1236 typedef int regoff_t
;
1243 /* match data(str - end) from position (sstart). */
1244 /* if sstart == str then set sprev to NULL. */
1246 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
1247 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1248 const UChar
* right_range
,
1250 const UChar
* sstart
, UChar
* sprev
, OnigMatchArg
* msa
)
1252 static UChar FinishCode
[] = { OP_FINISH
};
1254 int i
, n
, num_mem
, best_len
, pop_level
;
1255 LengthType tlen
, tlen2
;
1258 OnigOptionType option
= reg
->options
;
1259 OnigEncoding encode
= reg
->enc
;
1260 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
1261 UChar
*s
, *q
, *sbegin
;
1264 OnigStackType
*stk_alloc
, *stk_base
, *stk
, *stk_end
;
1265 OnigStackType
*stkp
; /* used as any purpose. */
1267 OnigStackIndex
*repeat_stk
;
1268 OnigStackIndex
*mem_start_stk
, *mem_end_stk
;
1269 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1271 unsigned char* state_check_buff
= msa
->state_check_buff
;
1272 int num_comb_exp_check
= reg
->num_comb_exp_check
;
1274 n
= reg
->num_repeat
+ reg
->num_mem
* 2;
1276 STACK_INIT(alloca_base
, n
, INIT_MATCH_STACK_SIZE
);
1277 pop_level
= reg
->stack_pop_level
;
1278 num_mem
= reg
->num_mem
;
1279 repeat_stk
= (OnigStackIndex
* )alloca_base
;
1281 mem_start_stk
= (OnigStackIndex
* )(repeat_stk
+ reg
->num_repeat
);
1282 mem_end_stk
= mem_start_stk
+ num_mem
;
1283 mem_start_stk
--; /* for index start from 1,
1284 mem_start_stk[1]..mem_start_stk[num_mem] */
1285 mem_end_stk
--; /* for index start from 1,
1286 mem_end_stk[1]..mem_end_stk[num_mem] */
1287 for (i
= 1; i
<= num_mem
; i
++) {
1288 mem_start_stk
[i
] = mem_end_stk
[i
] = INVALID_STACK_INDEX
;
1291 #ifdef ONIG_DEBUG_MATCH
1292 fprintf(stderr
, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1293 (int )str
, (int )end
, (int )sstart
, (int )sprev
);
1294 fprintf(stderr
, "size: %d, start offset: %d\n",
1295 (int )(end
- str
), (int )(sstart
- str
));
1298 STACK_PUSH_ENSURED(STK_ALT
, FinishCode
); /* bottom stack */
1299 best_len
= ONIG_MISMATCH
;
1300 s
= (UChar
* )sstart
;
1302 #ifdef ONIG_DEBUG_MATCH
1304 UChar
*q
, *bp
, buf
[50];
1306 fprintf(stderr
, "%4d> \"", (int )(s
- str
));
1308 for (i
= 0, q
= s
; i
< 7 && q
< end
; i
++) {
1309 len
= enclen(encode
, q
);
1310 while (len
-- > 0) *bp
++ = *q
++;
1312 if (q
< end
) { xmemcpy(bp
, "...\"", 4); bp
+= 4; }
1313 else { xmemcpy(bp
, "\"", 1); bp
+= 1; }
1315 fputs((char* )buf
, stderr
);
1316 for (i
= 0; i
< 20 - (bp
- buf
); i
++) fputc(' ', stderr
);
1317 onig_print_compiled_byte_code(stderr
, p
, NULL
, encode
);
1318 fprintf(stderr
, "\n");
1324 case OP_END
: MOP_IN(OP_END
);
1328 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1329 if (IS_FIND_LONGEST(option
)) {
1330 if (n
> msa
->best_len
) {
1332 msa
->best_s
= (UChar
* )sstart
;
1339 region
= msa
->region
;
1341 #ifdef USE_POSIX_API_REGION_OPTION
1342 if (IS_POSIX_REGION(msa
->options
)) {
1343 posix_regmatch_t
* rmt
= (posix_regmatch_t
* )region
;
1345 rmt
[0].rm_so
= sstart
- str
;
1346 rmt
[0].rm_eo
= s
- str
;
1347 for (i
= 1; i
<= num_mem
; i
++) {
1348 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1349 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1350 rmt
[i
].rm_so
= STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
;
1352 rmt
[i
].rm_so
= (UChar
* )((void* )(mem_start_stk
[i
])) - str
;
1354 rmt
[i
].rm_eo
= (BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1355 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1356 : (UChar
* )((void* )mem_end_stk
[i
])) - str
;
1359 rmt
[i
].rm_so
= rmt
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
1364 #endif /* USE_POSIX_API_REGION_OPTION */
1365 region
->beg
[0] = sstart
- str
;
1366 region
->end
[0] = s
- str
;
1367 for (i
= 1; i
<= num_mem
; i
++) {
1368 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1369 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1370 region
->beg
[i
] = STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
;
1372 region
->beg
[i
] = (UChar
* )((void* )mem_start_stk
[i
]) - str
;
1374 region
->end
[i
] = (BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1375 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1376 : (UChar
* )((void* )mem_end_stk
[i
])) - str
;
1379 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
1383 #ifdef USE_CAPTURE_HISTORY
1384 if (reg
->capture_history
!= 0) {
1386 OnigCaptureTreeNode
* node
;
1388 if (IS_NULL(region
->history_root
)) {
1389 region
->history_root
= node
= history_node_new();
1390 CHECK_NULL_RETURN_MEMERR(node
);
1393 node
= region
->history_root
;
1394 history_tree_clear(node
);
1398 node
->beg
= sstart
- str
;
1399 node
->end
= s
- str
;
1402 r
= make_capture_history_tree(region
->history_root
, &stkp
,
1403 stk
, (UChar
* )str
, reg
);
1405 best_len
= r
; /* error code */
1409 #endif /* USE_CAPTURE_HISTORY */
1410 #ifdef USE_POSIX_API_REGION_OPTION
1411 } /* else IS_POSIX_REGION() */
1414 } /* n > best_len */
1416 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1421 if (IS_FIND_CONDITION(option
)) {
1422 if (IS_FIND_NOT_EMPTY(option
) && s
== sstart
) {
1423 best_len
= ONIG_MISMATCH
;
1424 goto fail
; /* for retry */
1426 if (IS_FIND_LONGEST(option
) && DATA_ENSURE_CHECK1
) {
1427 goto fail
; /* for retry */
1431 /* default behavior: return first-matching result. */
1435 case OP_EXACT1
: MOP_IN(OP_EXACT1
);
1438 if (*p
!= *s
) goto fail
;
1441 if (*p
!= *s
++) goto fail
;
1447 case OP_EXACT1_IC
: MOP_IN(OP_EXACT1_IC
);
1450 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1453 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1454 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1469 case OP_EXACT2
: MOP_IN(OP_EXACT2
);
1471 if (*p
!= *s
) goto fail
;
1473 if (*p
!= *s
) goto fail
;
1480 case OP_EXACT3
: MOP_IN(OP_EXACT3
);
1482 if (*p
!= *s
) goto fail
;
1484 if (*p
!= *s
) goto fail
;
1486 if (*p
!= *s
) goto fail
;
1493 case OP_EXACT4
: MOP_IN(OP_EXACT4
);
1495 if (*p
!= *s
) goto fail
;
1497 if (*p
!= *s
) goto fail
;
1499 if (*p
!= *s
) goto fail
;
1501 if (*p
!= *s
) goto fail
;
1508 case OP_EXACT5
: MOP_IN(OP_EXACT5
);
1510 if (*p
!= *s
) goto fail
;
1512 if (*p
!= *s
) goto fail
;
1514 if (*p
!= *s
) goto fail
;
1516 if (*p
!= *s
) goto fail
;
1518 if (*p
!= *s
) goto fail
;
1525 case OP_EXACTN
: MOP_IN(OP_EXACTN
);
1526 GET_LENGTH_INC(tlen
, p
);
1528 while (tlen
-- > 0) {
1529 if (*p
++ != *s
++) goto fail
;
1536 case OP_EXACTN_IC
: MOP_IN(OP_EXACTN_IC
);
1539 UChar
*q
, *endp
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1541 GET_LENGTH_INC(tlen
, p
);
1547 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1548 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1554 if (*p
!= *q
) goto fail
;
1564 case OP_EXACTMB2N1
: MOP_IN(OP_EXACTMB2N1
);
1566 if (*p
!= *s
) goto fail
;
1568 if (*p
!= *s
) goto fail
;
1573 case OP_EXACTMB2N2
: MOP_IN(OP_EXACTMB2N2
);
1575 if (*p
!= *s
) goto fail
;
1577 if (*p
!= *s
) goto fail
;
1580 if (*p
!= *s
) goto fail
;
1582 if (*p
!= *s
) goto fail
;
1588 case OP_EXACTMB2N3
: MOP_IN(OP_EXACTMB2N3
);
1590 if (*p
!= *s
) goto fail
;
1592 if (*p
!= *s
) goto fail
;
1594 if (*p
!= *s
) goto fail
;
1596 if (*p
!= *s
) goto fail
;
1599 if (*p
!= *s
) goto fail
;
1601 if (*p
!= *s
) goto fail
;
1607 case OP_EXACTMB2N
: MOP_IN(OP_EXACTMB2N
);
1608 GET_LENGTH_INC(tlen
, p
);
1609 DATA_ENSURE(tlen
* 2);
1610 while (tlen
-- > 0) {
1611 if (*p
!= *s
) goto fail
;
1613 if (*p
!= *s
) goto fail
;
1621 case OP_EXACTMB3N
: MOP_IN(OP_EXACTMB3N
);
1622 GET_LENGTH_INC(tlen
, p
);
1623 DATA_ENSURE(tlen
* 3);
1624 while (tlen
-- > 0) {
1625 if (*p
!= *s
) goto fail
;
1627 if (*p
!= *s
) goto fail
;
1629 if (*p
!= *s
) goto fail
;
1637 case OP_EXACTMBN
: MOP_IN(OP_EXACTMBN
);
1638 GET_LENGTH_INC(tlen
, p
); /* mb-len */
1639 GET_LENGTH_INC(tlen2
, p
); /* string len */
1642 while (tlen2
-- > 0) {
1643 if (*p
!= *s
) goto fail
;
1651 case OP_CCLASS
: MOP_IN(OP_CCLASS
);
1653 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0) goto fail
;
1655 s
+= enclen(encode
, s
, end
); /* OP_CCLASS can match mb-code. \D, \S */
1659 case OP_CCLASS_MB
: MOP_IN(OP_CCLASS_MB
);
1660 if (! ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) goto fail
;
1663 GET_LENGTH_INC(tlen
, p
);
1670 mb_len
= enclen(encode
, s
, end
);
1671 DATA_ENSURE(mb_len
);
1674 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1676 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1677 if (! onig_is_in_code_range(p
, code
)) goto fail
;
1681 if (! onig_is_in_code_range(q
, code
)) goto fail
;
1688 case OP_CCLASS_MIX
: MOP_IN(OP_CCLASS_MIX
);
1690 if (ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1695 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0)
1699 GET_LENGTH_INC(tlen
, p
);
1706 case OP_CCLASS_NOT
: MOP_IN(OP_CCLASS_NOT
);
1708 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0) goto fail
;
1710 s
+= enclen(encode
, s
, end
);
1714 case OP_CCLASS_MB_NOT
: MOP_IN(OP_CCLASS_MB_NOT
);
1716 if (! ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1718 GET_LENGTH_INC(tlen
, p
);
1720 goto cc_mb_not_success
;
1724 GET_LENGTH_INC(tlen
, p
);
1728 int mb_len
= enclen(encode
, s
, end
);
1730 if (! DATA_ENSURE_CHECK(mb_len
)) {
1734 goto cc_mb_not_success
;
1739 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1741 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1742 if (onig_is_in_code_range(p
, code
)) goto fail
;
1746 if (onig_is_in_code_range(q
, code
)) goto fail
;
1755 case OP_CCLASS_MIX_NOT
: MOP_IN(OP_CCLASS_MIX_NOT
);
1757 if (ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1762 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0)
1766 GET_LENGTH_INC(tlen
, p
);
1773 case OP_CCLASS_NODE
: MOP_IN(OP_CCLASS_NODE
);
1781 GET_POINTER_INC(node
, p
);
1782 mb_len
= enclen(encode
, s
, end
);
1786 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1787 if (onig_is_code_in_cc_len(mb_len
, code
, node
) == 0) goto fail
;
1792 case OP_ANYCHAR
: MOP_IN(OP_ANYCHAR
);
1794 n
= enclen(encode
, s
, end
);
1796 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1801 case OP_ANYCHAR_ML
: MOP_IN(OP_ANYCHAR_ML
);
1803 n
= enclen(encode
, s
, end
);
1809 case OP_ANYCHAR_STAR
: MOP_IN(OP_ANYCHAR_STAR
);
1810 while (DATA_ENSURE_CHECK1
) {
1811 STACK_PUSH_ALT(p
, s
, sprev
);
1812 n
= enclen(encode
, s
, end
);
1814 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1821 case OP_ANYCHAR_ML_STAR
: MOP_IN(OP_ANYCHAR_ML_STAR
);
1822 while (DATA_ENSURE_CHECK1
) {
1823 STACK_PUSH_ALT(p
, s
, sprev
);
1824 n
= enclen(encode
, s
, end
);
1838 case OP_ANYCHAR_STAR_PEEK_NEXT
: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT
);
1839 while (DATA_ENSURE_CHECK1
) {
1841 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1843 n
= enclen(encode
, s
, end
);
1845 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1853 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1854 while (DATA_ENSURE_CHECK1
) {
1856 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1858 n
= enclen(encode
, s
, end
);
1873 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1874 case OP_STATE_CHECK_ANYCHAR_STAR
: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR
);
1875 GET_STATE_CHECK_NUM_INC(mem
, p
);
1876 while (DATA_ENSURE_CHECK1
) {
1877 STATE_CHECK_VAL(scv
, mem
);
1880 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1881 n
= enclen(encode
, s
);
1883 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1890 case OP_STATE_CHECK_ANYCHAR_ML_STAR
:
1891 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR
);
1893 GET_STATE_CHECK_NUM_INC(mem
, p
);
1894 while (DATA_ENSURE_CHECK1
) {
1895 STATE_CHECK_VAL(scv
, mem
);
1898 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1899 n
= enclen(encode
, s
);
1912 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1914 case OP_WORD
: MOP_IN(OP_WORD
);
1916 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1919 s
+= enclen(encode
, s
, end
);
1923 case OP_NOT_WORD
: MOP_IN(OP_NOT_WORD
);
1925 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1928 s
+= enclen(encode
, s
, end
);
1932 case OP_WORD_BOUND
: MOP_IN(OP_WORD_BOUND
);
1933 if (ON_STR_BEGIN(s
)) {
1935 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1938 else if (ON_STR_END(s
)) {
1939 if (! ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1943 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1944 == ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1951 case OP_NOT_WORD_BOUND
: MOP_IN(OP_NOT_WORD_BOUND
);
1952 if (ON_STR_BEGIN(s
)) {
1953 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1956 else if (ON_STR_END(s
)) {
1957 if (ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1961 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1962 != ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1969 #ifdef USE_WORD_BEGIN_END
1970 case OP_WORD_BEGIN
: MOP_IN(OP_WORD_BEGIN
);
1971 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1972 if (ON_STR_BEGIN(s
) || !ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1980 case OP_WORD_END
: MOP_IN(OP_WORD_END
);
1981 if (!ON_STR_BEGIN(s
) && ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1982 if (ON_STR_END(s
) || !ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1991 case OP_BEGIN_BUF
: MOP_IN(OP_BEGIN_BUF
);
1992 if (! ON_STR_BEGIN(s
)) goto fail
;
1998 case OP_END_BUF
: MOP_IN(OP_END_BUF
);
1999 if (! ON_STR_END(s
)) goto fail
;
2005 case OP_BEGIN_LINE
: MOP_IN(OP_BEGIN_LINE
);
2006 if (ON_STR_BEGIN(s
)) {
2007 if (IS_NOTBOL(msa
->options
)) goto fail
;
2011 else if (ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
) && !ON_STR_END(s
)) {
2018 case OP_END_LINE
: MOP_IN(OP_END_LINE
);
2019 if (ON_STR_END(s
)) {
2020 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2021 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2023 if (IS_NOTEOL(msa
->options
)) goto fail
;
2026 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2030 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) {
2034 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2035 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2043 case OP_SEMI_END_BUF
: MOP_IN(OP_SEMI_END_BUF
);
2044 if (ON_STR_END(s
)) {
2045 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2046 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2048 if (IS_NOTEOL(msa
->options
)) goto fail
;
2051 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2055 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
) &&
2056 ON_STR_END(s
+ enclen(encode
, s
, end
))) {
2060 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2061 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2062 UChar
* ss
= s
+ enclen(encode
, s
);
2063 ss
+= enclen(encode
, ss
);
2064 if (ON_STR_END(ss
)) {
2073 case OP_BEGIN_POSITION
: MOP_IN(OP_BEGIN_POSITION
);
2074 if (s
!= msa
->start
)
2081 case OP_MEMORY_START_PUSH
: MOP_IN(OP_MEMORY_START_PUSH
);
2082 GET_MEMNUM_INC(mem
, p
);
2083 STACK_PUSH_MEM_START(mem
, s
);
2088 case OP_MEMORY_START
: MOP_IN(OP_MEMORY_START
);
2089 GET_MEMNUM_INC(mem
, p
);
2090 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2095 case OP_MEMORY_END_PUSH
: MOP_IN(OP_MEMORY_END_PUSH
);
2096 GET_MEMNUM_INC(mem
, p
);
2097 STACK_PUSH_MEM_END(mem
, s
);
2102 case OP_MEMORY_END
: MOP_IN(OP_MEMORY_END
);
2103 GET_MEMNUM_INC(mem
, p
);
2104 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2109 #ifdef USE_SUBEXP_CALL
2110 case OP_MEMORY_END_PUSH_REC
: MOP_IN(OP_MEMORY_END_PUSH_REC
);
2111 GET_MEMNUM_INC(mem
, p
);
2112 STACK_GET_MEM_START(mem
, stkp
); /* should be before push mem-end. */
2113 STACK_PUSH_MEM_END(mem
, s
);
2114 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2119 case OP_MEMORY_END_REC
: MOP_IN(OP_MEMORY_END_REC
);
2120 GET_MEMNUM_INC(mem
, p
);
2121 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2122 STACK_GET_MEM_START(mem
, stkp
);
2124 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2125 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2127 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )stkp
->u
.mem
.pstr
);
2129 STACK_PUSH_MEM_END_MARK(mem
);
2135 case OP_BACKREF1
: MOP_IN(OP_BACKREF1
);
2140 case OP_BACKREF2
: MOP_IN(OP_BACKREF2
);
2145 case OP_BACKREFN
: MOP_IN(OP_BACKREFN
);
2146 GET_MEMNUM_INC(mem
, p
);
2150 UChar
*pstart
, *pend
;
2152 /* if you want to remove following line,
2153 you should check in parse and compile time. */
2154 if (mem
> num_mem
) goto fail
;
2155 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2156 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2158 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2159 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2161 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2163 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2164 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2165 : (UChar
* )((void* )mem_end_stk
[mem
]));
2169 STRING_CMP(pstart
, s
, n
);
2170 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2178 case OP_BACKREFN_IC
: MOP_IN(OP_BACKREFN_IC
);
2179 GET_MEMNUM_INC(mem
, p
);
2182 UChar
*pstart
, *pend
;
2184 /* if you want to remove following line,
2185 you should check in parse and compile time. */
2186 if (mem
> num_mem
) goto fail
;
2187 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2188 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2190 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2191 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2193 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2195 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2196 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2197 : (UChar
* )((void* )mem_end_stk
[mem
]));
2201 STRING_CMP_IC(case_fold_flag
, pstart
, &s
, n
);
2202 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2210 case OP_BACKREF_MULTI
: MOP_IN(OP_BACKREF_MULTI
);
2213 UChar
*pstart
, *pend
, *swork
;
2215 GET_LENGTH_INC(tlen
, p
);
2216 for (i
= 0; i
< tlen
; i
++) {
2217 GET_MEMNUM_INC(mem
, p
);
2219 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2220 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2222 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2223 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2225 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2227 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2228 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2229 : (UChar
* )((void* )mem_end_stk
[mem
]));
2234 STRING_CMP_VALUE(pstart
, swork
, n
, is_fail
);
2235 if (is_fail
) continue;
2237 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2240 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2241 break; /* success */
2243 if (i
== tlen
) goto fail
;
2249 case OP_BACKREF_MULTI_IC
: MOP_IN(OP_BACKREF_MULTI_IC
);
2252 UChar
*pstart
, *pend
, *swork
;
2254 GET_LENGTH_INC(tlen
, p
);
2255 for (i
= 0; i
< tlen
; i
++) {
2256 GET_MEMNUM_INC(mem
, p
);
2258 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2259 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2261 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2262 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2264 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2266 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2267 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2268 : (UChar
* )((void* )mem_end_stk
[mem
]));
2273 STRING_CMP_VALUE_IC(case_fold_flag
, pstart
, &swork
, n
, is_fail
);
2274 if (is_fail
) continue;
2276 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2279 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2280 break; /* success */
2282 if (i
== tlen
) goto fail
;
2288 #ifdef USE_BACKREF_WITH_LEVEL
2289 case OP_BACKREF_WITH_LEVEL
:
2295 GET_OPTION_INC(ic
, p
);
2296 GET_LENGTH_INC(level
, p
);
2297 GET_LENGTH_INC(tlen
, p
);
2300 if (backref_match_at_nested_level(reg
, stk
, stk_base
, ic
2301 , case_fold_flag
, (int )level
, (int )tlen
, p
, &s
, end
)) {
2302 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2305 p
+= (SIZE_MEMNUM
* tlen
);
2317 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2318 case OP_SET_OPTION_PUSH
: MOP_IN(OP_SET_OPTION_PUSH
);
2319 GET_OPTION_INC(option
, p
);
2320 STACK_PUSH_ALT(p
, s
, sprev
);
2321 p
+= SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
;
2326 case OP_SET_OPTION
: MOP_IN(OP_SET_OPTION
);
2327 GET_OPTION_INC(option
, p
);
2333 case OP_NULL_CHECK_START
: MOP_IN(OP_NULL_CHECK_START
);
2334 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2335 STACK_PUSH_NULL_CHECK_START(mem
, s
);
2340 case OP_NULL_CHECK_END
: MOP_IN(OP_NULL_CHECK_END
);
2344 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2345 STACK_NULL_CHECK(isnull
, mem
, s
);
2347 #ifdef ONIG_DEBUG_MATCH
2348 fprintf(stderr
, "NULL_CHECK_END: skip id:%d, s:%d\n",
2349 (int )mem
, (int )s
);
2352 /* empty loop founded, skip next instruction */
2359 case OP_REPEAT_INC_NG
:
2360 case OP_REPEAT_INC_SG
:
2361 case OP_REPEAT_INC_NG_SG
:
2365 goto unexpected_bytecode_error
;
2374 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2375 case OP_NULL_CHECK_END_MEMST
: MOP_IN(OP_NULL_CHECK_END_MEMST
);
2379 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2380 STACK_NULL_CHECK_MEMST(isnull
, mem
, s
, reg
);
2382 #ifdef ONIG_DEBUG_MATCH
2383 fprintf(stderr
, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2384 (int )mem
, (int )s
);
2386 if (isnull
== -1) goto fail
;
2387 goto null_check_found
;
2395 #ifdef USE_SUBEXP_CALL
2396 case OP_NULL_CHECK_END_MEMST_PUSH
:
2397 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH
);
2401 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2402 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2403 STACK_NULL_CHECK_MEMST_REC(isnull
, mem
, s
, reg
);
2405 STACK_NULL_CHECK_REC(isnull
, mem
, s
);
2408 #ifdef ONIG_DEBUG_MATCH
2409 fprintf(stderr
, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2410 (int )mem
, (int )s
);
2412 if (isnull
== -1) goto fail
;
2413 goto null_check_found
;
2416 STACK_PUSH_NULL_CHECK_END(mem
);
2424 case OP_JUMP
: MOP_IN(OP_JUMP
);
2425 GET_RELADDR_INC(addr
, p
);
2428 CHECK_INTERRUPT_IN_MATCH_AT
;
2432 case OP_PUSH
: MOP_IN(OP_PUSH
);
2433 GET_RELADDR_INC(addr
, p
);
2434 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2439 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2440 case OP_STATE_CHECK_PUSH
: MOP_IN(OP_STATE_CHECK_PUSH
);
2441 GET_STATE_CHECK_NUM_INC(mem
, p
);
2442 STATE_CHECK_VAL(scv
, mem
);
2445 GET_RELADDR_INC(addr
, p
);
2446 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2451 case OP_STATE_CHECK_PUSH_OR_JUMP
: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP
);
2452 GET_STATE_CHECK_NUM_INC(mem
, p
);
2453 GET_RELADDR_INC(addr
, p
);
2454 STATE_CHECK_VAL(scv
, mem
);
2459 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2465 case OP_STATE_CHECK
: MOP_IN(OP_STATE_CHECK
);
2466 GET_STATE_CHECK_NUM_INC(mem
, p
);
2467 STATE_CHECK_VAL(scv
, mem
);
2470 STACK_PUSH_STATE_CHECK(s
, mem
);
2474 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2476 case OP_POP
: MOP_IN(OP_POP
);
2482 case OP_PUSH_OR_JUMP_EXACT1
: MOP_IN(OP_PUSH_OR_JUMP_EXACT1
);
2483 GET_RELADDR_INC(addr
, p
);
2484 if (*p
== *s
&& DATA_ENSURE_CHECK1
) {
2486 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2495 case OP_PUSH_IF_PEEK_NEXT
: MOP_IN(OP_PUSH_IF_PEEK_NEXT
);
2496 GET_RELADDR_INC(addr
, p
);
2499 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2508 case OP_REPEAT
: MOP_IN(OP_REPEAT
);
2510 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2511 GET_RELADDR_INC(addr
, p
);
2514 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2515 STACK_PUSH_REPEAT(mem
, p
);
2517 if (reg
->repeat_range
[mem
].lower
== 0) {
2518 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2525 case OP_REPEAT_NG
: MOP_IN(OP_REPEAT_NG
);
2527 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2528 GET_RELADDR_INC(addr
, p
);
2531 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2532 STACK_PUSH_REPEAT(mem
, p
);
2534 if (reg
->repeat_range
[mem
].lower
== 0) {
2535 STACK_PUSH_ALT(p
, s
, sprev
);
2543 case OP_REPEAT_INC
: MOP_IN(OP_REPEAT_INC
);
2544 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2545 si
= repeat_stk
[mem
];
2546 stkp
= STACK_AT(si
);
2549 stkp
->u
.repeat
.count
++;
2550 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].upper
) {
2551 /* end of repeat. Nothing to do. */
2553 else if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2554 STACK_PUSH_ALT(p
, s
, sprev
);
2555 p
= STACK_AT(si
)->u
.repeat
.pcode
; /* Don't use stkp after PUSH. */
2558 p
= stkp
->u
.repeat
.pcode
;
2560 STACK_PUSH_REPEAT_INC(si
);
2562 CHECK_INTERRUPT_IN_MATCH_AT
;
2566 case OP_REPEAT_INC_SG
: MOP_IN(OP_REPEAT_INC_SG
);
2567 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2568 STACK_GET_REPEAT(mem
, stkp
);
2569 si
= GET_STACK_INDEX(stkp
);
2573 case OP_REPEAT_INC_NG
: MOP_IN(OP_REPEAT_INC_NG
);
2574 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2575 si
= repeat_stk
[mem
];
2576 stkp
= STACK_AT(si
);
2579 stkp
->u
.repeat
.count
++;
2580 if (stkp
->u
.repeat
.count
< reg
->repeat_range
[mem
].upper
) {
2581 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2582 UChar
* pcode
= stkp
->u
.repeat
.pcode
;
2584 STACK_PUSH_REPEAT_INC(si
);
2585 STACK_PUSH_ALT(pcode
, s
, sprev
);
2588 p
= stkp
->u
.repeat
.pcode
;
2589 STACK_PUSH_REPEAT_INC(si
);
2592 else if (stkp
->u
.repeat
.count
== reg
->repeat_range
[mem
].upper
) {
2593 STACK_PUSH_REPEAT_INC(si
);
2596 CHECK_INTERRUPT_IN_MATCH_AT
;
2600 case OP_REPEAT_INC_NG_SG
: MOP_IN(OP_REPEAT_INC_NG_SG
);
2601 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2602 STACK_GET_REPEAT(mem
, stkp
);
2603 si
= GET_STACK_INDEX(stkp
);
2607 case OP_PUSH_POS
: MOP_IN(OP_PUSH_POS
);
2608 STACK_PUSH_POS(s
, sprev
);
2613 case OP_POP_POS
: MOP_IN(OP_POP_POS
);
2615 STACK_POS_END(stkp
);
2616 s
= stkp
->u
.state
.pstr
;
2617 sprev
= stkp
->u
.state
.pstr_prev
;
2623 case OP_PUSH_POS_NOT
: MOP_IN(OP_PUSH_POS_NOT
);
2624 GET_RELADDR_INC(addr
, p
);
2625 STACK_PUSH_POS_NOT(p
+ addr
, s
, sprev
);
2630 case OP_FAIL_POS
: MOP_IN(OP_FAIL_POS
);
2631 STACK_POP_TIL_POS_NOT
;
2635 case OP_PUSH_STOP_BT
: MOP_IN(OP_PUSH_STOP_BT
);
2641 case OP_POP_STOP_BT
: MOP_IN(OP_POP_STOP_BT
);
2647 case OP_LOOK_BEHIND
: MOP_IN(OP_LOOK_BEHIND
);
2648 GET_LENGTH_INC(tlen
, p
);
2649 s
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2650 if (IS_NULL(s
)) goto fail
;
2651 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2656 case OP_PUSH_LOOK_BEHIND_NOT
: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT
);
2657 GET_RELADDR_INC(addr
, p
);
2658 GET_LENGTH_INC(tlen
, p
);
2659 q
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2661 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2662 If you want to change to fail, replace following line. */
2667 STACK_PUSH_LOOK_BEHIND_NOT(p
+ addr
, s
, sprev
);
2669 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2675 case OP_FAIL_LOOK_BEHIND_NOT
: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT
);
2676 STACK_POP_TIL_LOOK_BEHIND_NOT
;
2680 #ifdef USE_SUBEXP_CALL
2681 case OP_CALL
: MOP_IN(OP_CALL
);
2682 GET_ABSADDR_INC(addr
, p
);
2683 STACK_PUSH_CALL_FRAME(p
);
2689 case OP_RETURN
: MOP_IN(OP_RETURN
);
2704 case OP_FAIL
: MOP_IN(OP_FAIL
);
2706 p
= stk
->u
.state
.pcode
;
2707 s
= stk
->u
.state
.pstr
;
2708 sprev
= stk
->u
.state
.pstr_prev
;
2710 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2711 if (stk
->u
.state
.state_check
!= 0) {
2712 stk
->type
= STK_STATE_CHECK_MARK
;
2722 goto bytecode_error
;
2724 } /* end of switch */
2726 } /* end of while(1) */
2735 return ONIGERR_STACK_BUG
;
2740 return ONIGERR_UNDEFINED_BYTECODE
;
2742 unexpected_bytecode_error
:
2744 return ONIGERR_UNEXPECTED_BYTECODE
;
2749 slow_search(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2750 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
2752 UChar
*t
, *p
, *s
, *end
;
2754 end
= (UChar
* )text_end
;
2755 end
-= target_end
- target
- 1;
2756 if (end
> text_range
)
2761 if (enc
->max_enc_len
== enc
->min_enc_len
) {
2762 int n
= enc
->max_enc_len
;
2765 if (*s
== *target
) {
2768 if (target_end
== t
|| memcmp(t
, p
, target_end
- t
) == 0)
2773 return (UChar
*)NULL
;
2776 if (*s
== *target
) {
2779 if (target_end
== t
|| memcmp(t
, p
, target_end
- t
) == 0)
2782 s
+= enclen(enc
, s
, end
);
2785 return (UChar
* )NULL
;
2789 str_lower_case_match(OnigEncoding enc
, int case_fold_flag
,
2790 const UChar
* t
, const UChar
* tend
,
2791 const UChar
* p
, const UChar
* end
)
2794 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2797 lowlen
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &p
, end
, lowbuf
);
2799 while (lowlen
> 0) {
2800 if (*t
++ != *q
++) return 0;
2809 slow_search_ic(OnigEncoding enc
, int case_fold_flag
,
2810 UChar
* target
, UChar
* target_end
,
2811 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
2815 end
= (UChar
* )text_end
;
2816 end
-= target_end
- target
- 1;
2817 if (end
> text_range
)
2823 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
2827 s
+= enclen(enc
, s
, text_end
);
2830 return (UChar
* )NULL
;
2834 slow_search_backward(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2835 const UChar
* text
, const UChar
* adjust_text
,
2836 const UChar
* text_end
, const UChar
* text_start
)
2840 s
= (UChar
* )text_end
;
2841 s
-= (target_end
- target
);
2843 s
= (UChar
* )text_start
;
2845 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
2848 if (*s
== *target
) {
2851 while (t
< target_end
) {
2856 if (t
== target_end
)
2859 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
2862 return (UChar
* )NULL
;
2866 slow_search_backward_ic(OnigEncoding enc
, int case_fold_flag
,
2867 UChar
* target
, UChar
* target_end
,
2868 const UChar
* text
, const UChar
* adjust_text
,
2869 const UChar
* text_end
, const UChar
* text_start
)
2873 s
= (UChar
* )text_end
;
2874 s
-= (target_end
- target
);
2876 s
= (UChar
* )text_start
;
2878 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
2881 if (str_lower_case_match(enc
, case_fold_flag
,
2882 target
, target_end
, s
, text_end
))
2885 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
2888 return (UChar
* )NULL
;
2892 bm_search_notrev(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2893 const UChar
* text
, const UChar
* text_end
,
2894 const UChar
* text_range
)
2896 const UChar
*s
, *se
, *t
, *p
, *end
;
2900 #ifdef ONIG_DEBUG_SEARCH
2901 fprintf(stderr
, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2902 (int )text
, (int )text_end
, (int )text_range
);
2905 tail
= target_end
- 1;
2906 tlen1
= tail
- target
;
2908 if (end
+ tlen1
> text_end
)
2909 end
= text_end
- tlen1
;
2913 if (IS_NULL(reg
->int_map
)) {
2917 while (t
>= target
&& *p
== *t
) {
2920 if (t
< target
) return (UChar
* )s
;
2922 skip
= reg
->map
[*se
];
2925 s
+= enclen(reg
->enc
, s
, end
);
2926 } while ((s
- t
) < skip
&& s
< end
);
2933 while (t
>= target
&& *p
== *t
) {
2936 if (t
< target
) return (UChar
* )s
;
2938 skip
= reg
->int_map
[*se
];
2941 s
+= enclen(reg
->enc
, s
, end
);
2942 } while ((s
- t
) < skip
&& s
< end
);
2946 return (UChar
* )NULL
;
2950 bm_search(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2951 const UChar
* text
, const UChar
* text_end
, const UChar
* text_range
)
2953 const UChar
*s
, *t
, *p
, *end
;
2956 end
= text_range
+ (target_end
- target
) - 1;
2960 tail
= target_end
- 1;
2961 s
= text
+ (target_end
- target
) - 1;
2962 if (IS_NULL(reg
->int_map
)) {
2966 while (t
>= target
&& *p
== *t
) {
2969 if (t
< target
) return (UChar
* )(p
+ 1);
2973 else { /* see int_map[] */
2977 while (t
>= target
&& *p
== *t
) {
2980 if (t
< target
) return (UChar
* )(p
+ 1);
2981 s
+= reg
->int_map
[*s
];
2984 return (UChar
* )NULL
;
2988 set_bm_backward_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
2994 if (IS_NULL(*skip
)) {
2995 *skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
2996 if (IS_NULL(*skip
)) return ONIGERR_MEMORY
;
3000 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
3003 for (i
= len
- 1; i
> 0; i
--)
3010 bm_search_backward(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
3011 const UChar
* text
, const UChar
* adjust_text
,
3012 const UChar
* text_end
, const UChar
* text_start
)
3014 const UChar
*s
, *t
, *p
;
3016 s
= text_end
- (target_end
- target
);
3020 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
3025 while (t
< target_end
&& *p
== *t
) {
3028 if (t
== target_end
)
3031 s
-= reg
->int_map_backward
[*s
];
3032 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
3035 return (UChar
* )NULL
;
3039 map_search(OnigEncoding enc
, UChar map
[],
3040 const UChar
* text
, const UChar
* text_range
)
3042 const UChar
*s
= text
;
3044 while (s
< text_range
) {
3045 if (map
[*s
]) return (UChar
* )s
;
3047 s
+= enclen(enc
, s
, text_range
);
3049 return (UChar
* )NULL
;
3053 map_search_backward(OnigEncoding enc
, UChar map
[],
3054 const UChar
* text
, const UChar
* adjust_text
,
3055 const UChar
* text_start
)
3057 const UChar
*s
= text_start
;
3060 if (map
[*s
]) return (UChar
* )s
;
3062 s
= onigenc_get_prev_char_head(enc
, adjust_text
, s
);
3064 return (UChar
* )NULL
;
3068 onig_match(regex_t
* reg
, const UChar
* str
, const UChar
* end
, const UChar
* at
, OnigRegion
* region
,
3069 OnigOptionType option
)
3075 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3077 THREAD_ATOMIC_START
;
3078 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3079 ONIG_STATE_INC(reg
);
3080 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3081 onig_chain_reduce(reg
);
3082 ONIG_STATE_INC(reg
);
3090 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3091 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3092 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3098 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3100 MATCH_ARG_INIT(msa
, option
, region
, at
);
3101 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3103 int offset
= at
- str
;
3104 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
3109 #ifdef USE_POSIX_API_REGION_OPTION
3110 && !IS_POSIX_REGION(option
)
3113 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
3119 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
, str
, at
);
3120 r
= match_at(reg
, str
, end
,
3121 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3127 MATCH_ARG_FREE(msa
);
3128 ONIG_STATE_DEC_THREAD(reg
);
3133 forward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
, UChar
* s
,
3134 UChar
* range
, UChar
** low
, UChar
** high
, UChar
** low_prev
)
3136 UChar
*p
, *pprev
= (UChar
* )NULL
;
3138 #ifdef ONIG_DEBUG_SEARCH
3139 fprintf(stderr
, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3140 (int )str
, (int )end
, (int )s
, (int )range
);
3144 if (reg
->dmin
> 0) {
3145 if (ONIGENC_IS_SINGLEBYTE(reg
->enc
)) {
3149 UChar
*q
= p
+ reg
->dmin
;
3150 while (p
< q
) p
+= enclen(reg
->enc
, p
, end
);
3155 switch (reg
->optimize
) {
3156 case ONIG_OPTIMIZE_EXACT
:
3157 p
= slow_search(reg
->enc
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3159 case ONIG_OPTIMIZE_EXACT_IC
:
3160 p
= slow_search_ic(reg
->enc
, reg
->case_fold_flag
,
3161 reg
->exact
, reg
->exact_end
, p
, end
, range
);
3164 case ONIG_OPTIMIZE_EXACT_BM
:
3165 p
= bm_search(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3168 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
3169 p
= bm_search_notrev(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3172 case ONIG_OPTIMIZE_MAP
:
3173 p
= map_search(reg
->enc
, reg
->map
, p
, range
);
3177 if (p
&& p
< range
) {
3178 if (p
- reg
->dmin
< s
) {
3181 p
+= enclen(reg
->enc
, p
, end
);
3185 if (reg
->sub_anchor
) {
3188 switch (reg
->sub_anchor
) {
3189 case ANCHOR_BEGIN_LINE
:
3190 if (!ON_STR_BEGIN(p
)) {
3191 prev
= onigenc_get_prev_char_head(reg
->enc
,
3192 (pprev
? pprev
: str
), p
);
3193 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
3198 case ANCHOR_END_LINE
:
3199 if (ON_STR_END(p
)) {
3200 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3201 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
,
3202 (pprev
? pprev
: str
), p
);
3203 if (prev
&& ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
3207 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3208 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3209 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
3217 if (reg
->dmax
== 0) {
3221 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, s
, p
);
3223 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3224 (pprev
? pprev
: str
), p
);
3228 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3229 *low
= p
- reg
->dmax
;
3231 *low
= onigenc_get_right_adjust_char_head_with_prev(reg
->enc
, s
,
3232 *low
, (const UChar
** )low_prev
);
3233 if (low_prev
&& IS_NULL(*low_prev
))
3234 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3235 (pprev
? pprev
: s
), *low
);
3239 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3240 (pprev
? pprev
: str
), *low
);
3244 /* no needs to adjust *high, *high is used as range check only */
3245 *high
= p
- reg
->dmin
;
3247 #ifdef ONIG_DEBUG_SEARCH
3249 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3250 (int )(*low
- str
), (int )(*high
- str
), reg
->dmin
, reg
->dmax
);
3252 return 1; /* success */
3255 return 0; /* fail */
3258 static int set_bm_backward_skip
P_((UChar
* s
, UChar
* end
, OnigEncoding enc
,
3261 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3264 backward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3265 UChar
* s
, const UChar
* range
, UChar
* adjrange
,
3266 UChar
** low
, UChar
** high
)
3275 switch (reg
->optimize
) {
3276 case ONIG_OPTIMIZE_EXACT
:
3278 p
= slow_search_backward(reg
->enc
, reg
->exact
, reg
->exact_end
,
3279 range
, adjrange
, end
, p
);
3282 case ONIG_OPTIMIZE_EXACT_IC
:
3283 p
= slow_search_backward_ic(reg
->enc
, reg
->case_fold_flag
,
3284 reg
->exact
, reg
->exact_end
,
3285 range
, adjrange
, end
, p
);
3288 case ONIG_OPTIMIZE_EXACT_BM
:
3289 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
3290 if (IS_NULL(reg
->int_map_backward
)) {
3291 if (s
- range
< BM_BACKWARD_SEARCH_LENGTH_THRESHOLD
)
3294 r
= set_bm_backward_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
3295 &(reg
->int_map_backward
));
3298 p
= bm_search_backward(reg
, reg
->exact
, reg
->exact_end
, range
, adjrange
,
3302 case ONIG_OPTIMIZE_MAP
:
3303 p
= map_search_backward(reg
->enc
, reg
->map
, range
, adjrange
, p
);
3308 if (reg
->sub_anchor
) {
3311 switch (reg
->sub_anchor
) {
3312 case ANCHOR_BEGIN_LINE
:
3313 if (!ON_STR_BEGIN(p
)) {
3314 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, p
);
3315 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
3322 case ANCHOR_END_LINE
:
3323 if (ON_STR_END(p
)) {
3324 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3325 prev
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3326 if (IS_NULL(prev
)) goto fail
;
3327 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
3333 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3334 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3335 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
3338 p
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3339 if (IS_NULL(p
)) goto fail
;
3346 /* no needs to adjust *high, *high is used as range check only */
3347 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3348 *low
= p
- reg
->dmax
;
3349 *high
= p
- reg
->dmin
;
3350 *high
= onigenc_get_right_adjust_char_head(reg
->enc
, adjrange
, *high
);
3353 #ifdef ONIG_DEBUG_SEARCH
3354 fprintf(stderr
, "backward_search_range: low: %d, high: %d\n",
3355 (int )(*low
- str
), (int )(*high
- str
));
3357 return 1; /* success */
3361 #ifdef ONIG_DEBUG_SEARCH
3362 fprintf(stderr
, "backward_search_range: fail.\n");
3364 return 0; /* fail */
3369 onig_search(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3370 const UChar
* start
, const UChar
* range
, OnigRegion
* region
, OnigOptionType option
)
3375 const UChar
*orig_start
= start
;
3376 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3377 const UChar
*orig_range
= range
;
3380 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3382 THREAD_ATOMIC_START
;
3383 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3384 ONIG_STATE_INC(reg
);
3385 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3386 onig_chain_reduce(reg
);
3387 ONIG_STATE_INC(reg
);
3395 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3396 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3397 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3403 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3405 #ifdef ONIG_DEBUG_SEARCH
3407 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3408 (int )str
, (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
3412 #ifdef USE_POSIX_API_REGION_OPTION
3413 && !IS_POSIX_REGION(option
)
3416 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
3417 if (r
) goto finish_no_msa
;
3420 if (start
> end
|| start
< str
) goto mismatch_no_msa
;
3423 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3424 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3425 #define MATCH_AND_RETURN_CHECK(upper_range) \
3426 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3427 if (r != ONIG_MISMATCH) {\
3429 if (! IS_FIND_LONGEST(reg->options)) {\
3433 else goto finish; /* error */ \
3436 #define MATCH_AND_RETURN_CHECK(upper_range) \
3437 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3438 if (r != ONIG_MISMATCH) {\
3442 else goto finish; /* error */ \
3444 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3446 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3447 #define MATCH_AND_RETURN_CHECK(none) \
3448 r = match_at(reg, str, end, s, prev, &msa);\
3449 if (r != ONIG_MISMATCH) {\
3451 if (! IS_FIND_LONGEST(reg->options)) {\
3455 else goto finish; /* error */ \
3458 #define MATCH_AND_RETURN_CHECK(none) \
3459 r = match_at(reg, str, end, s, prev, &msa);\
3460 if (r != ONIG_MISMATCH) {\
3464 else goto finish; /* error */ \
3466 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3467 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3470 /* anchor optimize: resume search range */
3471 if (reg
->anchor
!= 0 && str
< end
) {
3472 UChar
*min_semi_end
, *max_semi_end
;
3474 if (reg
->anchor
& ANCHOR_BEGIN_POSITION
) {
3475 /* search start-position only */
3482 else if (reg
->anchor
& ANCHOR_BEGIN_BUF
) {
3483 /* search str-position only */
3484 if (range
> start
) {
3485 if (start
!= str
) goto mismatch_no_msa
;
3494 goto mismatch_no_msa
;
3497 else if (reg
->anchor
& ANCHOR_END_BUF
) {
3498 min_semi_end
= max_semi_end
= (UChar
* )end
;
3501 if ((OnigDistance
)(max_semi_end
- str
) < reg
->anchor_dmin
)
3502 goto mismatch_no_msa
;
3504 if (range
> start
) {
3505 if ((OnigDistance
)(min_semi_end
- start
) > reg
->anchor_dmax
) {
3506 start
= min_semi_end
- reg
->anchor_dmax
;
3508 start
= onigenc_get_right_adjust_char_head(reg
->enc
, str
, start
);
3509 else { /* match with empty at end */
3510 start
= onigenc_get_prev_char_head(reg
->enc
, str
, end
);
3513 if ((OnigDistance
)(max_semi_end
- (range
- 1)) < reg
->anchor_dmin
) {
3514 range
= max_semi_end
- reg
->anchor_dmin
+ 1;
3517 if (start
>= range
) goto mismatch_no_msa
;
3520 if ((OnigDistance
)(min_semi_end
- range
) > reg
->anchor_dmax
) {
3521 range
= min_semi_end
- reg
->anchor_dmax
;
3523 if ((OnigDistance
)(max_semi_end
- start
) < reg
->anchor_dmin
) {
3524 start
= max_semi_end
- reg
->anchor_dmin
;
3525 start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, start
);
3527 if (range
> start
) goto mismatch_no_msa
;
3530 else if (reg
->anchor
& ANCHOR_SEMI_END_BUF
) {
3531 UChar
* pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, end
, 1);
3533 max_semi_end
= (UChar
* )end
;
3534 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, pre_end
, end
)) {
3535 min_semi_end
= pre_end
;
3537 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3538 pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, pre_end
, 1);
3539 if (IS_NOT_NULL(pre_end
) &&
3540 ONIGENC_IS_MBC_CRNL(reg
->enc
, pre_end
, end
)) {
3541 min_semi_end
= pre_end
;
3544 if (min_semi_end
> str
&& start
<= min_semi_end
) {
3549 min_semi_end
= (UChar
* )end
;
3553 else if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR_ML
)) {
3554 goto begin_position
;
3557 else if (str
== end
) { /* empty string */
3558 static const UChar
* address_for_empty_string
= (UChar
* )"";
3560 #ifdef ONIG_DEBUG_SEARCH
3561 fprintf(stderr
, "onig_search: empty string.\n");
3564 if (reg
->threshold_len
== 0) {
3565 start
= end
= str
= address_for_empty_string
;
3567 prev
= (UChar
* )NULL
;
3569 MATCH_ARG_INIT(msa
, option
, region
, start
);
3570 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571 msa
.state_check_buff
= (void* )0;
3572 msa
.state_check_buff_size
= 0; /* NO NEED, for valgrind */
3574 MATCH_AND_RETURN_CHECK(end
);
3577 goto mismatch_no_msa
;
3580 #ifdef ONIG_DEBUG_SEARCH
3581 fprintf(stderr
, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3582 (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
3585 MATCH_ARG_INIT(msa
, option
, region
, orig_start
);
3586 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3588 int offset
= (MIN(start
, range
) - str
);
3589 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
3594 if (range
> start
) { /* forward search */
3596 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3598 prev
= (UChar
* )NULL
;
3600 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3601 UChar
*sch_range
, *low
, *high
, *low_prev
;
3603 sch_range
= (UChar
* )range
;
3604 if (reg
->dmax
!= 0) {
3605 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3606 sch_range
= (UChar
* )end
;
3608 sch_range
+= reg
->dmax
;
3609 if (sch_range
> end
) sch_range
= (UChar
* )end
;
3613 if ((end
- start
) < reg
->threshold_len
)
3616 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3618 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3619 &low
, &high
, &low_prev
)) goto mismatch
;
3625 MATCH_AND_RETURN_CHECK(orig_range
);
3627 s
+= enclen(reg
->enc
, s
, end
);
3629 } while (s
< range
);
3632 else { /* check only. */
3633 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3634 &low
, &high
, (UChar
** )NULL
)) goto mismatch
;
3636 if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR
) != 0) {
3638 MATCH_AND_RETURN_CHECK(orig_range
);
3640 s
+= enclen(reg
->enc
, s
, end
);
3642 while (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
) && s
< range
) {
3644 s
+= enclen(reg
->enc
, s
, end
);
3646 } while (s
< range
);
3653 MATCH_AND_RETURN_CHECK(orig_range
);
3655 s
+= enclen(reg
->enc
, s
, end
);
3656 } while (s
< range
);
3658 if (s
== range
) { /* because empty match with /$/. */
3659 MATCH_AND_RETURN_CHECK(orig_range
);
3662 else { /* backward search */
3663 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3664 if (orig_start
< end
)
3665 orig_start
+= enclen(reg
->enc
, orig_start
, end
); /* is upper range */
3668 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3669 UChar
*low
, *high
, *adjrange
, *sch_start
;
3672 adjrange
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, range
);
3674 adjrange
= (UChar
* )end
;
3676 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
&&
3677 (end
- range
) >= reg
->threshold_len
) {
3679 sch_start
= s
+ reg
->dmax
;
3680 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3681 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3689 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3690 MATCH_AND_RETURN_CHECK(orig_start
);
3693 } while (s
>= range
);
3696 else { /* check only. */
3697 if ((end
- range
) < reg
->threshold_len
) goto mismatch
;
3700 if (reg
->dmax
!= 0) {
3701 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3702 sch_start
= (UChar
* )end
;
3704 sch_start
+= reg
->dmax
;
3705 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3707 sch_start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
,
3711 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3712 &low
, &high
) <= 0) goto mismatch
;
3717 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3718 MATCH_AND_RETURN_CHECK(orig_start
);
3720 } while (s
>= range
);
3724 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3725 if (IS_FIND_LONGEST(reg
->options
)) {
3726 if (msa
.best_len
>= 0) {
3735 MATCH_ARG_FREE(msa
);
3736 ONIG_STATE_DEC_THREAD(reg
);
3738 /* If result is mismatch and no FIND_NOT_EMPTY option,
3739 then the region is not setted in match_at(). */
3740 if (IS_FIND_NOT_EMPTY(reg
->options
) && region
3741 #ifdef USE_POSIX_API_REGION_OPTION
3742 && !IS_POSIX_REGION(option
)
3745 onig_region_clear(region
);
3749 if (r
!= ONIG_MISMATCH
)
3750 fprintf(stderr
, "onig_search: error %d\n", r
);
3757 ONIG_STATE_DEC_THREAD(reg
);
3759 if (r
!= ONIG_MISMATCH
)
3760 fprintf(stderr
, "onig_search: error %d\n", r
);
3765 ONIG_STATE_DEC_THREAD(reg
);
3766 MATCH_ARG_FREE(msa
);
3771 onig_get_encoding(regex_t
* reg
)
3776 extern OnigOptionType
3777 onig_get_options(regex_t
* reg
)
3779 return reg
->options
;
3782 extern OnigCaseFoldType
3783 onig_get_case_fold_flag(regex_t
* reg
)
3785 return reg
->case_fold_flag
;
3788 extern OnigSyntaxType
*
3789 onig_get_syntax(regex_t
* reg
)
3795 onig_number_of_captures(regex_t
* reg
)
3797 return reg
->num_mem
;
3801 onig_number_of_capture_histories(regex_t
* reg
)
3803 #ifdef USE_CAPTURE_HISTORY
3807 for (i
= 0; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
3808 if (BIT_STATUS_AT(reg
->capture_history
, i
) != 0)
3818 onig_copy_encoding(OnigEncoding to
, OnigEncoding from
)