1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode
* node
);
44 history_tree_clear(OnigCaptureTreeNode
* node
)
48 if (IS_NOT_NULL(node
)) {
49 for (i
= 0; i
< node
->num_childs
; i
++) {
50 if (IS_NOT_NULL(node
->childs
[i
])) {
51 history_tree_free(node
->childs
[i
]);
54 for (i
= 0; i
< node
->allocated
; i
++) {
55 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
58 node
->beg
= ONIG_REGION_NOTPOS
;
59 node
->end
= ONIG_REGION_NOTPOS
;
65 history_tree_free(OnigCaptureTreeNode
* node
)
67 history_tree_clear(node
);
72 history_root_free(OnigRegion
* r
)
74 if (IS_NOT_NULL(r
->history_root
)) {
75 history_tree_free(r
->history_root
);
76 r
->history_root
= (OnigCaptureTreeNode
* )0;
80 static OnigCaptureTreeNode
*
81 history_node_new(void)
83 OnigCaptureTreeNode
* node
;
85 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
86 CHECK_NULL_RETURN(node
);
87 node
->childs
= (OnigCaptureTreeNode
** )0;
91 node
->beg
= ONIG_REGION_NOTPOS
;
92 node
->end
= ONIG_REGION_NOTPOS
;
98 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
102 if (parent
->num_childs
>= parent
->allocated
) {
105 if (IS_NULL(parent
->childs
)) {
106 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
108 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
111 n
= parent
->allocated
* 2;
113 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
114 sizeof(OnigCaptureTreeNode
*) * n
);
116 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
117 for (i
= parent
->allocated
; i
< n
; i
++) {
118 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
120 parent
->allocated
= n
;
123 parent
->childs
[parent
->num_childs
] = child
;
124 parent
->num_childs
++;
128 static OnigCaptureTreeNode
*
129 history_tree_clone(OnigCaptureTreeNode
* node
)
132 OnigCaptureTreeNode
*clone
, *child
;
134 clone
= history_node_new();
135 CHECK_NULL_RETURN(clone
);
137 clone
->beg
= node
->beg
;
138 clone
->end
= node
->end
;
139 for (i
= 0; i
< node
->num_childs
; i
++) {
140 child
= history_tree_clone(node
->childs
[i
]);
141 if (IS_NULL(child
)) {
142 history_tree_free(clone
);
143 return (OnigCaptureTreeNode
* )0;
145 history_tree_add_child(clone
, child
);
151 extern OnigCaptureTreeNode
*
152 onig_get_capture_tree(OnigRegion
* region
)
154 return region
->history_root
;
156 #endif /* USE_CAPTURE_HISTORY */
159 onig_region_clear(OnigRegion
* region
)
163 for (i
= 0; i
< region
->num_regs
; i
++) {
164 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region
);
172 onig_region_resize(OnigRegion
* region
, int n
)
174 region
->num_regs
= n
;
176 if (n
< ONIG_NREGION
)
179 if (region
->allocated
== 0) {
180 region
->beg
= (int* )xmalloc(n
* sizeof(int));
181 region
->end
= (int* )xmalloc(n
* sizeof(int));
183 if (region
->beg
== 0 || region
->end
== 0)
184 return ONIGERR_MEMORY
;
186 region
->allocated
= n
;
188 else if (region
->allocated
< n
) {
189 region
->beg
= (int* )xrealloc(region
->beg
, n
* sizeof(int));
190 region
->end
= (int* )xrealloc(region
->end
, n
* sizeof(int));
192 if (region
->beg
== 0 || region
->end
== 0)
193 return ONIGERR_MEMORY
;
195 region
->allocated
= n
;
202 onig_region_resize_clear(OnigRegion
* region
, int n
)
206 r
= onig_region_resize(region
, n
);
207 if (r
!= 0) return r
;
208 onig_region_clear(region
);
213 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
215 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
217 if (at
>= region
->allocated
) {
218 int r
= onig_region_resize(region
, at
+ 1);
222 region
->beg
[at
] = beg
;
223 region
->end
[at
] = end
;
228 onig_region_init(OnigRegion
* region
)
230 region
->num_regs
= 0;
231 region
->allocated
= 0;
232 region
->beg
= (int* )0;
233 region
->end
= (int* )0;
234 region
->history_root
= (OnigCaptureTreeNode
* )0;
238 onig_region_new(void)
242 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
248 onig_region_free(OnigRegion
* r
, int free_self
)
251 if (r
->allocated
> 0) {
252 if (r
->beg
) xfree(r
->beg
);
253 if (r
->end
) xfree(r
->end
);
256 #ifdef USE_CAPTURE_HISTORY
257 history_root_free(r
);
259 if (free_self
) xfree(r
);
264 onig_region_copy(OnigRegion
* to
, OnigRegion
* from
)
266 #define RREGC_SIZE (sizeof(int) * from->num_regs)
269 if (to
== from
) return;
271 if (to
->allocated
== 0) {
272 if (from
->num_regs
> 0) {
273 to
->beg
= (int* )xmalloc(RREGC_SIZE
);
274 to
->end
= (int* )xmalloc(RREGC_SIZE
);
275 to
->allocated
= from
->num_regs
;
278 else if (to
->allocated
< from
->num_regs
) {
279 to
->beg
= (int* )xrealloc(to
->beg
, RREGC_SIZE
);
280 to
->end
= (int* )xrealloc(to
->end
, RREGC_SIZE
);
281 to
->allocated
= from
->num_regs
;
284 for (i
= 0; i
< from
->num_regs
; i
++) {
285 to
->beg
[i
] = from
->beg
[i
];
286 to
->end
[i
] = from
->end
[i
];
288 to
->num_regs
= from
->num_regs
;
290 #ifdef USE_CAPTURE_HISTORY
291 history_root_free(to
);
293 if (IS_NOT_NULL(from
->history_root
)) {
294 to
->history_root
= history_tree_clone(from
->history_root
);
301 #define INVALID_STACK_INDEX -1
304 /* used by normal-POP */
305 #define STK_ALT 0x0001
306 #define STK_LOOK_BEHIND_NOT 0x0002
307 #define STK_POS_NOT 0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START 0x0100
310 #define STK_MEM_END 0x8200
311 #define STK_REPEAT_INC 0x0300
312 #define STK_STATE_CHECK_MARK 0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START 0x3000
315 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
316 #define STK_MEM_END_MARK 0x8400
317 #define STK_POS 0x0500 /* used when POP-POS */
318 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
319 #define STK_REPEAT 0x0700
320 #define STK_CALL_FRAME 0x0800
321 #define STK_RETURN 0x0900
322 #define STK_VOID 0x0a00 /* for fill a blank */
324 /* stack type check mask */
325 #define STK_MASK_POP_USED 0x00ff
326 #define STK_MASK_TO_VOID_TARGET 0x10ff
327 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
331 (msa).stack_p = (void* )0;\
332 (msa).options = (arg_option);\
333 (msa).region = (arg_region);\
334 (msa).start = (arg_start);\
335 (msa).best_len = ONIG_MISMATCH;\
338 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
339 (msa).stack_p = (void* )0;\
340 (msa).options = (arg_option);\
341 (msa).region = (arg_region);\
342 (msa).start = (arg_start);\
346 #ifdef USE_COMBINATION_EXPLOSION_CHECK
348 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
350 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
351 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
352 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
353 offset = ((offset) * (state_num)) >> 3;\
354 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
355 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
356 (msa).state_check_buff = (void* )xmalloc(size);\
358 (msa).state_check_buff = (void* )xalloca(size);\
359 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
360 (size_t )(size - (offset))); \
361 (msa).state_check_buff_size = size;\
364 (msa).state_check_buff = (void* )0;\
365 (msa).state_check_buff_size = 0;\
369 (msa).state_check_buff = (void* )0;\
370 (msa).state_check_buff_size = 0;\
374 #define MATCH_ARG_FREE(msa) do {\
375 if ((msa).stack_p) xfree((msa).stack_p);\
376 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
377 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
381 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
382 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
387 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
389 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
390 stk_alloc = (OnigStackType* )(msa->stack_p);\
391 stk_base = stk_alloc;\
393 stk_end = stk_base + msa->stack_n;\
396 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
397 + sizeof(OnigStackType) * (stack_num));\
398 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
399 stk_base = stk_alloc;\
401 stk_end = stk_base + (stack_num);\
405 #define STACK_SAVE do{\
406 if (stk_base != stk_alloc) {\
407 msa->stack_p = stk_base;\
408 msa->stack_n = stk_end - stk_base;\
412 static unsigned int MatchStackLimitSize
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
415 onig_get_match_stack_limit_size(void)
417 return MatchStackLimitSize
;
421 onig_set_match_stack_limit_size(unsigned int size
)
423 MatchStackLimitSize
= size
;
428 stack_double(OnigStackType
** arg_stk_base
, OnigStackType
** arg_stk_end
,
429 OnigStackType
** arg_stk
, OnigStackType
* stk_alloc
, OnigMatchArg
* msa
)
432 OnigStackType
*x
, *stk_base
, *stk_end
, *stk
;
434 stk_base
= *arg_stk_base
;
435 stk_end
= *arg_stk_end
;
438 n
= stk_end
- stk_base
;
439 if (stk_base
== stk_alloc
&& IS_NULL(msa
->stack_p
)) {
440 x
= (OnigStackType
* )xmalloc(sizeof(OnigStackType
) * n
* 2);
443 return ONIGERR_MEMORY
;
445 xmemcpy(x
, stk_base
, n
* sizeof(OnigStackType
));
450 if (MatchStackLimitSize
!= 0 && n
> MatchStackLimitSize
) {
451 if ((unsigned int )(stk_end
- stk_base
) == MatchStackLimitSize
)
452 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
454 n
= MatchStackLimitSize
;
456 x
= (OnigStackType
* )xrealloc(stk_base
, sizeof(OnigStackType
) * n
);
459 return ONIGERR_MEMORY
;
462 *arg_stk
= x
+ (stk
- stk_base
);
464 *arg_stk_end
= x
+ n
;
468 #define STACK_ENSURE(n) do {\
469 if (stk_end - stk < (n)) {\
470 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
471 if (r != 0) { STACK_SAVE; return r; } \
475 #define STACK_AT(index) (stk_base + (index))
476 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
478 #define STACK_PUSH_TYPE(stack_type) do {\
480 stk->type = (stack_type);\
484 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
486 #ifdef USE_COMBINATION_EXPLOSION_CHECK
487 #define STATE_CHECK_POS(s,snum) \
488 (((s) - str) * num_comb_exp_check + ((snum) - 1))
489 #define STATE_CHECK_VAL(v,snum) do {\
490 if (state_check_buff != NULL) {\
491 int x = STATE_CHECK_POS(s,snum);\
492 (v) = state_check_buff[x/8] & (1<<(x%8));\
498 #define ELSE_IF_STATE_CHECK_MARK(stk) \
499 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
500 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
501 state_check_buff[x/8] |= (1<<(x%8)); \
504 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
506 stk->type = (stack_type);\
507 stk->u.state.pcode = (pat);\
508 stk->u.state.pstr = (s);\
509 stk->u.state.pstr_prev = (sprev);\
510 stk->u.state.state_check = 0;\
514 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
515 stk->type = (stack_type);\
516 stk->u.state.pcode = (pat);\
517 stk->u.state.state_check = 0;\
521 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
523 stk->type = STK_ALT;\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.pstr = (s);\
526 stk->u.state.pstr_prev = (sprev);\
527 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
531 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
532 if (state_check_buff != NULL) {\
534 stk->type = STK_STATE_CHECK_MARK;\
535 stk->u.state.pstr = (s);\
536 stk->u.state.state_check = (snum);\
541 #else /* USE_COMBINATION_EXPLOSION_CHECK */
543 #define ELSE_IF_STATE_CHECK_MARK(stk)
545 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
547 stk->type = (stack_type);\
548 stk->u.state.pcode = (pat);\
549 stk->u.state.pstr = (s);\
550 stk->u.state.pstr_prev = (sprev);\
554 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
559 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
561 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
562 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
563 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
564 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
565 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
566 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
568 #define STACK_PUSH_REPEAT(id, pat) do {\
570 stk->type = STK_REPEAT;\
571 stk->u.repeat.num = (id);\
572 stk->u.repeat.pcode = (pat);\
573 stk->u.repeat.count = 0;\
577 #define STACK_PUSH_REPEAT_INC(sindex) do {\
579 stk->type = STK_REPEAT_INC;\
580 stk->u.repeat_inc.si = (sindex);\
584 #define STACK_PUSH_MEM_START(mnum, s) do {\
586 stk->type = STK_MEM_START;\
587 stk->u.mem.num = (mnum);\
588 stk->u.mem.pstr = (s);\
589 stk->u.mem.start = mem_start_stk[mnum];\
590 stk->u.mem.end = mem_end_stk[mnum];\
591 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
592 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
596 #define STACK_PUSH_MEM_END(mnum, s) do {\
598 stk->type = STK_MEM_END;\
599 stk->u.mem.num = (mnum);\
600 stk->u.mem.pstr = (s);\
601 stk->u.mem.start = mem_start_stk[mnum];\
602 stk->u.mem.end = mem_end_stk[mnum];\
603 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
607 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
609 stk->type = STK_MEM_END_MARK;\
610 stk->u.mem.num = (mnum);\
614 #define STACK_GET_MEM_START(mnum, k) do {\
617 while (k > stk_base) {\
619 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
620 && k->u.mem.num == (mnum)) {\
623 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
624 if (level == 0) break;\
630 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
633 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
634 if (level == 0) (start) = k->u.mem.pstr;\
637 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
640 (end) = k->u.mem.pstr;\
648 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
650 stk->type = STK_NULL_CHECK_START;\
651 stk->u.null_check.num = (cnum);\
652 stk->u.null_check.pstr = (s);\
656 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
658 stk->type = STK_NULL_CHECK_END;\
659 stk->u.null_check.num = (cnum);\
663 #define STACK_PUSH_CALL_FRAME(pat) do {\
665 stk->type = STK_CALL_FRAME;\
666 stk->u.call_frame.ret_addr = (pat);\
670 #define STACK_PUSH_RETURN do {\
672 stk->type = STK_RETURN;\
678 #define STACK_BASE_CHECK(p, at) \
679 if ((p) < stk_base) {\
680 fprintf(stderr, "at %s\n", at);\
684 #define STACK_BASE_CHECK(p, at)
687 #define STACK_POP_ONE do {\
689 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
692 #define STACK_POP do {\
693 switch (pop_level) {\
694 case STACK_POP_LEVEL_FREE:\
697 STACK_BASE_CHECK(stk, "STACK_POP"); \
698 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
699 ELSE_IF_STATE_CHECK_MARK(stk);\
702 case STACK_POP_LEVEL_MEM_START:\
705 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 else if (stk->type == STK_MEM_START) {\
708 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
709 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
711 ELSE_IF_STATE_CHECK_MARK(stk);\
717 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
718 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
719 else if (stk->type == STK_MEM_START) {\
720 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
721 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
723 else if (stk->type == STK_REPEAT_INC) {\
724 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
726 else if (stk->type == STK_MEM_END) {\
727 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
728 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
730 ELSE_IF_STATE_CHECK_MARK(stk);\
736 #define STACK_POP_TIL_POS_NOT do {\
739 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
740 if (stk->type == STK_POS_NOT) break;\
741 else if (stk->type == STK_MEM_START) {\
742 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
743 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
745 else if (stk->type == STK_REPEAT_INC) {\
746 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
748 else if (stk->type == STK_MEM_END) {\
749 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
750 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
752 ELSE_IF_STATE_CHECK_MARK(stk);\
756 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
759 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
760 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
761 else if (stk->type == STK_MEM_START) {\
762 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
763 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
765 else if (stk->type == STK_REPEAT_INC) {\
766 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
768 else if (stk->type == STK_MEM_END) {\
769 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
770 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
772 ELSE_IF_STATE_CHECK_MARK(stk);\
776 #define STACK_POS_END(k) do {\
780 STACK_BASE_CHECK(k, "STACK_POS_END"); \
781 if (IS_TO_VOID_TARGET(k)) {\
784 else if (k->type == STK_POS) {\
791 #define STACK_STOP_BT_END do {\
792 OnigStackType *k = stk;\
795 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
796 if (IS_TO_VOID_TARGET(k)) {\
799 else if (k->type == STK_STOP_BT) {\
806 #define STACK_NULL_CHECK(isnull,id,s) do {\
807 OnigStackType* k = stk;\
810 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
811 if (k->type == STK_NULL_CHECK_START) {\
812 if (k->u.null_check.num == (id)) {\
813 (isnull) = (k->u.null_check.pstr == (s));\
820 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
822 OnigStackType* k = stk;\
825 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
826 if (k->type == STK_NULL_CHECK_START) {\
827 if (k->u.null_check.num == (id)) {\
829 (isnull) = (k->u.null_check.pstr == (s));\
835 else if (k->type == STK_NULL_CHECK_END) {\
841 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
842 OnigStackType* k = stk;\
845 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
846 if (k->type == STK_NULL_CHECK_START) {\
847 if (k->u.null_check.num == (id)) {\
848 if (k->u.null_check.pstr != (s)) {\
856 if (k->type == STK_MEM_START) {\
857 if (k->u.mem.end == INVALID_STACK_INDEX) {\
858 (isnull) = 0; break;\
860 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
861 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
863 endp = (UChar* )k->u.mem.end;\
864 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
865 (isnull) = 0; break;\
867 else if (endp != s) {\
868 (isnull) = -1; /* empty, but position changed */ \
880 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
882 OnigStackType* k = stk;\
885 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
886 if (k->type == STK_NULL_CHECK_START) {\
887 if (k->u.null_check.num == (id)) {\
889 if (k->u.null_check.pstr != (s)) {\
897 if (k->type == STK_MEM_START) {\
898 if (k->u.mem.end == INVALID_STACK_INDEX) {\
899 (isnull) = 0; break;\
901 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
902 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
904 endp = (UChar* )k->u.mem.end;\
905 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
906 (isnull) = 0; break;\
908 else if (endp != s) {\
909 (isnull) = -1; /* empty, but position changed */ \
922 else if (k->type == STK_NULL_CHECK_END) {\
923 if (k->u.null_check.num == (id)) level++;\
928 #define STACK_GET_REPEAT(id, k) do {\
933 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
934 if (k->type == STK_REPEAT) {\
936 if (k->u.repeat.num == (id)) {\
941 else if (k->type == STK_CALL_FRAME) level--;\
942 else if (k->type == STK_RETURN) level++;\
946 #define STACK_RETURN(addr) do {\
948 OnigStackType* k = stk;\
951 STACK_BASE_CHECK(k, "STACK_RETURN"); \
952 if (k->type == STK_CALL_FRAME) {\
954 (addr) = k->u.call_frame.ret_addr;\
959 else if (k->type == STK_RETURN)\
965 #define STRING_CMP(s1,s2,len) do {\
967 if (*s1++ != *s2++) goto fail;\
971 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
972 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
976 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
977 UChar
* s1
, UChar
** ps2
, int mblen
)
979 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
980 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
981 UChar
*p1
, *p2
, *end1
, *s2
, *end2
;
988 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, end1
, buf1
);
989 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, end2
, buf2
);
990 if (len1
!= len2
) return 0;
994 if (*p1
!= *p2
) return 0;
1004 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1006 while (len-- > 0) {\
1007 if (*s1++ != *s2++) {\
1008 is_fail = 1; break;\
1013 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1014 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1021 #define IS_EMPTY_STR (str == end)
1022 #define ON_STR_BEGIN(s) ((s) == str)
1023 #define ON_STR_END(s) ((s) == end)
1024 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1025 #define DATA_ENSURE_CHECK1 (s < right_range)
1026 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1027 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1029 #define DATA_ENSURE_CHECK1 (s < end)
1030 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1031 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1032 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1035 #ifdef USE_CAPTURE_HISTORY
1037 make_capture_history_tree(OnigCaptureTreeNode
* node
, OnigStackType
** kp
,
1038 OnigStackType
* stk_top
, UChar
* str
, regex_t
* reg
)
1041 OnigCaptureTreeNode
* child
;
1042 OnigStackType
* k
= *kp
;
1044 while (k
< stk_top
) {
1045 if (k
->type
== STK_MEM_START
) {
1047 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
1048 BIT_STATUS_AT(reg
->capture_history
, n
) != 0) {
1049 child
= history_node_new();
1050 CHECK_NULL_RETURN_MEMERR(child
);
1052 child
->beg
= (int )(k
->u
.mem
.pstr
- str
);
1053 r
= history_tree_add_child(node
, child
);
1054 if (r
!= 0) return r
;
1056 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
1057 if (r
!= 0) return r
;
1060 child
->end
= (int )(k
->u
.mem
.pstr
- str
);
1063 else if (k
->type
== STK_MEM_END
) {
1064 if (k
->u
.mem
.num
== node
->group
) {
1065 node
->end
= (int )(k
->u
.mem
.pstr
- str
);
1073 return 1; /* 1: root node ending. */
1077 #ifdef USE_BACKREF_WITH_LEVEL
1078 static int mem_is_in_memp(int mem
, int num
, UChar
* memp
)
1083 for (i
= 0; i
< num
; i
++) {
1084 GET_MEMNUM_INC(m
, memp
);
1085 if (mem
== (int )m
) return 1;
1090 static int backref_match_at_nested_level(regex_t
* reg
1091 , OnigStackType
* top
, OnigStackType
* stk_base
1092 , int ignore_case
, int case_fold_flag
1093 , int nest
, int mem_num
, UChar
* memp
, UChar
** s
, const UChar
* send
)
1095 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
1102 while (k
>= stk_base
) {
1103 if (k
->type
== STK_CALL_FRAME
) {
1106 else if (k
->type
== STK_RETURN
) {
1109 else if (level
== nest
) {
1110 if (k
->type
== STK_MEM_START
) {
1111 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1112 pstart
= k
->u
.mem
.pstr
;
1113 if (pend
!= NULL_UCHARP
) {
1114 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
1118 if (ignore_case
!= 0) {
1119 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
1120 pstart
, &ss
, (int )(pend
- pstart
)) == 0)
1121 return 0; /* or goto next_mem; */
1125 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
1134 else if (k
->type
== STK_MEM_END
) {
1135 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1136 pend
= k
->u
.mem
.pstr
;
1145 #endif /* USE_BACKREF_WITH_LEVEL */
1148 #ifdef ONIG_DEBUG_STATISTICS
1150 #define USE_TIMEOFDAY
1152 #ifdef USE_TIMEOFDAY
1153 #ifdef HAVE_SYS_TIME_H
1154 #include <sys/time.h>
1156 #ifdef HAVE_UNISTD_H
1159 static struct timeval ts
, te
;
1160 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1161 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1162 (((te).tv_sec - (ts).tv_sec)*1000000))
1164 #ifdef HAVE_SYS_TIMES_H
1165 #include <sys/times.h>
1167 static struct tms ts
, te
;
1168 #define GETTIME(t) times(&(t))
1169 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1172 static int OpCounter
[256];
1173 static int OpPrevCounter
[256];
1174 static unsigned long OpTime
[256];
1175 static int OpCurr
= OP_FINISH
;
1176 static int OpPrevTarget
= OP_FAIL
;
1177 static int MaxStackDepth
= 0;
1179 #define MOP_IN(opcode) do {\
1180 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1182 OpCounter[opcode]++;\
1186 #define MOP_OUT do {\
1188 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1192 onig_statistics_init(void)
1195 for (i
= 0; i
< 256; i
++) {
1196 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
1202 onig_print_statistics(FILE* f
)
1205 fprintf(f
, " count prev time\n");
1206 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
1207 fprintf(f
, "%8d: %8d: %10ld: %s\n",
1208 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OnigOpInfo
[i
].name
);
1210 fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
1213 #define STACK_INC do {\
1215 if (stk - stk_base > MaxStackDepth) \
1216 MaxStackDepth = stk - stk_base;\
1220 #define STACK_INC stk++
1222 #define MOP_IN(opcode)
1227 /* matching region of POSIX API */
1228 typedef int regoff_t
;
1235 /* match data(str - end) from position (sstart). */
1236 /* if sstart == str then set sprev to NULL. */
1238 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
1239 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1240 const UChar
* right_range
,
1242 const UChar
* sstart
, UChar
* sprev
, OnigMatchArg
* msa
)
1244 static UChar FinishCode
[] = { OP_FINISH
};
1246 int i
, n
, num_mem
, best_len
, pop_level
;
1247 LengthType tlen
, tlen2
;
1250 OnigOptionType option
= reg
->options
;
1251 OnigEncoding encode
= reg
->enc
;
1252 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
1253 UChar
*s
, *q
, *sbegin
;
1256 OnigStackType
*stk_alloc
, *stk_base
, *stk
, *stk_end
;
1257 OnigStackType
*stkp
; /* used as any purpose. */
1259 OnigStackIndex
*repeat_stk
;
1260 OnigStackIndex
*mem_start_stk
, *mem_end_stk
;
1261 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1263 unsigned char* state_check_buff
= msa
->state_check_buff
;
1264 int num_comb_exp_check
= reg
->num_comb_exp_check
;
1266 n
= reg
->num_repeat
+ reg
->num_mem
* 2;
1268 STACK_INIT(alloca_base
, n
, INIT_MATCH_STACK_SIZE
);
1269 pop_level
= reg
->stack_pop_level
;
1270 num_mem
= reg
->num_mem
;
1271 repeat_stk
= (OnigStackIndex
* )alloca_base
;
1273 mem_start_stk
= (OnigStackIndex
* )(repeat_stk
+ reg
->num_repeat
);
1274 mem_end_stk
= mem_start_stk
+ num_mem
;
1275 mem_start_stk
--; /* for index start from 1,
1276 mem_start_stk[1]..mem_start_stk[num_mem] */
1277 mem_end_stk
--; /* for index start from 1,
1278 mem_end_stk[1]..mem_end_stk[num_mem] */
1279 for (i
= 1; i
<= num_mem
; i
++) {
1280 mem_start_stk
[i
] = mem_end_stk
[i
] = INVALID_STACK_INDEX
;
1283 #ifdef ONIG_DEBUG_MATCH
1284 fprintf(stderr
, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1285 (int )str
, (int )end
, (int )sstart
, (int )sprev
);
1286 fprintf(stderr
, "size: %d, start offset: %d\n",
1287 (int )(end
- str
), (int )(sstart
- str
));
1290 STACK_PUSH_ENSURED(STK_ALT
, FinishCode
); /* bottom stack */
1291 best_len
= ONIG_MISMATCH
;
1292 s
= (UChar
* )sstart
;
1294 #ifdef ONIG_DEBUG_MATCH
1296 UChar
*q
, *bp
, buf
[50];
1298 fprintf(stderr
, "%4d> \"", (int )(s
- str
));
1300 for (i
= 0, q
= s
; i
< 7 && q
< end
; i
++) {
1301 len
= enclen(encode
, q
);
1302 while (len
-- > 0) *bp
++ = *q
++;
1304 if (q
< end
) { xmemcpy(bp
, "...\"", 4); bp
+= 4; }
1305 else { xmemcpy(bp
, "\"", 1); bp
+= 1; }
1307 fputs((char* )buf
, stderr
);
1308 for (i
= 0; i
< 20 - (bp
- buf
); i
++) fputc(' ', stderr
);
1309 onig_print_compiled_byte_code(stderr
, p
, NULL
, encode
);
1310 fprintf(stderr
, "\n");
1316 case OP_END
: MOP_IN(OP_END
);
1320 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1321 if (IS_FIND_LONGEST(option
)) {
1322 if (n
> msa
->best_len
) {
1324 msa
->best_s
= (UChar
* )sstart
;
1331 region
= msa
->region
;
1333 #ifdef USE_POSIX_API_REGION_OPTION
1334 if (IS_POSIX_REGION(msa
->options
)) {
1335 posix_regmatch_t
* rmt
= (posix_regmatch_t
* )region
;
1337 rmt
[0].rm_so
= sstart
- str
;
1338 rmt
[0].rm_eo
= s
- str
;
1339 for (i
= 1; i
<= num_mem
; i
++) {
1340 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1341 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1342 rmt
[i
].rm_so
= STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
;
1344 rmt
[i
].rm_so
= (UChar
* )((void* )(mem_start_stk
[i
])) - str
;
1346 rmt
[i
].rm_eo
= (BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1347 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1348 : (UChar
* )((void* )mem_end_stk
[i
])) - str
;
1351 rmt
[i
].rm_so
= rmt
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
1356 #endif /* USE_POSIX_API_REGION_OPTION */
1357 region
->beg
[0] = sstart
- str
;
1358 region
->end
[0] = s
- str
;
1359 for (i
= 1; i
<= num_mem
; i
++) {
1360 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1361 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1362 region
->beg
[i
] = STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
;
1364 region
->beg
[i
] = (UChar
* )((void* )mem_start_stk
[i
]) - str
;
1366 region
->end
[i
] = (BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1367 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1368 : (UChar
* )((void* )mem_end_stk
[i
])) - str
;
1371 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
1375 #ifdef USE_CAPTURE_HISTORY
1376 if (reg
->capture_history
!= 0) {
1378 OnigCaptureTreeNode
* node
;
1380 if (IS_NULL(region
->history_root
)) {
1381 region
->history_root
= node
= history_node_new();
1382 CHECK_NULL_RETURN_MEMERR(node
);
1385 node
= region
->history_root
;
1386 history_tree_clear(node
);
1390 node
->beg
= sstart
- str
;
1391 node
->end
= s
- str
;
1394 r
= make_capture_history_tree(region
->history_root
, &stkp
,
1395 stk
, (UChar
* )str
, reg
);
1397 best_len
= r
; /* error code */
1401 #endif /* USE_CAPTURE_HISTORY */
1402 #ifdef USE_POSIX_API_REGION_OPTION
1403 } /* else IS_POSIX_REGION() */
1406 } /* n > best_len */
1408 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1413 if (IS_FIND_CONDITION(option
)) {
1414 if (IS_FIND_NOT_EMPTY(option
) && s
== sstart
) {
1415 best_len
= ONIG_MISMATCH
;
1416 goto fail
; /* for retry */
1418 if (IS_FIND_LONGEST(option
) && DATA_ENSURE_CHECK1
) {
1419 goto fail
; /* for retry */
1423 /* default behavior: return first-matching result. */
1427 case OP_EXACT1
: MOP_IN(OP_EXACT1
);
1430 if (*p
!= *s
) goto fail
;
1433 if (*p
!= *s
++) goto fail
;
1439 case OP_EXACT1_IC
: MOP_IN(OP_EXACT1_IC
);
1442 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1445 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1446 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1461 case OP_EXACT2
: MOP_IN(OP_EXACT2
);
1463 if (*p
!= *s
) goto fail
;
1465 if (*p
!= *s
) goto fail
;
1472 case OP_EXACT3
: MOP_IN(OP_EXACT3
);
1474 if (*p
!= *s
) goto fail
;
1476 if (*p
!= *s
) goto fail
;
1478 if (*p
!= *s
) goto fail
;
1485 case OP_EXACT4
: MOP_IN(OP_EXACT4
);
1487 if (*p
!= *s
) goto fail
;
1489 if (*p
!= *s
) goto fail
;
1491 if (*p
!= *s
) goto fail
;
1493 if (*p
!= *s
) goto fail
;
1500 case OP_EXACT5
: MOP_IN(OP_EXACT5
);
1502 if (*p
!= *s
) goto fail
;
1504 if (*p
!= *s
) goto fail
;
1506 if (*p
!= *s
) goto fail
;
1508 if (*p
!= *s
) goto fail
;
1510 if (*p
!= *s
) goto fail
;
1517 case OP_EXACTN
: MOP_IN(OP_EXACTN
);
1518 GET_LENGTH_INC(tlen
, p
);
1520 while (tlen
-- > 0) {
1521 if (*p
++ != *s
++) goto fail
;
1528 case OP_EXACTN_IC
: MOP_IN(OP_EXACTN_IC
);
1531 UChar
*q
, *endp
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1533 GET_LENGTH_INC(tlen
, p
);
1539 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1540 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1546 if (*p
!= *q
) goto fail
;
1556 case OP_EXACTMB2N1
: MOP_IN(OP_EXACTMB2N1
);
1558 if (*p
!= *s
) goto fail
;
1560 if (*p
!= *s
) goto fail
;
1565 case OP_EXACTMB2N2
: MOP_IN(OP_EXACTMB2N2
);
1567 if (*p
!= *s
) goto fail
;
1569 if (*p
!= *s
) goto fail
;
1572 if (*p
!= *s
) goto fail
;
1574 if (*p
!= *s
) goto fail
;
1580 case OP_EXACTMB2N3
: MOP_IN(OP_EXACTMB2N3
);
1582 if (*p
!= *s
) goto fail
;
1584 if (*p
!= *s
) goto fail
;
1586 if (*p
!= *s
) goto fail
;
1588 if (*p
!= *s
) goto fail
;
1591 if (*p
!= *s
) goto fail
;
1593 if (*p
!= *s
) goto fail
;
1599 case OP_EXACTMB2N
: MOP_IN(OP_EXACTMB2N
);
1600 GET_LENGTH_INC(tlen
, p
);
1601 DATA_ENSURE(tlen
* 2);
1602 while (tlen
-- > 0) {
1603 if (*p
!= *s
) goto fail
;
1605 if (*p
!= *s
) goto fail
;
1613 case OP_EXACTMB3N
: MOP_IN(OP_EXACTMB3N
);
1614 GET_LENGTH_INC(tlen
, p
);
1615 DATA_ENSURE(tlen
* 3);
1616 while (tlen
-- > 0) {
1617 if (*p
!= *s
) goto fail
;
1619 if (*p
!= *s
) goto fail
;
1621 if (*p
!= *s
) goto fail
;
1629 case OP_EXACTMBN
: MOP_IN(OP_EXACTMBN
);
1630 GET_LENGTH_INC(tlen
, p
); /* mb-len */
1631 GET_LENGTH_INC(tlen2
, p
); /* string len */
1634 while (tlen2
-- > 0) {
1635 if (*p
!= *s
) goto fail
;
1643 case OP_CCLASS
: MOP_IN(OP_CCLASS
);
1645 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0) goto fail
;
1647 s
+= enclen(encode
, s
, end
); /* OP_CCLASS can match mb-code. \D, \S */
1651 case OP_CCLASS_MB
: MOP_IN(OP_CCLASS_MB
);
1652 if (! ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) goto fail
;
1655 GET_LENGTH_INC(tlen
, p
);
1662 mb_len
= enclen(encode
, s
, end
);
1663 DATA_ENSURE(mb_len
);
1666 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1668 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1669 if (! onig_is_in_code_range(p
, code
)) goto fail
;
1673 if (! onig_is_in_code_range(q
, code
)) goto fail
;
1680 case OP_CCLASS_MIX
: MOP_IN(OP_CCLASS_MIX
);
1682 if (ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1687 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0)
1691 GET_LENGTH_INC(tlen
, p
);
1698 case OP_CCLASS_NOT
: MOP_IN(OP_CCLASS_NOT
);
1700 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0) goto fail
;
1702 s
+= enclen(encode
, s
, end
);
1706 case OP_CCLASS_MB_NOT
: MOP_IN(OP_CCLASS_MB_NOT
);
1708 if (! ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1710 GET_LENGTH_INC(tlen
, p
);
1712 goto cc_mb_not_success
;
1716 GET_LENGTH_INC(tlen
, p
);
1720 int mb_len
= enclen(encode
, s
, end
);
1722 if (! DATA_ENSURE_CHECK(mb_len
)) {
1726 goto cc_mb_not_success
;
1731 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1733 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1734 if (onig_is_in_code_range(p
, code
)) goto fail
;
1738 if (onig_is_in_code_range(q
, code
)) goto fail
;
1747 case OP_CCLASS_MIX_NOT
: MOP_IN(OP_CCLASS_MIX_NOT
);
1749 if (ONIGENC_IS_MBC_HEAD(encode
, s
, end
)) {
1754 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0)
1758 GET_LENGTH_INC(tlen
, p
);
1765 case OP_CCLASS_NODE
: MOP_IN(OP_CCLASS_NODE
);
1773 GET_POINTER_INC(node
, p
);
1774 mb_len
= enclen(encode
, s
, end
);
1778 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1779 if (onig_is_code_in_cc_len(mb_len
, code
, node
) == 0) goto fail
;
1784 case OP_ANYCHAR
: MOP_IN(OP_ANYCHAR
);
1786 n
= enclen(encode
, s
, end
);
1788 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1793 case OP_ANYCHAR_ML
: MOP_IN(OP_ANYCHAR_ML
);
1795 n
= enclen(encode
, s
, end
);
1801 case OP_ANYCHAR_STAR
: MOP_IN(OP_ANYCHAR_STAR
);
1802 while (DATA_ENSURE_CHECK1
) {
1803 STACK_PUSH_ALT(p
, s
, sprev
);
1804 n
= enclen(encode
, s
, end
);
1806 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1813 case OP_ANYCHAR_ML_STAR
: MOP_IN(OP_ANYCHAR_ML_STAR
);
1814 while (DATA_ENSURE_CHECK1
) {
1815 STACK_PUSH_ALT(p
, s
, sprev
);
1816 n
= enclen(encode
, s
, end
);
1830 case OP_ANYCHAR_STAR_PEEK_NEXT
: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT
);
1831 while (DATA_ENSURE_CHECK1
) {
1833 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1835 n
= enclen(encode
, s
, end
);
1837 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1845 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1846 while (DATA_ENSURE_CHECK1
) {
1848 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1850 n
= enclen(encode
, s
, end
);
1865 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1866 case OP_STATE_CHECK_ANYCHAR_STAR
: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR
);
1867 GET_STATE_CHECK_NUM_INC(mem
, p
);
1868 while (DATA_ENSURE_CHECK1
) {
1869 STATE_CHECK_VAL(scv
, mem
);
1872 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1873 n
= enclen(encode
, s
);
1875 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1882 case OP_STATE_CHECK_ANYCHAR_ML_STAR
:
1883 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR
);
1885 GET_STATE_CHECK_NUM_INC(mem
, p
);
1886 while (DATA_ENSURE_CHECK1
) {
1887 STATE_CHECK_VAL(scv
, mem
);
1890 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1891 n
= enclen(encode
, s
);
1904 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1906 case OP_WORD
: MOP_IN(OP_WORD
);
1908 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1911 s
+= enclen(encode
, s
, end
);
1915 case OP_NOT_WORD
: MOP_IN(OP_NOT_WORD
);
1917 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1920 s
+= enclen(encode
, s
, end
);
1924 case OP_WORD_BOUND
: MOP_IN(OP_WORD_BOUND
);
1925 if (ON_STR_BEGIN(s
)) {
1927 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1930 else if (ON_STR_END(s
)) {
1931 if (! ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1935 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1936 == ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1943 case OP_NOT_WORD_BOUND
: MOP_IN(OP_NOT_WORD_BOUND
);
1944 if (ON_STR_BEGIN(s
)) {
1945 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1948 else if (ON_STR_END(s
)) {
1949 if (ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1953 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1954 != ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1961 #ifdef USE_WORD_BEGIN_END
1962 case OP_WORD_BEGIN
: MOP_IN(OP_WORD_BEGIN
);
1963 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1964 if (ON_STR_BEGIN(s
) || !ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1972 case OP_WORD_END
: MOP_IN(OP_WORD_END
);
1973 if (!ON_STR_BEGIN(s
) && ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1974 if (ON_STR_END(s
) || !ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1983 case OP_BEGIN_BUF
: MOP_IN(OP_BEGIN_BUF
);
1984 if (! ON_STR_BEGIN(s
)) goto fail
;
1990 case OP_END_BUF
: MOP_IN(OP_END_BUF
);
1991 if (! ON_STR_END(s
)) goto fail
;
1997 case OP_BEGIN_LINE
: MOP_IN(OP_BEGIN_LINE
);
1998 if (ON_STR_BEGIN(s
)) {
1999 if (IS_NOTBOL(msa
->options
)) goto fail
;
2003 else if (ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
) && !ON_STR_END(s
)) {
2010 case OP_END_LINE
: MOP_IN(OP_END_LINE
);
2011 if (ON_STR_END(s
)) {
2012 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2013 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2015 if (IS_NOTEOL(msa
->options
)) goto fail
;
2018 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2022 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) {
2026 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2027 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2035 case OP_SEMI_END_BUF
: MOP_IN(OP_SEMI_END_BUF
);
2036 if (ON_STR_END(s
)) {
2037 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2038 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2040 if (IS_NOTEOL(msa
->options
)) goto fail
;
2043 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2047 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
) &&
2048 ON_STR_END(s
+ enclen(encode
, s
, end
))) {
2052 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2053 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2054 UChar
* ss
= s
+ enclen(encode
, s
);
2055 ss
+= enclen(encode
, ss
);
2056 if (ON_STR_END(ss
)) {
2065 case OP_BEGIN_POSITION
: MOP_IN(OP_BEGIN_POSITION
);
2066 if (s
!= msa
->start
)
2073 case OP_MEMORY_START_PUSH
: MOP_IN(OP_MEMORY_START_PUSH
);
2074 GET_MEMNUM_INC(mem
, p
);
2075 STACK_PUSH_MEM_START(mem
, s
);
2080 case OP_MEMORY_START
: MOP_IN(OP_MEMORY_START
);
2081 GET_MEMNUM_INC(mem
, p
);
2082 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2087 case OP_MEMORY_END_PUSH
: MOP_IN(OP_MEMORY_END_PUSH
);
2088 GET_MEMNUM_INC(mem
, p
);
2089 STACK_PUSH_MEM_END(mem
, s
);
2094 case OP_MEMORY_END
: MOP_IN(OP_MEMORY_END
);
2095 GET_MEMNUM_INC(mem
, p
);
2096 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2101 #ifdef USE_SUBEXP_CALL
2102 case OP_MEMORY_END_PUSH_REC
: MOP_IN(OP_MEMORY_END_PUSH_REC
);
2103 GET_MEMNUM_INC(mem
, p
);
2104 STACK_GET_MEM_START(mem
, stkp
); /* should be before push mem-end. */
2105 STACK_PUSH_MEM_END(mem
, s
);
2106 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2111 case OP_MEMORY_END_REC
: MOP_IN(OP_MEMORY_END_REC
);
2112 GET_MEMNUM_INC(mem
, p
);
2113 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2114 STACK_GET_MEM_START(mem
, stkp
);
2116 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2117 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2119 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )stkp
->u
.mem
.pstr
);
2121 STACK_PUSH_MEM_END_MARK(mem
);
2127 case OP_BACKREF1
: MOP_IN(OP_BACKREF1
);
2132 case OP_BACKREF2
: MOP_IN(OP_BACKREF2
);
2137 case OP_BACKREFN
: MOP_IN(OP_BACKREFN
);
2138 GET_MEMNUM_INC(mem
, p
);
2142 UChar
*pstart
, *pend
;
2144 /* if you want to remove following line,
2145 you should check in parse and compile time. */
2146 if (mem
> num_mem
) goto fail
;
2147 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2148 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2150 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2151 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2153 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2155 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2156 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2157 : (UChar
* )((void* )mem_end_stk
[mem
]));
2161 STRING_CMP(pstart
, s
, n
);
2162 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2170 case OP_BACKREFN_IC
: MOP_IN(OP_BACKREFN_IC
);
2171 GET_MEMNUM_INC(mem
, p
);
2174 UChar
*pstart
, *pend
;
2176 /* if you want to remove following line,
2177 you should check in parse and compile time. */
2178 if (mem
> num_mem
) goto fail
;
2179 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2180 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2182 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2183 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2185 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2187 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2188 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2189 : (UChar
* )((void* )mem_end_stk
[mem
]));
2193 STRING_CMP_IC(case_fold_flag
, pstart
, &s
, n
);
2194 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2202 case OP_BACKREF_MULTI
: MOP_IN(OP_BACKREF_MULTI
);
2205 UChar
*pstart
, *pend
, *swork
;
2207 GET_LENGTH_INC(tlen
, p
);
2208 for (i
= 0; i
< tlen
; i
++) {
2209 GET_MEMNUM_INC(mem
, p
);
2211 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2212 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2214 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2215 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2217 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2219 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2220 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2221 : (UChar
* )((void* )mem_end_stk
[mem
]));
2226 STRING_CMP_VALUE(pstart
, swork
, n
, is_fail
);
2227 if (is_fail
) continue;
2229 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2232 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2233 break; /* success */
2235 if (i
== tlen
) goto fail
;
2241 case OP_BACKREF_MULTI_IC
: MOP_IN(OP_BACKREF_MULTI_IC
);
2244 UChar
*pstart
, *pend
, *swork
;
2246 GET_LENGTH_INC(tlen
, p
);
2247 for (i
= 0; i
< tlen
; i
++) {
2248 GET_MEMNUM_INC(mem
, p
);
2250 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2251 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2253 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2254 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2256 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2258 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2259 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2260 : (UChar
* )((void* )mem_end_stk
[mem
]));
2265 STRING_CMP_VALUE_IC(case_fold_flag
, pstart
, &swork
, n
, is_fail
);
2266 if (is_fail
) continue;
2268 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2271 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2272 break; /* success */
2274 if (i
== tlen
) goto fail
;
2280 #ifdef USE_BACKREF_WITH_LEVEL
2281 case OP_BACKREF_WITH_LEVEL
:
2287 GET_OPTION_INC(ic
, p
);
2288 GET_LENGTH_INC(level
, p
);
2289 GET_LENGTH_INC(tlen
, p
);
2292 if (backref_match_at_nested_level(reg
, stk
, stk_base
, ic
2293 , case_fold_flag
, (int )level
, (int )tlen
, p
, &s
, end
)) {
2294 while (sprev
+ (len
= enclen(encode
, sprev
, end
)) < s
)
2297 p
+= (SIZE_MEMNUM
* tlen
);
2309 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2310 case OP_SET_OPTION_PUSH
: MOP_IN(OP_SET_OPTION_PUSH
);
2311 GET_OPTION_INC(option
, p
);
2312 STACK_PUSH_ALT(p
, s
, sprev
);
2313 p
+= SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
;
2318 case OP_SET_OPTION
: MOP_IN(OP_SET_OPTION
);
2319 GET_OPTION_INC(option
, p
);
2325 case OP_NULL_CHECK_START
: MOP_IN(OP_NULL_CHECK_START
);
2326 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2327 STACK_PUSH_NULL_CHECK_START(mem
, s
);
2332 case OP_NULL_CHECK_END
: MOP_IN(OP_NULL_CHECK_END
);
2336 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2337 STACK_NULL_CHECK(isnull
, mem
, s
);
2339 #ifdef ONIG_DEBUG_MATCH
2340 fprintf(stderr
, "NULL_CHECK_END: skip id:%d, s:%d\n",
2341 (int )mem
, (int )s
);
2344 /* empty loop founded, skip next instruction */
2351 case OP_REPEAT_INC_NG
:
2352 case OP_REPEAT_INC_SG
:
2353 case OP_REPEAT_INC_NG_SG
:
2357 goto unexpected_bytecode_error
;
2366 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2367 case OP_NULL_CHECK_END_MEMST
: MOP_IN(OP_NULL_CHECK_END_MEMST
);
2371 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2372 STACK_NULL_CHECK_MEMST(isnull
, mem
, s
, reg
);
2374 #ifdef ONIG_DEBUG_MATCH
2375 fprintf(stderr
, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2376 (int )mem
, (int )s
);
2378 if (isnull
== -1) goto fail
;
2379 goto null_check_found
;
2387 #ifdef USE_SUBEXP_CALL
2388 case OP_NULL_CHECK_END_MEMST_PUSH
:
2389 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH
);
2393 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2394 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2395 STACK_NULL_CHECK_MEMST_REC(isnull
, mem
, s
, reg
);
2397 STACK_NULL_CHECK_REC(isnull
, mem
, s
);
2400 #ifdef ONIG_DEBUG_MATCH
2401 fprintf(stderr
, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2402 (int )mem
, (int )s
);
2404 if (isnull
== -1) goto fail
;
2405 goto null_check_found
;
2408 STACK_PUSH_NULL_CHECK_END(mem
);
2416 case OP_JUMP
: MOP_IN(OP_JUMP
);
2417 GET_RELADDR_INC(addr
, p
);
2420 CHECK_INTERRUPT_IN_MATCH_AT
;
2424 case OP_PUSH
: MOP_IN(OP_PUSH
);
2425 GET_RELADDR_INC(addr
, p
);
2426 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2431 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2432 case OP_STATE_CHECK_PUSH
: MOP_IN(OP_STATE_CHECK_PUSH
);
2433 GET_STATE_CHECK_NUM_INC(mem
, p
);
2434 STATE_CHECK_VAL(scv
, mem
);
2437 GET_RELADDR_INC(addr
, p
);
2438 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2443 case OP_STATE_CHECK_PUSH_OR_JUMP
: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP
);
2444 GET_STATE_CHECK_NUM_INC(mem
, p
);
2445 GET_RELADDR_INC(addr
, p
);
2446 STATE_CHECK_VAL(scv
, mem
);
2451 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2457 case OP_STATE_CHECK
: MOP_IN(OP_STATE_CHECK
);
2458 GET_STATE_CHECK_NUM_INC(mem
, p
);
2459 STATE_CHECK_VAL(scv
, mem
);
2462 STACK_PUSH_STATE_CHECK(s
, mem
);
2466 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2468 case OP_POP
: MOP_IN(OP_POP
);
2474 case OP_PUSH_OR_JUMP_EXACT1
: MOP_IN(OP_PUSH_OR_JUMP_EXACT1
);
2475 GET_RELADDR_INC(addr
, p
);
2476 if (*p
== *s
&& DATA_ENSURE_CHECK1
) {
2478 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2487 case OP_PUSH_IF_PEEK_NEXT
: MOP_IN(OP_PUSH_IF_PEEK_NEXT
);
2488 GET_RELADDR_INC(addr
, p
);
2491 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2500 case OP_REPEAT
: MOP_IN(OP_REPEAT
);
2502 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2503 GET_RELADDR_INC(addr
, p
);
2506 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2507 STACK_PUSH_REPEAT(mem
, p
);
2509 if (reg
->repeat_range
[mem
].lower
== 0) {
2510 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2517 case OP_REPEAT_NG
: MOP_IN(OP_REPEAT_NG
);
2519 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2520 GET_RELADDR_INC(addr
, p
);
2523 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2524 STACK_PUSH_REPEAT(mem
, p
);
2526 if (reg
->repeat_range
[mem
].lower
== 0) {
2527 STACK_PUSH_ALT(p
, s
, sprev
);
2535 case OP_REPEAT_INC
: MOP_IN(OP_REPEAT_INC
);
2536 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2537 si
= repeat_stk
[mem
];
2538 stkp
= STACK_AT(si
);
2541 stkp
->u
.repeat
.count
++;
2542 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].upper
) {
2543 /* end of repeat. Nothing to do. */
2545 else if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2546 STACK_PUSH_ALT(p
, s
, sprev
);
2547 p
= STACK_AT(si
)->u
.repeat
.pcode
; /* Don't use stkp after PUSH. */
2550 p
= stkp
->u
.repeat
.pcode
;
2552 STACK_PUSH_REPEAT_INC(si
);
2554 CHECK_INTERRUPT_IN_MATCH_AT
;
2558 case OP_REPEAT_INC_SG
: MOP_IN(OP_REPEAT_INC_SG
);
2559 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2560 STACK_GET_REPEAT(mem
, stkp
);
2561 si
= GET_STACK_INDEX(stkp
);
2565 case OP_REPEAT_INC_NG
: MOP_IN(OP_REPEAT_INC_NG
);
2566 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2567 si
= repeat_stk
[mem
];
2568 stkp
= STACK_AT(si
);
2571 stkp
->u
.repeat
.count
++;
2572 if (stkp
->u
.repeat
.count
< reg
->repeat_range
[mem
].upper
) {
2573 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2574 UChar
* pcode
= stkp
->u
.repeat
.pcode
;
2576 STACK_PUSH_REPEAT_INC(si
);
2577 STACK_PUSH_ALT(pcode
, s
, sprev
);
2580 p
= stkp
->u
.repeat
.pcode
;
2581 STACK_PUSH_REPEAT_INC(si
);
2584 else if (stkp
->u
.repeat
.count
== reg
->repeat_range
[mem
].upper
) {
2585 STACK_PUSH_REPEAT_INC(si
);
2588 CHECK_INTERRUPT_IN_MATCH_AT
;
2592 case OP_REPEAT_INC_NG_SG
: MOP_IN(OP_REPEAT_INC_NG_SG
);
2593 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2594 STACK_GET_REPEAT(mem
, stkp
);
2595 si
= GET_STACK_INDEX(stkp
);
2599 case OP_PUSH_POS
: MOP_IN(OP_PUSH_POS
);
2600 STACK_PUSH_POS(s
, sprev
);
2605 case OP_POP_POS
: MOP_IN(OP_POP_POS
);
2607 STACK_POS_END(stkp
);
2608 s
= stkp
->u
.state
.pstr
;
2609 sprev
= stkp
->u
.state
.pstr_prev
;
2615 case OP_PUSH_POS_NOT
: MOP_IN(OP_PUSH_POS_NOT
);
2616 GET_RELADDR_INC(addr
, p
);
2617 STACK_PUSH_POS_NOT(p
+ addr
, s
, sprev
);
2622 case OP_FAIL_POS
: MOP_IN(OP_FAIL_POS
);
2623 STACK_POP_TIL_POS_NOT
;
2627 case OP_PUSH_STOP_BT
: MOP_IN(OP_PUSH_STOP_BT
);
2633 case OP_POP_STOP_BT
: MOP_IN(OP_POP_STOP_BT
);
2639 case OP_LOOK_BEHIND
: MOP_IN(OP_LOOK_BEHIND
);
2640 GET_LENGTH_INC(tlen
, p
);
2641 s
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2642 if (IS_NULL(s
)) goto fail
;
2643 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2648 case OP_PUSH_LOOK_BEHIND_NOT
: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT
);
2649 GET_RELADDR_INC(addr
, p
);
2650 GET_LENGTH_INC(tlen
, p
);
2651 q
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2653 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2654 If you want to change to fail, replace following line. */
2659 STACK_PUSH_LOOK_BEHIND_NOT(p
+ addr
, s
, sprev
);
2661 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2667 case OP_FAIL_LOOK_BEHIND_NOT
: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT
);
2668 STACK_POP_TIL_LOOK_BEHIND_NOT
;
2672 #ifdef USE_SUBEXP_CALL
2673 case OP_CALL
: MOP_IN(OP_CALL
);
2674 GET_ABSADDR_INC(addr
, p
);
2675 STACK_PUSH_CALL_FRAME(p
);
2681 case OP_RETURN
: MOP_IN(OP_RETURN
);
2696 case OP_FAIL
: MOP_IN(OP_FAIL
);
2698 p
= stk
->u
.state
.pcode
;
2699 s
= stk
->u
.state
.pstr
;
2700 sprev
= stk
->u
.state
.pstr_prev
;
2702 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2703 if (stk
->u
.state
.state_check
!= 0) {
2704 stk
->type
= STK_STATE_CHECK_MARK
;
2714 goto bytecode_error
;
2716 } /* end of switch */
2718 } /* end of while(1) */
2727 return ONIGERR_STACK_BUG
;
2732 return ONIGERR_UNDEFINED_BYTECODE
;
2734 unexpected_bytecode_error
:
2736 return ONIGERR_UNEXPECTED_BYTECODE
;
2741 slow_search(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2742 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
2744 UChar
*t
, *p
, *s
, *end
;
2746 end
= (UChar
* )text_end
;
2747 end
-= target_end
- target
- 1;
2748 if (end
> text_range
)
2754 if (*s
== *target
) {
2757 while (t
< target_end
) {
2762 if (t
== target_end
)
2765 s
+= enclen(enc
, s
, end
);
2768 return (UChar
* )NULL
;
2772 str_lower_case_match(OnigEncoding enc
, int case_fold_flag
,
2773 const UChar
* t
, const UChar
* tend
,
2774 const UChar
* p
, const UChar
* end
)
2777 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2780 lowlen
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &p
, end
, lowbuf
);
2782 while (lowlen
> 0) {
2783 if (*t
++ != *q
++) return 0;
2792 slow_search_ic(OnigEncoding enc
, int case_fold_flag
,
2793 UChar
* target
, UChar
* target_end
,
2794 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
2798 end
= (UChar
* )text_end
;
2799 end
-= target_end
- target
- 1;
2800 if (end
> text_range
)
2806 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
2810 s
+= enclen(enc
, s
, text_end
);
2813 return (UChar
* )NULL
;
2817 slow_search_backward(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2818 const UChar
* text
, const UChar
* adjust_text
,
2819 const UChar
* text_end
, const UChar
* text_start
)
2823 s
= (UChar
* )text_end
;
2824 s
-= (target_end
- target
);
2826 s
= (UChar
* )text_start
;
2828 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
2831 if (*s
== *target
) {
2834 while (t
< target_end
) {
2839 if (t
== target_end
)
2842 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
2845 return (UChar
* )NULL
;
2849 slow_search_backward_ic(OnigEncoding enc
, int case_fold_flag
,
2850 UChar
* target
, UChar
* target_end
,
2851 const UChar
* text
, const UChar
* adjust_text
,
2852 const UChar
* text_end
, const UChar
* text_start
)
2856 s
= (UChar
* )text_end
;
2857 s
-= (target_end
- target
);
2859 s
= (UChar
* )text_start
;
2861 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
2864 if (str_lower_case_match(enc
, case_fold_flag
,
2865 target
, target_end
, s
, text_end
))
2868 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
2871 return (UChar
* )NULL
;
2875 bm_search_notrev(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2876 const UChar
* text
, const UChar
* text_end
,
2877 const UChar
* text_range
)
2879 const UChar
*s
, *se
, *t
, *p
, *end
;
2883 #ifdef ONIG_DEBUG_SEARCH
2884 fprintf(stderr
, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2885 (int )text
, (int )text_end
, (int )text_range
);
2888 tail
= target_end
- 1;
2889 tlen1
= tail
- target
;
2891 if (end
+ tlen1
> text_end
)
2892 end
= text_end
- tlen1
;
2896 if (IS_NULL(reg
->int_map
)) {
2900 while (t
>= target
&& *p
== *t
) {
2903 if (t
< target
) return (UChar
* )s
;
2905 skip
= reg
->map
[*se
];
2908 s
+= enclen(reg
->enc
, s
, end
);
2909 } while ((s
- t
) < skip
&& s
< end
);
2916 while (t
>= target
&& *p
== *t
) {
2919 if (t
< target
) return (UChar
* )s
;
2921 skip
= reg
->int_map
[*se
];
2924 s
+= enclen(reg
->enc
, s
, end
);
2925 } while ((s
- t
) < skip
&& s
< end
);
2929 return (UChar
* )NULL
;
2933 bm_search(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2934 const UChar
* text
, const UChar
* text_end
, const UChar
* text_range
)
2936 const UChar
*s
, *t
, *p
, *end
;
2939 end
= text_range
+ (target_end
- target
) - 1;
2943 tail
= target_end
- 1;
2944 s
= text
+ (target_end
- target
) - 1;
2945 if (IS_NULL(reg
->int_map
)) {
2949 while (t
>= target
&& *p
== *t
) {
2952 if (t
< target
) return (UChar
* )(p
+ 1);
2956 else { /* see int_map[] */
2960 while (t
>= target
&& *p
== *t
) {
2963 if (t
< target
) return (UChar
* )(p
+ 1);
2964 s
+= reg
->int_map
[*s
];
2967 return (UChar
* )NULL
;
2971 set_bm_backward_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
2977 if (IS_NULL(*skip
)) {
2978 *skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
2979 if (IS_NULL(*skip
)) return ONIGERR_MEMORY
;
2983 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
2986 for (i
= len
- 1; i
> 0; i
--)
2993 bm_search_backward(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2994 const UChar
* text
, const UChar
* adjust_text
,
2995 const UChar
* text_end
, const UChar
* text_start
)
2997 const UChar
*s
, *t
, *p
;
2999 s
= text_end
- (target_end
- target
);
3003 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
3008 while (t
< target_end
&& *p
== *t
) {
3011 if (t
== target_end
)
3014 s
-= reg
->int_map_backward
[*s
];
3015 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
3018 return (UChar
* )NULL
;
3022 map_search(OnigEncoding enc
, UChar map
[],
3023 const UChar
* text
, const UChar
* text_range
)
3025 const UChar
*s
= text
;
3027 while (s
< text_range
) {
3028 if (map
[*s
]) return (UChar
* )s
;
3030 s
+= enclen(enc
, s
, text_range
);
3032 return (UChar
* )NULL
;
3036 map_search_backward(OnigEncoding enc
, UChar map
[],
3037 const UChar
* text
, const UChar
* adjust_text
,
3038 const UChar
* text_start
)
3040 const UChar
*s
= text_start
;
3043 if (map
[*s
]) return (UChar
* )s
;
3045 s
= onigenc_get_prev_char_head(enc
, adjust_text
, s
);
3047 return (UChar
* )NULL
;
3051 onig_match(regex_t
* reg
, const UChar
* str
, const UChar
* end
, const UChar
* at
, OnigRegion
* region
,
3052 OnigOptionType option
)
3058 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3060 THREAD_ATOMIC_START
;
3061 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3062 ONIG_STATE_INC(reg
);
3063 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3064 onig_chain_reduce(reg
);
3065 ONIG_STATE_INC(reg
);
3073 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3074 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3075 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3081 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3083 MATCH_ARG_INIT(msa
, option
, region
, at
);
3084 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3086 int offset
= at
- str
;
3087 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
3092 #ifdef USE_POSIX_API_REGION_OPTION
3093 && !IS_POSIX_REGION(option
)
3096 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
3102 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
, str
, at
);
3103 r
= match_at(reg
, str
, end
,
3104 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3110 MATCH_ARG_FREE(msa
);
3111 ONIG_STATE_DEC_THREAD(reg
);
3116 forward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
, UChar
* s
,
3117 UChar
* range
, UChar
** low
, UChar
** high
, UChar
** low_prev
)
3119 UChar
*p
, *pprev
= (UChar
* )NULL
;
3121 #ifdef ONIG_DEBUG_SEARCH
3122 fprintf(stderr
, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3123 (int )str
, (int )end
, (int )s
, (int )range
);
3127 if (reg
->dmin
> 0) {
3128 if (ONIGENC_IS_SINGLEBYTE(reg
->enc
)) {
3132 UChar
*q
= p
+ reg
->dmin
;
3133 while (p
< q
) p
+= enclen(reg
->enc
, p
, end
);
3138 switch (reg
->optimize
) {
3139 case ONIG_OPTIMIZE_EXACT
:
3140 p
= slow_search(reg
->enc
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3142 case ONIG_OPTIMIZE_EXACT_IC
:
3143 p
= slow_search_ic(reg
->enc
, reg
->case_fold_flag
,
3144 reg
->exact
, reg
->exact_end
, p
, end
, range
);
3147 case ONIG_OPTIMIZE_EXACT_BM
:
3148 p
= bm_search(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3151 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
3152 p
= bm_search_notrev(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3155 case ONIG_OPTIMIZE_MAP
:
3156 p
= map_search(reg
->enc
, reg
->map
, p
, range
);
3160 if (p
&& p
< range
) {
3161 if (p
- reg
->dmin
< s
) {
3164 p
+= enclen(reg
->enc
, p
, end
);
3168 if (reg
->sub_anchor
) {
3171 switch (reg
->sub_anchor
) {
3172 case ANCHOR_BEGIN_LINE
:
3173 if (!ON_STR_BEGIN(p
)) {
3174 prev
= onigenc_get_prev_char_head(reg
->enc
,
3175 (pprev
? pprev
: str
), p
);
3176 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
3181 case ANCHOR_END_LINE
:
3182 if (ON_STR_END(p
)) {
3183 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3184 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
,
3185 (pprev
? pprev
: str
), p
);
3186 if (prev
&& ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
3190 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3191 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3192 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
3200 if (reg
->dmax
== 0) {
3204 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, s
, p
);
3206 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3207 (pprev
? pprev
: str
), p
);
3211 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3212 *low
= p
- reg
->dmax
;
3214 *low
= onigenc_get_right_adjust_char_head_with_prev(reg
->enc
, s
,
3215 *low
, (const UChar
** )low_prev
);
3216 if (low_prev
&& IS_NULL(*low_prev
))
3217 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3218 (pprev
? pprev
: s
), *low
);
3222 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3223 (pprev
? pprev
: str
), *low
);
3227 /* no needs to adjust *high, *high is used as range check only */
3228 *high
= p
- reg
->dmin
;
3230 #ifdef ONIG_DEBUG_SEARCH
3232 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3233 (int )(*low
- str
), (int )(*high
- str
), reg
->dmin
, reg
->dmax
);
3235 return 1; /* success */
3238 return 0; /* fail */
3241 static int set_bm_backward_skip
P_((UChar
* s
, UChar
* end
, OnigEncoding enc
,
3244 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3247 backward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3248 UChar
* s
, const UChar
* range
, UChar
* adjrange
,
3249 UChar
** low
, UChar
** high
)
3258 switch (reg
->optimize
) {
3259 case ONIG_OPTIMIZE_EXACT
:
3261 p
= slow_search_backward(reg
->enc
, reg
->exact
, reg
->exact_end
,
3262 range
, adjrange
, end
, p
);
3265 case ONIG_OPTIMIZE_EXACT_IC
:
3266 p
= slow_search_backward_ic(reg
->enc
, reg
->case_fold_flag
,
3267 reg
->exact
, reg
->exact_end
,
3268 range
, adjrange
, end
, p
);
3271 case ONIG_OPTIMIZE_EXACT_BM
:
3272 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
3273 if (IS_NULL(reg
->int_map_backward
)) {
3274 if (s
- range
< BM_BACKWARD_SEARCH_LENGTH_THRESHOLD
)
3277 r
= set_bm_backward_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
3278 &(reg
->int_map_backward
));
3281 p
= bm_search_backward(reg
, reg
->exact
, reg
->exact_end
, range
, adjrange
,
3285 case ONIG_OPTIMIZE_MAP
:
3286 p
= map_search_backward(reg
->enc
, reg
->map
, range
, adjrange
, p
);
3291 if (reg
->sub_anchor
) {
3294 switch (reg
->sub_anchor
) {
3295 case ANCHOR_BEGIN_LINE
:
3296 if (!ON_STR_BEGIN(p
)) {
3297 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, p
);
3298 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
3305 case ANCHOR_END_LINE
:
3306 if (ON_STR_END(p
)) {
3307 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3308 prev
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3309 if (IS_NULL(prev
)) goto fail
;
3310 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
3316 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3317 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3318 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
3321 p
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3322 if (IS_NULL(p
)) goto fail
;
3329 /* no needs to adjust *high, *high is used as range check only */
3330 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3331 *low
= p
- reg
->dmax
;
3332 *high
= p
- reg
->dmin
;
3333 *high
= onigenc_get_right_adjust_char_head(reg
->enc
, adjrange
, *high
);
3336 #ifdef ONIG_DEBUG_SEARCH
3337 fprintf(stderr
, "backward_search_range: low: %d, high: %d\n",
3338 (int )(*low
- str
), (int )(*high
- str
));
3340 return 1; /* success */
3344 #ifdef ONIG_DEBUG_SEARCH
3345 fprintf(stderr
, "backward_search_range: fail.\n");
3347 return 0; /* fail */
3352 onig_search(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3353 const UChar
* start
, const UChar
* range
, OnigRegion
* region
, OnigOptionType option
)
3358 const UChar
*orig_start
= start
;
3359 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3360 const UChar
*orig_range
= range
;
3363 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3365 THREAD_ATOMIC_START
;
3366 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3367 ONIG_STATE_INC(reg
);
3368 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3369 onig_chain_reduce(reg
);
3370 ONIG_STATE_INC(reg
);
3378 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3379 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3380 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3386 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3388 #ifdef ONIG_DEBUG_SEARCH
3390 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3391 (int )str
, (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
3395 #ifdef USE_POSIX_API_REGION_OPTION
3396 && !IS_POSIX_REGION(option
)
3399 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
3400 if (r
) goto finish_no_msa
;
3403 if (start
> end
|| start
< str
) goto mismatch_no_msa
;
3406 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3407 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3408 #define MATCH_AND_RETURN_CHECK(upper_range) \
3409 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3410 if (r != ONIG_MISMATCH) {\
3412 if (! IS_FIND_LONGEST(reg->options)) {\
3416 else goto finish; /* error */ \
3419 #define MATCH_AND_RETURN_CHECK(upper_range) \
3420 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3421 if (r != ONIG_MISMATCH) {\
3425 else goto finish; /* error */ \
3427 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3429 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3430 #define MATCH_AND_RETURN_CHECK(none) \
3431 r = match_at(reg, str, end, s, prev, &msa);\
3432 if (r != ONIG_MISMATCH) {\
3434 if (! IS_FIND_LONGEST(reg->options)) {\
3438 else goto finish; /* error */ \
3441 #define MATCH_AND_RETURN_CHECK(none) \
3442 r = match_at(reg, str, end, s, prev, &msa);\
3443 if (r != ONIG_MISMATCH) {\
3447 else goto finish; /* error */ \
3449 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3450 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3453 /* anchor optimize: resume search range */
3454 if (reg
->anchor
!= 0 && str
< end
) {
3455 UChar
*min_semi_end
, *max_semi_end
;
3457 if (reg
->anchor
& ANCHOR_BEGIN_POSITION
) {
3458 /* search start-position only */
3465 else if (reg
->anchor
& ANCHOR_BEGIN_BUF
) {
3466 /* search str-position only */
3467 if (range
> start
) {
3468 if (start
!= str
) goto mismatch_no_msa
;
3477 goto mismatch_no_msa
;
3480 else if (reg
->anchor
& ANCHOR_END_BUF
) {
3481 min_semi_end
= max_semi_end
= (UChar
* )end
;
3484 if ((OnigDistance
)(max_semi_end
- str
) < reg
->anchor_dmin
)
3485 goto mismatch_no_msa
;
3487 if (range
> start
) {
3488 if ((OnigDistance
)(min_semi_end
- start
) > reg
->anchor_dmax
) {
3489 start
= min_semi_end
- reg
->anchor_dmax
;
3491 start
= onigenc_get_right_adjust_char_head(reg
->enc
, str
, start
);
3492 else { /* match with empty at end */
3493 start
= onigenc_get_prev_char_head(reg
->enc
, str
, end
);
3496 if ((OnigDistance
)(max_semi_end
- (range
- 1)) < reg
->anchor_dmin
) {
3497 range
= max_semi_end
- reg
->anchor_dmin
+ 1;
3500 if (start
>= range
) goto mismatch_no_msa
;
3503 if ((OnigDistance
)(min_semi_end
- range
) > reg
->anchor_dmax
) {
3504 range
= min_semi_end
- reg
->anchor_dmax
;
3506 if ((OnigDistance
)(max_semi_end
- start
) < reg
->anchor_dmin
) {
3507 start
= max_semi_end
- reg
->anchor_dmin
;
3508 start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, start
);
3510 if (range
> start
) goto mismatch_no_msa
;
3513 else if (reg
->anchor
& ANCHOR_SEMI_END_BUF
) {
3514 UChar
* pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, end
, 1);
3516 max_semi_end
= (UChar
* )end
;
3517 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, pre_end
, end
)) {
3518 min_semi_end
= pre_end
;
3520 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3521 pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, pre_end
, 1);
3522 if (IS_NOT_NULL(pre_end
) &&
3523 ONIGENC_IS_MBC_CRNL(reg
->enc
, pre_end
, end
)) {
3524 min_semi_end
= pre_end
;
3527 if (min_semi_end
> str
&& start
<= min_semi_end
) {
3532 min_semi_end
= (UChar
* )end
;
3536 else if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR_ML
)) {
3537 goto begin_position
;
3540 else if (str
== end
) { /* empty string */
3541 static const UChar
* address_for_empty_string
= (UChar
* )"";
3543 #ifdef ONIG_DEBUG_SEARCH
3544 fprintf(stderr
, "onig_search: empty string.\n");
3547 if (reg
->threshold_len
== 0) {
3548 start
= end
= str
= address_for_empty_string
;
3550 prev
= (UChar
* )NULL
;
3552 MATCH_ARG_INIT(msa
, option
, region
, start
);
3553 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3554 msa
.state_check_buff
= (void* )0;
3555 msa
.state_check_buff_size
= 0; /* NO NEED, for valgrind */
3557 MATCH_AND_RETURN_CHECK(end
);
3560 goto mismatch_no_msa
;
3563 #ifdef ONIG_DEBUG_SEARCH
3564 fprintf(stderr
, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3565 (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
3568 MATCH_ARG_INIT(msa
, option
, region
, orig_start
);
3569 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571 int offset
= (MIN(start
, range
) - str
);
3572 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
3577 if (range
> start
) { /* forward search */
3579 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3581 prev
= (UChar
* )NULL
;
3583 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3584 UChar
*sch_range
, *low
, *high
, *low_prev
;
3586 sch_range
= (UChar
* )range
;
3587 if (reg
->dmax
!= 0) {
3588 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3589 sch_range
= (UChar
* )end
;
3591 sch_range
+= reg
->dmax
;
3592 if (sch_range
> end
) sch_range
= (UChar
* )end
;
3596 if ((end
- start
) < reg
->threshold_len
)
3599 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3601 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3602 &low
, &high
, &low_prev
)) goto mismatch
;
3608 MATCH_AND_RETURN_CHECK(orig_range
);
3610 s
+= enclen(reg
->enc
, s
, end
);
3612 } while (s
< range
);
3615 else { /* check only. */
3616 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3617 &low
, &high
, (UChar
** )NULL
)) goto mismatch
;
3619 if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR
) != 0) {
3621 MATCH_AND_RETURN_CHECK(orig_range
);
3623 s
+= enclen(reg
->enc
, s
, end
);
3625 while (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
) && s
< range
) {
3627 s
+= enclen(reg
->enc
, s
, end
);
3629 } while (s
< range
);
3636 MATCH_AND_RETURN_CHECK(orig_range
);
3638 s
+= enclen(reg
->enc
, s
, end
);
3639 } while (s
< range
);
3641 if (s
== range
) { /* because empty match with /$/. */
3642 MATCH_AND_RETURN_CHECK(orig_range
);
3645 else { /* backward search */
3646 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3647 if (orig_start
< end
)
3648 orig_start
+= enclen(reg
->enc
, orig_start
, end
); /* is upper range */
3651 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3652 UChar
*low
, *high
, *adjrange
, *sch_start
;
3655 adjrange
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, range
);
3657 adjrange
= (UChar
* )end
;
3659 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
&&
3660 (end
- range
) >= reg
->threshold_len
) {
3662 sch_start
= s
+ reg
->dmax
;
3663 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3664 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3672 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3673 MATCH_AND_RETURN_CHECK(orig_start
);
3676 } while (s
>= range
);
3679 else { /* check only. */
3680 if ((end
- range
) < reg
->threshold_len
) goto mismatch
;
3683 if (reg
->dmax
!= 0) {
3684 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3685 sch_start
= (UChar
* )end
;
3687 sch_start
+= reg
->dmax
;
3688 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3690 sch_start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
,
3694 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3695 &low
, &high
) <= 0) goto mismatch
;
3700 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3701 MATCH_AND_RETURN_CHECK(orig_start
);
3703 } while (s
>= range
);
3707 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3708 if (IS_FIND_LONGEST(reg
->options
)) {
3709 if (msa
.best_len
>= 0) {
3718 MATCH_ARG_FREE(msa
);
3719 ONIG_STATE_DEC_THREAD(reg
);
3721 /* If result is mismatch and no FIND_NOT_EMPTY option,
3722 then the region is not setted in match_at(). */
3723 if (IS_FIND_NOT_EMPTY(reg
->options
) && region
3724 #ifdef USE_POSIX_API_REGION_OPTION
3725 && !IS_POSIX_REGION(option
)
3728 onig_region_clear(region
);
3732 if (r
!= ONIG_MISMATCH
)
3733 fprintf(stderr
, "onig_search: error %d\n", r
);
3740 ONIG_STATE_DEC_THREAD(reg
);
3742 if (r
!= ONIG_MISMATCH
)
3743 fprintf(stderr
, "onig_search: error %d\n", r
);
3748 ONIG_STATE_DEC_THREAD(reg
);
3749 MATCH_ARG_FREE(msa
);
3754 onig_get_encoding(regex_t
* reg
)
3759 extern OnigOptionType
3760 onig_get_options(regex_t
* reg
)
3762 return reg
->options
;
3765 extern OnigCaseFoldType
3766 onig_get_case_fold_flag(regex_t
* reg
)
3768 return reg
->case_fold_flag
;
3771 extern OnigSyntaxType
*
3772 onig_get_syntax(regex_t
* reg
)
3778 onig_number_of_captures(regex_t
* reg
)
3780 return reg
->num_mem
;
3784 onig_number_of_capture_histories(regex_t
* reg
)
3786 #ifdef USE_CAPTURE_HISTORY
3790 for (i
= 0; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
3791 if (BIT_STATUS_AT(reg
->capture_history
, i
) != 0)
3801 onig_copy_encoding(OnigEncoding to
, OnigEncoding from
)