* gc.c: __size__ removed. use the length of __members__ instead.
[ruby-svn.git] / regexec.c
blob684c5c86d4d7eee35374fcd1aed621f95b8ff90a
1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include "regint.h"
32 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
43 static void
44 history_tree_clear(OnigCaptureTreeNode* node)
46 int i;
48 if (IS_NOT_NULL(node)) {
49 for (i = 0; i < node->num_childs; i++) {
50 if (IS_NOT_NULL(node->childs[i])) {
51 history_tree_free(node->childs[i]);
54 for (i = 0; i < node->allocated; i++) {
55 node->childs[i] = (OnigCaptureTreeNode* )0;
57 node->num_childs = 0;
58 node->beg = ONIG_REGION_NOTPOS;
59 node->end = ONIG_REGION_NOTPOS;
60 node->group = -1;
64 static void
65 history_tree_free(OnigCaptureTreeNode* node)
67 history_tree_clear(node);
68 xfree(node);
71 static void
72 history_root_free(OnigRegion* r)
74 if (IS_NOT_NULL(r->history_root)) {
75 history_tree_free(r->history_root);
76 r->history_root = (OnigCaptureTreeNode* )0;
80 static OnigCaptureTreeNode*
81 history_node_new(void)
83 OnigCaptureTreeNode* node;
85 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86 CHECK_NULL_RETURN(node);
87 node->childs = (OnigCaptureTreeNode** )0;
88 node->allocated = 0;
89 node->num_childs = 0;
90 node->group = -1;
91 node->beg = ONIG_REGION_NOTPOS;
92 node->end = ONIG_REGION_NOTPOS;
94 return node;
97 static int
98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
102 if (parent->num_childs >= parent->allocated) {
103 int n, i;
105 if (IS_NULL(parent->childs)) {
106 n = HISTORY_TREE_INIT_ALLOC_SIZE;
107 parent->childs =
108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
110 else {
111 n = parent->allocated * 2;
112 parent->childs =
113 (OnigCaptureTreeNode** )xrealloc(parent->childs,
114 sizeof(OnigCaptureTreeNode*) * n);
116 CHECK_NULL_RETURN_MEMERR(parent->childs);
117 for (i = parent->allocated; i < n; i++) {
118 parent->childs[i] = (OnigCaptureTreeNode* )0;
120 parent->allocated = n;
123 parent->childs[parent->num_childs] = child;
124 parent->num_childs++;
125 return 0;
128 static OnigCaptureTreeNode*
129 history_tree_clone(OnigCaptureTreeNode* node)
131 int i;
132 OnigCaptureTreeNode *clone, *child;
134 clone = history_node_new();
135 CHECK_NULL_RETURN(clone);
137 clone->beg = node->beg;
138 clone->end = node->end;
139 for (i = 0; i < node->num_childs; i++) {
140 child = history_tree_clone(node->childs[i]);
141 if (IS_NULL(child)) {
142 history_tree_free(clone);
143 return (OnigCaptureTreeNode* )0;
145 history_tree_add_child(clone, child);
148 return clone;
151 extern OnigCaptureTreeNode*
152 onig_get_capture_tree(OnigRegion* region)
154 return region->history_root;
156 #endif /* USE_CAPTURE_HISTORY */
158 extern void
159 onig_region_clear(OnigRegion* region)
161 int i;
163 for (i = 0; i < region->num_regs; i++) {
164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region);
168 #endif
171 extern int
172 onig_region_resize(OnigRegion* region, int n)
174 region->num_regs = n;
176 if (n < ONIG_NREGION)
177 n = ONIG_NREGION;
179 if (region->allocated == 0) {
180 region->beg = (int* )xmalloc(n * sizeof(int));
181 if (region->beg == 0)
182 return ONIGERR_MEMORY;
184 region->end = (int* )xmalloc(n * sizeof(int));
185 if (region->end == 0) {
186 xfree(region->beg);
187 return ONIGERR_MEMORY;
190 region->allocated = n;
192 else if (region->allocated < n) {
193 int *tmp;
195 region->allocated = 0;
196 tmp = (int* )xrealloc(region->beg, n * sizeof(int));
197 if (tmp == 0) {
198 xfree(region->beg);
199 xfree(region->end);
200 return ONIGERR_MEMORY;
202 region->beg = tmp;
203 tmp = (int* )xrealloc(region->end, n * sizeof(int));
204 if (tmp == 0) {
205 xfree(region->beg);
206 return ONIGERR_MEMORY;
208 region->end = tmp;
210 if (region->beg == 0 || region->end == 0)
211 return ONIGERR_MEMORY;
213 region->allocated = n;
216 return 0;
219 static int
220 onig_region_resize_clear(OnigRegion* region, int n)
222 int r;
224 r = onig_region_resize(region, n);
225 if (r != 0) return r;
226 onig_region_clear(region);
227 return 0;
230 extern int
231 onig_region_set(OnigRegion* region, int at, int beg, int end)
233 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
235 if (at >= region->allocated) {
236 int r = onig_region_resize(region, at + 1);
237 if (r < 0) return r;
240 region->beg[at] = beg;
241 region->end[at] = end;
242 return 0;
245 extern void
246 onig_region_init(OnigRegion* region)
248 region->num_regs = 0;
249 region->allocated = 0;
250 region->beg = (int* )0;
251 region->end = (int* )0;
252 region->history_root = (OnigCaptureTreeNode* )0;
255 extern OnigRegion*
256 onig_region_new(void)
258 OnigRegion* r;
260 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
261 if (r)
262 onig_region_init(r);
263 return r;
266 extern void
267 onig_region_free(OnigRegion* r, int free_self)
269 if (r) {
270 if (r->allocated > 0) {
271 if (r->beg) xfree(r->beg);
272 if (r->end) xfree(r->end);
273 r->allocated = 0;
275 #ifdef USE_CAPTURE_HISTORY
276 history_root_free(r);
277 #endif
278 if (free_self) xfree(r);
282 extern void
283 onig_region_copy(OnigRegion* to, OnigRegion* from)
285 #define RREGC_SIZE (sizeof(int) * from->num_regs)
286 int i;
288 if (to == from) return;
290 onig_region_resize(to, from->num_regs);
291 for (i = 0; i < from->num_regs; i++) {
292 to->beg[i] = from->beg[i];
293 to->end[i] = from->end[i];
295 to->num_regs = from->num_regs;
297 #ifdef USE_CAPTURE_HISTORY
298 history_root_free(to);
300 if (IS_NOT_NULL(from->history_root)) {
301 to->history_root = history_tree_clone(from->history_root);
303 #endif
307 /** stack **/
308 #define INVALID_STACK_INDEX -1
310 /* stack type */
311 /* used by normal-POP */
312 #define STK_ALT 0x0001
313 #define STK_LOOK_BEHIND_NOT 0x0002
314 #define STK_POS_NOT 0x0003
315 /* handled by normal-POP */
316 #define STK_MEM_START 0x0100
317 #define STK_MEM_END 0x8200
318 #define STK_REPEAT_INC 0x0300
319 #define STK_STATE_CHECK_MARK 0x1000
320 /* avoided by normal-POP */
321 #define STK_NULL_CHECK_START 0x3000
322 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
323 #define STK_MEM_END_MARK 0x8400
324 #define STK_POS 0x0500 /* used when POP-POS */
325 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
326 #define STK_REPEAT 0x0700
327 #define STK_CALL_FRAME 0x0800
328 #define STK_RETURN 0x0900
329 #define STK_VOID 0x0a00 /* for fill a blank */
331 /* stack type check mask */
332 #define STK_MASK_POP_USED 0x00ff
333 #define STK_MASK_TO_VOID_TARGET 0x10ff
334 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
336 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
337 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
338 (msa).stack_p = (void* )0;\
339 (msa).options = (arg_option);\
340 (msa).region = (arg_region);\
341 (msa).start = (arg_start);\
342 (msa).best_len = ONIG_MISMATCH;\
343 } while(0)
344 #else
345 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
346 (msa).stack_p = (void* )0;\
347 (msa).options = (arg_option);\
348 (msa).region = (arg_region);\
349 (msa).start = (arg_start);\
350 } while(0)
351 #endif
353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
355 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
357 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
358 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
359 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
360 offset = ((offset) * (state_num)) >> 3;\
361 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
362 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
363 (msa).state_check_buff = (void* )xmalloc(size);\
364 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
366 else \
367 (msa).state_check_buff = (void* )xalloca(size);\
368 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
369 (size_t )(size - (offset))); \
370 (msa).state_check_buff_size = size;\
372 else {\
373 (msa).state_check_buff = (void* )0;\
374 (msa).state_check_buff_size = 0;\
377 else {\
378 (msa).state_check_buff = (void* )0;\
379 (msa).state_check_buff_size = 0;\
381 } while(0)
383 #define MATCH_ARG_FREE(msa) do {\
384 if ((msa).stack_p) xfree((msa).stack_p);\
385 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
386 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
388 } while(0)
389 #else
390 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
391 #endif
395 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
396 if (msa->stack_p) {\
397 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
398 stk_alloc = (OnigStackType* )(msa->stack_p);\
399 stk_base = stk_alloc;\
400 stk = stk_base;\
401 stk_end = stk_base + msa->stack_n;\
403 else {\
404 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
405 + sizeof(OnigStackType) * (stack_num));\
406 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
407 stk_base = stk_alloc;\
408 stk = stk_base;\
409 stk_end = stk_base + (stack_num);\
411 } while(0)
413 #define STACK_SAVE do{\
414 if (stk_base != stk_alloc) {\
415 msa->stack_p = stk_base;\
416 msa->stack_n = stk_end - stk_base;\
418 } while(0)
420 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
422 extern unsigned int
423 onig_get_match_stack_limit_size(void)
425 return MatchStackLimitSize;
428 extern int
429 onig_set_match_stack_limit_size(unsigned int size)
431 MatchStackLimitSize = size;
432 return 0;
435 static int
436 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
437 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
439 unsigned int n;
440 OnigStackType *x, *stk_base, *stk_end, *stk;
442 stk_base = *arg_stk_base;
443 stk_end = *arg_stk_end;
444 stk = *arg_stk;
446 n = stk_end - stk_base;
447 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
448 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
449 if (IS_NULL(x)) {
450 STACK_SAVE;
451 return ONIGERR_MEMORY;
453 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
454 n *= 2;
456 else {
457 n *= 2;
458 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
459 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
460 return ONIGERR_MATCH_STACK_LIMIT_OVER;
461 else
462 n = MatchStackLimitSize;
464 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
465 if (IS_NULL(x)) {
466 STACK_SAVE;
467 return ONIGERR_MEMORY;
470 *arg_stk = x + (stk - stk_base);
471 *arg_stk_base = x;
472 *arg_stk_end = x + n;
473 return 0;
476 #define STACK_ENSURE(n) do {\
477 if (stk_end - stk < (n)) {\
478 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
479 if (r != 0) { STACK_SAVE; return r; } \
481 } while(0)
483 #define STACK_AT(index) (stk_base + (index))
484 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
486 #define STACK_PUSH_TYPE(stack_type) do {\
487 STACK_ENSURE(1);\
488 stk->type = (stack_type);\
489 STACK_INC;\
490 } while(0)
492 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
494 #ifdef USE_COMBINATION_EXPLOSION_CHECK
495 #define STATE_CHECK_POS(s,snum) \
496 (((s) - str) * num_comb_exp_check + ((snum) - 1))
497 #define STATE_CHECK_VAL(v,snum) do {\
498 if (state_check_buff != NULL) {\
499 int x = STATE_CHECK_POS(s,snum);\
500 (v) = state_check_buff[x/8] & (1<<(x%8));\
502 else (v) = 0;\
503 } while(0)
506 #define ELSE_IF_STATE_CHECK_MARK(stk) \
507 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
508 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
509 state_check_buff[x/8] |= (1<<(x%8)); \
512 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
513 STACK_ENSURE(1);\
514 stk->type = (stack_type);\
515 stk->u.state.pcode = (pat);\
516 stk->u.state.pstr = (s);\
517 stk->u.state.pstr_prev = (sprev);\
518 stk->u.state.state_check = 0;\
519 STACK_INC;\
520 } while(0)
522 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
523 stk->type = (stack_type);\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.state_check = 0;\
526 STACK_INC;\
527 } while(0)
529 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
530 STACK_ENSURE(1);\
531 stk->type = STK_ALT;\
532 stk->u.state.pcode = (pat);\
533 stk->u.state.pstr = (s);\
534 stk->u.state.pstr_prev = (sprev);\
535 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
536 STACK_INC;\
537 } while(0)
539 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
540 if (state_check_buff != NULL) {\
541 STACK_ENSURE(1);\
542 stk->type = STK_STATE_CHECK_MARK;\
543 stk->u.state.pstr = (s);\
544 stk->u.state.state_check = (snum);\
545 STACK_INC;\
547 } while(0)
549 #else /* USE_COMBINATION_EXPLOSION_CHECK */
551 #define ELSE_IF_STATE_CHECK_MARK(stk)
553 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
554 STACK_ENSURE(1);\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
557 stk->u.state.pstr = (s);\
558 stk->u.state.pstr_prev = (sprev);\
559 STACK_INC;\
560 } while(0)
562 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
563 stk->type = (stack_type);\
564 stk->u.state.pcode = (pat);\
565 STACK_INC;\
566 } while(0)
567 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
569 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
570 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
571 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
572 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
573 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
574 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
576 #define STACK_PUSH_REPEAT(id, pat) do {\
577 STACK_ENSURE(1);\
578 stk->type = STK_REPEAT;\
579 stk->u.repeat.num = (id);\
580 stk->u.repeat.pcode = (pat);\
581 stk->u.repeat.count = 0;\
582 STACK_INC;\
583 } while(0)
585 #define STACK_PUSH_REPEAT_INC(sindex) do {\
586 STACK_ENSURE(1);\
587 stk->type = STK_REPEAT_INC;\
588 stk->u.repeat_inc.si = (sindex);\
589 STACK_INC;\
590 } while(0)
592 #define STACK_PUSH_MEM_START(mnum, s) do {\
593 STACK_ENSURE(1);\
594 stk->type = STK_MEM_START;\
595 stk->u.mem.num = (mnum);\
596 stk->u.mem.pstr = (s);\
597 stk->u.mem.start = mem_start_stk[mnum];\
598 stk->u.mem.end = mem_end_stk[mnum];\
599 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
600 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
601 STACK_INC;\
602 } while(0)
604 #define STACK_PUSH_MEM_END(mnum, s) do {\
605 STACK_ENSURE(1);\
606 stk->type = STK_MEM_END;\
607 stk->u.mem.num = (mnum);\
608 stk->u.mem.pstr = (s);\
609 stk->u.mem.start = mem_start_stk[mnum];\
610 stk->u.mem.end = mem_end_stk[mnum];\
611 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
612 STACK_INC;\
613 } while(0)
615 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
616 STACK_ENSURE(1);\
617 stk->type = STK_MEM_END_MARK;\
618 stk->u.mem.num = (mnum);\
619 STACK_INC;\
620 } while(0)
622 #define STACK_GET_MEM_START(mnum, k) do {\
623 int level = 0;\
624 k = stk;\
625 while (k > stk_base) {\
626 k--;\
627 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
628 && k->u.mem.num == (mnum)) {\
629 level++;\
631 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
632 if (level == 0) break;\
633 level--;\
636 } while(0)
638 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
639 int level = 0;\
640 while (k < stk) {\
641 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
642 if (level == 0) (start) = k->u.mem.pstr;\
643 level++;\
645 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
646 level--;\
647 if (level == 0) {\
648 (end) = k->u.mem.pstr;\
649 break;\
652 k++;\
654 } while(0)
656 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
657 STACK_ENSURE(1);\
658 stk->type = STK_NULL_CHECK_START;\
659 stk->u.null_check.num = (cnum);\
660 stk->u.null_check.pstr = (s);\
661 STACK_INC;\
662 } while(0)
664 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
665 STACK_ENSURE(1);\
666 stk->type = STK_NULL_CHECK_END;\
667 stk->u.null_check.num = (cnum);\
668 STACK_INC;\
669 } while(0)
671 #define STACK_PUSH_CALL_FRAME(pat) do {\
672 STACK_ENSURE(1);\
673 stk->type = STK_CALL_FRAME;\
674 stk->u.call_frame.ret_addr = (pat);\
675 STACK_INC;\
676 } while(0)
678 #define STACK_PUSH_RETURN do {\
679 STACK_ENSURE(1);\
680 stk->type = STK_RETURN;\
681 STACK_INC;\
682 } while(0)
685 #ifdef ONIG_DEBUG
686 #define STACK_BASE_CHECK(p, at) \
687 if ((p) < stk_base) {\
688 fprintf(stderr, "at %s\n", at);\
689 goto stack_error;\
691 #else
692 #define STACK_BASE_CHECK(p, at)
693 #endif
695 #define STACK_POP_ONE do {\
696 stk--;\
697 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
698 } while(0)
700 #define STACK_POP do {\
701 switch (pop_level) {\
702 case STACK_POP_LEVEL_FREE:\
703 while (1) {\
704 stk--;\
705 STACK_BASE_CHECK(stk, "STACK_POP"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 ELSE_IF_STATE_CHECK_MARK(stk);\
709 break;\
710 case STACK_POP_LEVEL_MEM_START:\
711 while (1) {\
712 stk--;\
713 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
714 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
715 else if (stk->type == STK_MEM_START) {\
716 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
717 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
719 ELSE_IF_STATE_CHECK_MARK(stk);\
721 break;\
722 default:\
723 while (1) {\
724 stk--;\
725 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
726 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
727 else if (stk->type == STK_MEM_START) {\
728 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
729 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
731 else if (stk->type == STK_REPEAT_INC) {\
732 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
734 else if (stk->type == STK_MEM_END) {\
735 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
736 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
738 ELSE_IF_STATE_CHECK_MARK(stk);\
740 break;\
742 } while(0)
744 #define STACK_POP_TIL_POS_NOT do {\
745 while (1) {\
746 stk--;\
747 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
748 if (stk->type == STK_POS_NOT) break;\
749 else if (stk->type == STK_MEM_START) {\
750 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
751 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
753 else if (stk->type == STK_REPEAT_INC) {\
754 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
756 else if (stk->type == STK_MEM_END) {\
757 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
758 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
760 ELSE_IF_STATE_CHECK_MARK(stk);\
762 } while(0)
764 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
765 while (1) {\
766 stk--;\
767 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
768 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
769 else if (stk->type == STK_MEM_START) {\
770 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
771 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
773 else if (stk->type == STK_REPEAT_INC) {\
774 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
776 else if (stk->type == STK_MEM_END) {\
777 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
778 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
780 ELSE_IF_STATE_CHECK_MARK(stk);\
782 } while(0)
784 #define STACK_POS_END(k) do {\
785 k = stk;\
786 while (1) {\
787 k--;\
788 STACK_BASE_CHECK(k, "STACK_POS_END"); \
789 if (IS_TO_VOID_TARGET(k)) {\
790 k->type = STK_VOID;\
792 else if (k->type == STK_POS) {\
793 k->type = STK_VOID;\
794 break;\
797 } while(0)
799 #define STACK_STOP_BT_END do {\
800 OnigStackType *k = stk;\
801 while (1) {\
802 k--;\
803 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
804 if (IS_TO_VOID_TARGET(k)) {\
805 k->type = STK_VOID;\
807 else if (k->type == STK_STOP_BT) {\
808 k->type = STK_VOID;\
809 break;\
812 } while(0)
814 #define STACK_NULL_CHECK(isnull,id,s) do {\
815 OnigStackType* k = stk;\
816 while (1) {\
817 k--;\
818 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
819 if (k->type == STK_NULL_CHECK_START) {\
820 if (k->u.null_check.num == (id)) {\
821 (isnull) = (k->u.null_check.pstr == (s));\
822 break;\
826 } while(0)
828 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
829 int level = 0;\
830 OnigStackType* k = stk;\
831 while (1) {\
832 k--;\
833 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
834 if (k->type == STK_NULL_CHECK_START) {\
835 if (k->u.null_check.num == (id)) {\
836 if (level == 0) {\
837 (isnull) = (k->u.null_check.pstr == (s));\
838 break;\
840 else level--;\
843 else if (k->type == STK_NULL_CHECK_END) {\
844 level++;\
847 } while(0)
849 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
850 OnigStackType* k = stk;\
851 while (1) {\
852 k--;\
853 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
854 if (k->type == STK_NULL_CHECK_START) {\
855 if (k->u.null_check.num == (id)) {\
856 if (k->u.null_check.pstr != (s)) {\
857 (isnull) = 0;\
858 break;\
860 else {\
861 UChar* endp;\
862 (isnull) = 1;\
863 while (k < stk) {\
864 if (k->type == STK_MEM_START) {\
865 if (k->u.mem.end == INVALID_STACK_INDEX) {\
866 (isnull) = 0; break;\
868 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
869 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
870 else\
871 endp = (UChar* )k->u.mem.end;\
872 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
873 (isnull) = 0; break;\
875 else if (endp != s) {\
876 (isnull) = -1; /* empty, but position changed */ \
879 k++;\
881 break;\
886 } while(0)
888 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
889 int level = 0;\
890 OnigStackType* k = stk;\
891 while (1) {\
892 k--;\
893 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
894 if (k->type == STK_NULL_CHECK_START) {\
895 if (k->u.null_check.num == (id)) {\
896 if (level == 0) {\
897 if (k->u.null_check.pstr != (s)) {\
898 (isnull) = 0;\
899 break;\
901 else {\
902 UChar* endp;\
903 (isnull) = 1;\
904 while (k < stk) {\
905 if (k->type == STK_MEM_START) {\
906 if (k->u.mem.end == INVALID_STACK_INDEX) {\
907 (isnull) = 0; break;\
909 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
910 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
911 else\
912 endp = (UChar* )k->u.mem.end;\
913 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
914 (isnull) = 0; break;\
916 else if (endp != s) {\
917 (isnull) = -1; /* empty, but position changed */ \
920 k++;\
922 break;\
925 else {\
926 level--;\
930 else if (k->type == STK_NULL_CHECK_END) {\
931 if (k->u.null_check.num == (id)) level++;\
934 } while(0)
936 #define STACK_GET_REPEAT(id, k) do {\
937 int level = 0;\
938 k = stk;\
939 while (1) {\
940 k--;\
941 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
942 if (k->type == STK_REPEAT) {\
943 if (level == 0) {\
944 if (k->u.repeat.num == (id)) {\
945 break;\
949 else if (k->type == STK_CALL_FRAME) level--;\
950 else if (k->type == STK_RETURN) level++;\
952 } while(0)
954 #define STACK_RETURN(addr) do {\
955 int level = 0;\
956 OnigStackType* k = stk;\
957 while (1) {\
958 k--;\
959 STACK_BASE_CHECK(k, "STACK_RETURN"); \
960 if (k->type == STK_CALL_FRAME) {\
961 if (level == 0) {\
962 (addr) = k->u.call_frame.ret_addr;\
963 break;\
965 else level--;\
967 else if (k->type == STK_RETURN)\
968 level++;\
970 } while(0)
973 #define STRING_CMP(s1,s2,len) do {\
974 while (len-- > 0) {\
975 if (*s1++ != *s2++) goto fail;\
977 } while(0)
979 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
980 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
981 goto fail; \
982 } while(0)
984 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
985 UChar* s1, UChar** ps2, int mblen)
987 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
988 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
989 UChar *p1, *p2, *end1, *s2, *end2;
990 int len1, len2;
992 s2 = *ps2;
993 end1 = s1 + mblen;
994 end2 = s2 + mblen;
995 while (s1 < end1) {
996 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
997 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
998 if (len1 != len2) return 0;
999 p1 = buf1;
1000 p2 = buf2;
1001 while (len1-- > 0) {
1002 if (*p1 != *p2) return 0;
1003 p1++;
1004 p2++;
1008 *ps2 = s2;
1009 return 1;
1012 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1013 is_fail = 0;\
1014 while (len-- > 0) {\
1015 if (*s1++ != *s2++) {\
1016 is_fail = 1; break;\
1019 } while(0)
1021 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1022 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1023 is_fail = 1; \
1024 else \
1025 is_fail = 0; \
1026 } while(0)
1029 #define IS_EMPTY_STR (str == end)
1030 #define ON_STR_BEGIN(s) ((s) == str)
1031 #define ON_STR_END(s) ((s) == end)
1032 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1033 #define DATA_ENSURE_CHECK1 (s < right_range)
1034 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1035 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1036 #else
1037 #define DATA_ENSURE_CHECK1 (s < end)
1038 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1039 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1040 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1043 #ifdef USE_CAPTURE_HISTORY
1044 static int
1045 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1046 OnigStackType* stk_top, UChar* str, regex_t* reg)
1048 int n, r;
1049 OnigCaptureTreeNode* child;
1050 OnigStackType* k = *kp;
1052 while (k < stk_top) {
1053 if (k->type == STK_MEM_START) {
1054 n = k->u.mem.num;
1055 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1056 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1057 child = history_node_new();
1058 CHECK_NULL_RETURN_MEMERR(child);
1059 child->group = n;
1060 child->beg = (int )(k->u.mem.pstr - str);
1061 r = history_tree_add_child(node, child);
1062 if (r != 0) return r;
1063 *kp = (k + 1);
1064 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1065 if (r != 0) return r;
1067 k = *kp;
1068 child->end = (int )(k->u.mem.pstr - str);
1071 else if (k->type == STK_MEM_END) {
1072 if (k->u.mem.num == node->group) {
1073 node->end = (int )(k->u.mem.pstr - str);
1074 *kp = k;
1075 return 0;
1078 k++;
1081 return 1; /* 1: root node ending. */
1083 #endif
1085 #ifdef USE_BACKREF_WITH_LEVEL
1086 static int mem_is_in_memp(int mem, int num, UChar* memp)
1088 int i;
1089 MemNumType m;
1091 for (i = 0; i < num; i++) {
1092 GET_MEMNUM_INC(m, memp);
1093 if (mem == (int )m) return 1;
1095 return 0;
1098 static int backref_match_at_nested_level(regex_t* reg
1099 , OnigStackType* top, OnigStackType* stk_base
1100 , int ignore_case, int case_fold_flag
1101 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1103 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1104 int level;
1105 OnigStackType* k;
1107 level = 0;
1108 k = top;
1109 k--;
1110 while (k >= stk_base) {
1111 if (k->type == STK_CALL_FRAME) {
1112 level--;
1114 else if (k->type == STK_RETURN) {
1115 level++;
1117 else if (level == nest) {
1118 if (k->type == STK_MEM_START) {
1119 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1120 pstart = k->u.mem.pstr;
1121 if (pend != NULL_UCHARP) {
1122 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1123 p = pstart;
1124 ss = *s;
1126 if (ignore_case != 0) {
1127 if (string_cmp_ic(reg->enc, case_fold_flag,
1128 pstart, &ss, (int )(pend - pstart)) == 0)
1129 return 0; /* or goto next_mem; */
1131 else {
1132 while (p < pend) {
1133 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1137 *s = ss;
1138 return 1;
1142 else if (k->type == STK_MEM_END) {
1143 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1144 pend = k->u.mem.pstr;
1148 k--;
1151 return 0;
1153 #endif /* USE_BACKREF_WITH_LEVEL */
1156 #ifdef ONIG_DEBUG_STATISTICS
1158 #define USE_TIMEOFDAY
1160 #ifdef USE_TIMEOFDAY
1161 #ifdef HAVE_SYS_TIME_H
1162 #include <sys/time.h>
1163 #endif
1164 #ifdef HAVE_UNISTD_H
1165 #include <unistd.h>
1166 #endif
1167 static struct timeval ts, te;
1168 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1169 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1170 (((te).tv_sec - (ts).tv_sec)*1000000))
1171 #else
1172 #ifdef HAVE_SYS_TIMES_H
1173 #include <sys/times.h>
1174 #endif
1175 static struct tms ts, te;
1176 #define GETTIME(t) times(&(t))
1177 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1178 #endif
1180 static int OpCounter[256];
1181 static int OpPrevCounter[256];
1182 static unsigned long OpTime[256];
1183 static int OpCurr = OP_FINISH;
1184 static int OpPrevTarget = OP_FAIL;
1185 static int MaxStackDepth = 0;
1187 #define MOP_IN(opcode) do {\
1188 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1189 OpCurr = opcode;\
1190 OpCounter[opcode]++;\
1191 GETTIME(ts);\
1192 } while(0)
1194 #define MOP_OUT do {\
1195 GETTIME(te);\
1196 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1197 } while(0)
1199 extern void
1200 onig_statistics_init(void)
1202 int i;
1203 for (i = 0; i < 256; i++) {
1204 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1206 MaxStackDepth = 0;
1209 extern void
1210 onig_print_statistics(FILE* f)
1212 int i;
1213 fprintf(f, " count prev time\n");
1214 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1215 fprintf(f, "%8d: %8d: %10ld: %s\n",
1216 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1218 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1221 #define STACK_INC do {\
1222 stk++;\
1223 if (stk - stk_base > MaxStackDepth) \
1224 MaxStackDepth = stk - stk_base;\
1225 } while(0)
1227 #else
1228 #define STACK_INC stk++
1230 #define MOP_IN(opcode)
1231 #define MOP_OUT
1232 #endif
1235 /* matching region of POSIX API */
1236 typedef int regoff_t;
1238 typedef struct {
1239 regoff_t rm_so;
1240 regoff_t rm_eo;
1241 } posix_regmatch_t;
1243 /* match data(str - end) from position (sstart). */
1244 /* if sstart == str then set sprev to NULL. */
1245 static int
1246 match_at(regex_t* reg, const UChar* str, const UChar* end,
1247 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1248 const UChar* right_range,
1249 #endif
1250 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1252 static UChar FinishCode[] = { OP_FINISH };
1254 int i, n, num_mem, best_len, pop_level;
1255 LengthType tlen, tlen2;
1256 MemNumType mem;
1257 RelAddrType addr;
1258 OnigOptionType option = reg->options;
1259 OnigEncoding encode = reg->enc;
1260 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1261 UChar *s, *q, *sbegin;
1262 UChar *p = reg->p;
1263 char *alloca_base;
1264 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1265 OnigStackType *stkp; /* used as any purpose. */
1266 OnigStackIndex si;
1267 OnigStackIndex *repeat_stk;
1268 OnigStackIndex *mem_start_stk, *mem_end_stk;
1269 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1270 int scv;
1271 unsigned char* state_check_buff = msa->state_check_buff;
1272 int num_comb_exp_check = reg->num_comb_exp_check;
1273 #endif
1274 n = reg->num_repeat + reg->num_mem * 2;
1276 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1277 pop_level = reg->stack_pop_level;
1278 num_mem = reg->num_mem;
1279 repeat_stk = (OnigStackIndex* )alloca_base;
1281 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1282 mem_end_stk = mem_start_stk + num_mem;
1283 mem_start_stk--; /* for index start from 1,
1284 mem_start_stk[1]..mem_start_stk[num_mem] */
1285 mem_end_stk--; /* for index start from 1,
1286 mem_end_stk[1]..mem_end_stk[num_mem] */
1287 for (i = 1; i <= num_mem; i++) {
1288 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1291 #ifdef ONIG_DEBUG_MATCH
1292 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1293 (int )str, (int )end, (int )sstart, (int )sprev);
1294 fprintf(stderr, "size: %d, start offset: %d\n",
1295 (int )(end - str), (int )(sstart - str));
1296 #endif
1298 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1299 best_len = ONIG_MISMATCH;
1300 s = (UChar* )sstart;
1301 while (1) {
1302 #ifdef ONIG_DEBUG_MATCH
1304 UChar *q, *bp, buf[50];
1305 int len;
1306 fprintf(stderr, "%4d> \"", (int )(s - str));
1307 bp = buf;
1308 for (i = 0, q = s; i < 7 && q < end; i++) {
1309 len = enclen(encode, q);
1310 while (len-- > 0) *bp++ = *q++;
1312 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1313 else { xmemcpy(bp, "\"", 1); bp += 1; }
1314 *bp = 0;
1315 fputs((char* )buf, stderr);
1316 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1317 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1318 fprintf(stderr, "\n");
1320 #endif
1322 sbegin = s;
1323 switch (*p++) {
1324 case OP_END: MOP_IN(OP_END);
1325 n = s - sstart;
1326 if (n > best_len) {
1327 OnigRegion* region;
1328 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1329 if (IS_FIND_LONGEST(option)) {
1330 if (n > msa->best_len) {
1331 msa->best_len = n;
1332 msa->best_s = (UChar* )sstart;
1334 else
1335 goto end_best_len;
1337 #endif
1338 best_len = n;
1339 region = msa->region;
1340 if (region) {
1341 #ifdef USE_POSIX_API_REGION_OPTION
1342 if (IS_POSIX_REGION(msa->options)) {
1343 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1345 rmt[0].rm_so = sstart - str;
1346 rmt[0].rm_eo = s - str;
1347 for (i = 1; i <= num_mem; i++) {
1348 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1349 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1350 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1351 else
1352 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1354 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1355 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1356 : (UChar* )((void* )mem_end_stk[i])) - str;
1358 else {
1359 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1363 else {
1364 #endif /* USE_POSIX_API_REGION_OPTION */
1365 region->beg[0] = sstart - str;
1366 region->end[0] = s - str;
1367 for (i = 1; i <= num_mem; i++) {
1368 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1369 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1370 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1371 else
1372 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1374 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1375 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1376 : (UChar* )((void* )mem_end_stk[i])) - str;
1378 else {
1379 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1383 #ifdef USE_CAPTURE_HISTORY
1384 if (reg->capture_history != 0) {
1385 int r;
1386 OnigCaptureTreeNode* node;
1388 if (IS_NULL(region->history_root)) {
1389 region->history_root = node = history_node_new();
1390 CHECK_NULL_RETURN_MEMERR(node);
1392 else {
1393 node = region->history_root;
1394 history_tree_clear(node);
1397 node->group = 0;
1398 node->beg = sstart - str;
1399 node->end = s - str;
1401 stkp = stk_base;
1402 r = make_capture_history_tree(region->history_root, &stkp,
1403 stk, (UChar* )str, reg);
1404 if (r < 0) {
1405 best_len = r; /* error code */
1406 goto finish;
1409 #endif /* USE_CAPTURE_HISTORY */
1410 #ifdef USE_POSIX_API_REGION_OPTION
1411 } /* else IS_POSIX_REGION() */
1412 #endif
1413 } /* if (region) */
1414 } /* n > best_len */
1416 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1417 end_best_len:
1418 #endif
1419 MOP_OUT;
1421 if (IS_FIND_CONDITION(option)) {
1422 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1423 best_len = ONIG_MISMATCH;
1424 goto fail; /* for retry */
1426 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1427 goto fail; /* for retry */
1431 /* default behavior: return first-matching result. */
1432 goto finish;
1433 break;
1435 case OP_EXACT1: MOP_IN(OP_EXACT1);
1436 #if 0
1437 DATA_ENSURE(1);
1438 if (*p != *s) goto fail;
1439 p++; s++;
1440 #endif
1441 if (*p != *s++) goto fail;
1442 DATA_ENSURE(0);
1443 p++;
1444 MOP_OUT;
1445 break;
1447 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1449 int len;
1450 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1452 DATA_ENSURE(1);
1453 len = ONIGENC_MBC_CASE_FOLD(encode,
1454 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1455 case_fold_flag,
1456 &s, end, lowbuf);
1457 DATA_ENSURE(0);
1458 q = lowbuf;
1459 while (len-- > 0) {
1460 if (*p != *q) {
1461 goto fail;
1463 p++; q++;
1466 MOP_OUT;
1467 break;
1469 case OP_EXACT2: MOP_IN(OP_EXACT2);
1470 DATA_ENSURE(2);
1471 if (*p != *s) goto fail;
1472 p++; s++;
1473 if (*p != *s) goto fail;
1474 sprev = s;
1475 p++; s++;
1476 MOP_OUT;
1477 continue;
1478 break;
1480 case OP_EXACT3: MOP_IN(OP_EXACT3);
1481 DATA_ENSURE(3);
1482 if (*p != *s) goto fail;
1483 p++; s++;
1484 if (*p != *s) goto fail;
1485 p++; s++;
1486 if (*p != *s) goto fail;
1487 sprev = s;
1488 p++; s++;
1489 MOP_OUT;
1490 continue;
1491 break;
1493 case OP_EXACT4: MOP_IN(OP_EXACT4);
1494 DATA_ENSURE(4);
1495 if (*p != *s) goto fail;
1496 p++; s++;
1497 if (*p != *s) goto fail;
1498 p++; s++;
1499 if (*p != *s) goto fail;
1500 p++; s++;
1501 if (*p != *s) goto fail;
1502 sprev = s;
1503 p++; s++;
1504 MOP_OUT;
1505 continue;
1506 break;
1508 case OP_EXACT5: MOP_IN(OP_EXACT5);
1509 DATA_ENSURE(5);
1510 if (*p != *s) goto fail;
1511 p++; s++;
1512 if (*p != *s) goto fail;
1513 p++; s++;
1514 if (*p != *s) goto fail;
1515 p++; s++;
1516 if (*p != *s) goto fail;
1517 p++; s++;
1518 if (*p != *s) goto fail;
1519 sprev = s;
1520 p++; s++;
1521 MOP_OUT;
1522 continue;
1523 break;
1525 case OP_EXACTN: MOP_IN(OP_EXACTN);
1526 GET_LENGTH_INC(tlen, p);
1527 DATA_ENSURE(tlen);
1528 while (tlen-- > 0) {
1529 if (*p++ != *s++) goto fail;
1531 sprev = s - 1;
1532 MOP_OUT;
1533 continue;
1534 break;
1536 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1538 int len;
1539 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1541 GET_LENGTH_INC(tlen, p);
1542 endp = p + tlen;
1544 while (p < endp) {
1545 sprev = s;
1546 DATA_ENSURE(1);
1547 len = ONIGENC_MBC_CASE_FOLD(encode,
1548 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1549 case_fold_flag,
1550 &s, end, lowbuf);
1551 DATA_ENSURE(0);
1552 q = lowbuf;
1553 while (len-- > 0) {
1554 if (*p != *q) goto fail;
1555 p++; q++;
1560 MOP_OUT;
1561 continue;
1562 break;
1564 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1565 DATA_ENSURE(2);
1566 if (*p != *s) goto fail;
1567 p++; s++;
1568 if (*p != *s) goto fail;
1569 p++; s++;
1570 MOP_OUT;
1571 break;
1573 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1574 DATA_ENSURE(4);
1575 if (*p != *s) goto fail;
1576 p++; s++;
1577 if (*p != *s) goto fail;
1578 p++; s++;
1579 sprev = s;
1580 if (*p != *s) goto fail;
1581 p++; s++;
1582 if (*p != *s) goto fail;
1583 p++; s++;
1584 MOP_OUT;
1585 continue;
1586 break;
1588 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1589 DATA_ENSURE(6);
1590 if (*p != *s) goto fail;
1591 p++; s++;
1592 if (*p != *s) goto fail;
1593 p++; s++;
1594 if (*p != *s) goto fail;
1595 p++; s++;
1596 if (*p != *s) goto fail;
1597 p++; s++;
1598 sprev = s;
1599 if (*p != *s) goto fail;
1600 p++; s++;
1601 if (*p != *s) goto fail;
1602 p++; s++;
1603 MOP_OUT;
1604 continue;
1605 break;
1607 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1608 GET_LENGTH_INC(tlen, p);
1609 DATA_ENSURE(tlen * 2);
1610 while (tlen-- > 0) {
1611 if (*p != *s) goto fail;
1612 p++; s++;
1613 if (*p != *s) goto fail;
1614 p++; s++;
1616 sprev = s - 2;
1617 MOP_OUT;
1618 continue;
1619 break;
1621 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1622 GET_LENGTH_INC(tlen, p);
1623 DATA_ENSURE(tlen * 3);
1624 while (tlen-- > 0) {
1625 if (*p != *s) goto fail;
1626 p++; s++;
1627 if (*p != *s) goto fail;
1628 p++; s++;
1629 if (*p != *s) goto fail;
1630 p++; s++;
1632 sprev = s - 3;
1633 MOP_OUT;
1634 continue;
1635 break;
1637 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1638 GET_LENGTH_INC(tlen, p); /* mb-len */
1639 GET_LENGTH_INC(tlen2, p); /* string len */
1640 tlen2 *= tlen;
1641 DATA_ENSURE(tlen2);
1642 while (tlen2-- > 0) {
1643 if (*p != *s) goto fail;
1644 p++; s++;
1646 sprev = s - tlen;
1647 MOP_OUT;
1648 continue;
1649 break;
1651 case OP_CCLASS: MOP_IN(OP_CCLASS);
1652 DATA_ENSURE(1);
1653 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1654 p += SIZE_BITSET;
1655 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
1656 MOP_OUT;
1657 break;
1659 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1660 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
1662 cclass_mb:
1663 GET_LENGTH_INC(tlen, p);
1665 OnigCodePoint code;
1666 UChar *ss;
1667 int mb_len;
1669 DATA_ENSURE(1);
1670 mb_len = enclen(encode, s, end);
1671 DATA_ENSURE(mb_len);
1672 ss = s;
1673 s += mb_len;
1674 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1676 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1677 if (! onig_is_in_code_range(p, code)) goto fail;
1678 #else
1679 q = p;
1680 ALIGNMENT_RIGHT(q);
1681 if (! onig_is_in_code_range(q, code)) goto fail;
1682 #endif
1684 p += tlen;
1685 MOP_OUT;
1686 break;
1688 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1689 DATA_ENSURE(1);
1690 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1691 p += SIZE_BITSET;
1692 goto cclass_mb;
1694 else {
1695 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1696 goto fail;
1698 p += SIZE_BITSET;
1699 GET_LENGTH_INC(tlen, p);
1700 p += tlen;
1701 s++;
1703 MOP_OUT;
1704 break;
1706 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1707 DATA_ENSURE(1);
1708 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1709 p += SIZE_BITSET;
1710 s += enclen(encode, s, end);
1711 MOP_OUT;
1712 break;
1714 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1715 DATA_ENSURE(1);
1716 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1717 s++;
1718 GET_LENGTH_INC(tlen, p);
1719 p += tlen;
1720 goto cc_mb_not_success;
1723 cclass_mb_not:
1724 GET_LENGTH_INC(tlen, p);
1726 OnigCodePoint code;
1727 UChar *ss;
1728 int mb_len = enclen(encode, s, end);
1730 if (! DATA_ENSURE_CHECK(mb_len)) {
1731 DATA_ENSURE(1);
1732 s = (UChar* )end;
1733 p += tlen;
1734 goto cc_mb_not_success;
1737 ss = s;
1738 s += mb_len;
1739 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1741 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1742 if (onig_is_in_code_range(p, code)) goto fail;
1743 #else
1744 q = p;
1745 ALIGNMENT_RIGHT(q);
1746 if (onig_is_in_code_range(q, code)) goto fail;
1747 #endif
1749 p += tlen;
1751 cc_mb_not_success:
1752 MOP_OUT;
1753 break;
1755 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1756 DATA_ENSURE(1);
1757 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1758 p += SIZE_BITSET;
1759 goto cclass_mb_not;
1761 else {
1762 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1763 goto fail;
1765 p += SIZE_BITSET;
1766 GET_LENGTH_INC(tlen, p);
1767 p += tlen;
1768 s++;
1770 MOP_OUT;
1771 break;
1773 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1775 OnigCodePoint code;
1776 void *node;
1777 int mb_len;
1778 UChar *ss;
1780 DATA_ENSURE(1);
1781 GET_POINTER_INC(node, p);
1782 mb_len = enclen(encode, s, end);
1783 ss = s;
1784 s += mb_len;
1785 DATA_ENSURE(0);
1786 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1787 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1789 MOP_OUT;
1790 break;
1792 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1793 DATA_ENSURE(1);
1794 n = enclen(encode, s, end);
1795 DATA_ENSURE(n);
1796 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1797 s += n;
1798 MOP_OUT;
1799 break;
1801 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1802 DATA_ENSURE(1);
1803 n = enclen(encode, s, end);
1804 DATA_ENSURE(n);
1805 s += n;
1806 MOP_OUT;
1807 break;
1809 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1810 while (DATA_ENSURE_CHECK1) {
1811 STACK_PUSH_ALT(p, s, sprev);
1812 n = enclen(encode, s, end);
1813 DATA_ENSURE(n);
1814 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1815 sprev = s;
1816 s += n;
1818 MOP_OUT;
1819 break;
1821 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1822 while (DATA_ENSURE_CHECK1) {
1823 STACK_PUSH_ALT(p, s, sprev);
1824 n = enclen(encode, s, end);
1825 if (n > 1) {
1826 DATA_ENSURE(n);
1827 sprev = s;
1828 s += n;
1830 else {
1831 sprev = s;
1832 s++;
1835 MOP_OUT;
1836 break;
1838 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1839 while (DATA_ENSURE_CHECK1) {
1840 if (*p == *s) {
1841 STACK_PUSH_ALT(p + 1, s, sprev);
1843 n = enclen(encode, s, end);
1844 DATA_ENSURE(n);
1845 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1846 sprev = s;
1847 s += n;
1849 p++;
1850 MOP_OUT;
1851 break;
1853 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1854 while (DATA_ENSURE_CHECK1) {
1855 if (*p == *s) {
1856 STACK_PUSH_ALT(p + 1, s, sprev);
1858 n = enclen(encode, s, end);
1859 if (n > 1) {
1860 DATA_ENSURE(n);
1861 sprev = s;
1862 s += n;
1864 else {
1865 sprev = s;
1866 s++;
1869 p++;
1870 MOP_OUT;
1871 break;
1873 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1874 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1875 GET_STATE_CHECK_NUM_INC(mem, p);
1876 while (DATA_ENSURE_CHECK1) {
1877 STATE_CHECK_VAL(scv, mem);
1878 if (scv) goto fail;
1880 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1881 n = enclen(encode, s);
1882 DATA_ENSURE(n);
1883 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1884 sprev = s;
1885 s += n;
1887 MOP_OUT;
1888 break;
1890 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1891 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1893 GET_STATE_CHECK_NUM_INC(mem, p);
1894 while (DATA_ENSURE_CHECK1) {
1895 STATE_CHECK_VAL(scv, mem);
1896 if (scv) goto fail;
1898 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1899 n = enclen(encode, s);
1900 if (n > 1) {
1901 DATA_ENSURE(n);
1902 sprev = s;
1903 s += n;
1905 else {
1906 sprev = s;
1907 s++;
1910 MOP_OUT;
1911 break;
1912 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1914 case OP_WORD: MOP_IN(OP_WORD);
1915 DATA_ENSURE(1);
1916 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1917 goto fail;
1919 s += enclen(encode, s, end);
1920 MOP_OUT;
1921 break;
1923 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1924 DATA_ENSURE(1);
1925 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1926 goto fail;
1928 s += enclen(encode, s, end);
1929 MOP_OUT;
1930 break;
1932 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1933 if (ON_STR_BEGIN(s)) {
1934 DATA_ENSURE(1);
1935 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1936 goto fail;
1938 else if (ON_STR_END(s)) {
1939 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1940 goto fail;
1942 else {
1943 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1944 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1945 goto fail;
1947 MOP_OUT;
1948 continue;
1949 break;
1951 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1952 if (ON_STR_BEGIN(s)) {
1953 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1954 goto fail;
1956 else if (ON_STR_END(s)) {
1957 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1958 goto fail;
1960 else {
1961 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1962 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1963 goto fail;
1965 MOP_OUT;
1966 continue;
1967 break;
1969 #ifdef USE_WORD_BEGIN_END
1970 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1971 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1972 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1973 MOP_OUT;
1974 continue;
1977 goto fail;
1978 break;
1980 case OP_WORD_END: MOP_IN(OP_WORD_END);
1981 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1982 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1983 MOP_OUT;
1984 continue;
1987 goto fail;
1988 break;
1989 #endif
1991 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1992 if (! ON_STR_BEGIN(s)) goto fail;
1994 MOP_OUT;
1995 continue;
1996 break;
1998 case OP_END_BUF: MOP_IN(OP_END_BUF);
1999 if (! ON_STR_END(s)) goto fail;
2001 MOP_OUT;
2002 continue;
2003 break;
2005 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
2006 if (ON_STR_BEGIN(s)) {
2007 if (IS_NOTBOL(msa->options)) goto fail;
2008 MOP_OUT;
2009 continue;
2011 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2012 MOP_OUT;
2013 continue;
2015 goto fail;
2016 break;
2018 case OP_END_LINE: MOP_IN(OP_END_LINE);
2019 if (ON_STR_END(s)) {
2020 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2021 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2022 #endif
2023 if (IS_NOTEOL(msa->options)) goto fail;
2024 MOP_OUT;
2025 continue;
2026 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2028 #endif
2030 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2031 MOP_OUT;
2032 continue;
2034 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2035 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2036 MOP_OUT;
2037 continue;
2039 #endif
2040 goto fail;
2041 break;
2043 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2044 if (ON_STR_END(s)) {
2045 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2046 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2047 #endif
2048 if (IS_NOTEOL(msa->options)) goto fail;
2049 MOP_OUT;
2050 continue;
2051 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2053 #endif
2055 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2056 ON_STR_END(s + enclen(encode, s, end))) {
2057 MOP_OUT;
2058 continue;
2060 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2061 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2062 UChar* ss = s + enclen(encode, s);
2063 ss += enclen(encode, ss);
2064 if (ON_STR_END(ss)) {
2065 MOP_OUT;
2066 continue;
2069 #endif
2070 goto fail;
2071 break;
2073 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2074 if (s != msa->start)
2075 goto fail;
2077 MOP_OUT;
2078 continue;
2079 break;
2081 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2082 GET_MEMNUM_INC(mem, p);
2083 STACK_PUSH_MEM_START(mem, s);
2084 MOP_OUT;
2085 continue;
2086 break;
2088 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2089 GET_MEMNUM_INC(mem, p);
2090 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2091 MOP_OUT;
2092 continue;
2093 break;
2095 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2096 GET_MEMNUM_INC(mem, p);
2097 STACK_PUSH_MEM_END(mem, s);
2098 MOP_OUT;
2099 continue;
2100 break;
2102 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2103 GET_MEMNUM_INC(mem, p);
2104 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2105 MOP_OUT;
2106 continue;
2107 break;
2109 #ifdef USE_SUBEXP_CALL
2110 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2111 GET_MEMNUM_INC(mem, p);
2112 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2113 STACK_PUSH_MEM_END(mem, s);
2114 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2115 MOP_OUT;
2116 continue;
2117 break;
2119 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2120 GET_MEMNUM_INC(mem, p);
2121 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2122 STACK_GET_MEM_START(mem, stkp);
2124 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2125 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2126 else
2127 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2129 STACK_PUSH_MEM_END_MARK(mem);
2130 MOP_OUT;
2131 continue;
2132 break;
2133 #endif
2135 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2136 mem = 1;
2137 goto backref;
2138 break;
2140 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2141 mem = 2;
2142 goto backref;
2143 break;
2145 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2146 GET_MEMNUM_INC(mem, p);
2147 backref:
2149 int len;
2150 UChar *pstart, *pend;
2152 /* if you want to remove following line,
2153 you should check in parse and compile time. */
2154 if (mem > num_mem) goto fail;
2155 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2156 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2158 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2159 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2160 else
2161 pstart = (UChar* )((void* )mem_start_stk[mem]);
2163 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2164 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2165 : (UChar* )((void* )mem_end_stk[mem]));
2166 n = pend - pstart;
2167 DATA_ENSURE(n);
2168 sprev = s;
2169 STRING_CMP(pstart, s, n);
2170 while (sprev + (len = enclen(encode, sprev, end)) < s)
2171 sprev += len;
2173 MOP_OUT;
2174 continue;
2176 break;
2178 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2179 GET_MEMNUM_INC(mem, p);
2181 int len;
2182 UChar *pstart, *pend;
2184 /* if you want to remove following line,
2185 you should check in parse and compile time. */
2186 if (mem > num_mem) goto fail;
2187 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2188 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2190 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2191 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2192 else
2193 pstart = (UChar* )((void* )mem_start_stk[mem]);
2195 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2196 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2197 : (UChar* )((void* )mem_end_stk[mem]));
2198 n = pend - pstart;
2199 DATA_ENSURE(n);
2200 sprev = s;
2201 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2202 while (sprev + (len = enclen(encode, sprev, end)) < s)
2203 sprev += len;
2205 MOP_OUT;
2206 continue;
2208 break;
2210 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2212 int len, is_fail;
2213 UChar *pstart, *pend, *swork;
2215 GET_LENGTH_INC(tlen, p);
2216 for (i = 0; i < tlen; i++) {
2217 GET_MEMNUM_INC(mem, p);
2219 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2220 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2222 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2223 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2224 else
2225 pstart = (UChar* )((void* )mem_start_stk[mem]);
2227 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2228 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2229 : (UChar* )((void* )mem_end_stk[mem]));
2230 n = pend - pstart;
2231 DATA_ENSURE(n);
2232 sprev = s;
2233 swork = s;
2234 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2235 if (is_fail) continue;
2236 s = swork;
2237 while (sprev + (len = enclen(encode, sprev, end)) < s)
2238 sprev += len;
2240 p += (SIZE_MEMNUM * (tlen - i - 1));
2241 break; /* success */
2243 if (i == tlen) goto fail;
2244 MOP_OUT;
2245 continue;
2247 break;
2249 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2251 int len, is_fail;
2252 UChar *pstart, *pend, *swork;
2254 GET_LENGTH_INC(tlen, p);
2255 for (i = 0; i < tlen; i++) {
2256 GET_MEMNUM_INC(mem, p);
2258 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2259 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2261 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2262 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2263 else
2264 pstart = (UChar* )((void* )mem_start_stk[mem]);
2266 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2267 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2268 : (UChar* )((void* )mem_end_stk[mem]));
2269 n = pend - pstart;
2270 DATA_ENSURE(n);
2271 sprev = s;
2272 swork = s;
2273 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2274 if (is_fail) continue;
2275 s = swork;
2276 while (sprev + (len = enclen(encode, sprev, end)) < s)
2277 sprev += len;
2279 p += (SIZE_MEMNUM * (tlen - i - 1));
2280 break; /* success */
2282 if (i == tlen) goto fail;
2283 MOP_OUT;
2284 continue;
2286 break;
2288 #ifdef USE_BACKREF_WITH_LEVEL
2289 case OP_BACKREF_WITH_LEVEL:
2291 int len;
2292 OnigOptionType ic;
2293 LengthType level;
2295 GET_OPTION_INC(ic, p);
2296 GET_LENGTH_INC(level, p);
2297 GET_LENGTH_INC(tlen, p);
2299 sprev = s;
2300 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2301 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2302 while (sprev + (len = enclen(encode, sprev, end)) < s)
2303 sprev += len;
2305 p += (SIZE_MEMNUM * tlen);
2307 else
2308 goto fail;
2310 MOP_OUT;
2311 continue;
2314 break;
2315 #endif
2317 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2318 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2319 GET_OPTION_INC(option, p);
2320 STACK_PUSH_ALT(p, s, sprev);
2321 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2322 MOP_OUT;
2323 continue;
2324 break;
2326 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2327 GET_OPTION_INC(option, p);
2328 MOP_OUT;
2329 continue;
2330 break;
2331 #endif
2333 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2334 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2335 STACK_PUSH_NULL_CHECK_START(mem, s);
2336 MOP_OUT;
2337 continue;
2338 break;
2340 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2342 int isnull;
2344 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2345 STACK_NULL_CHECK(isnull, mem, s);
2346 if (isnull) {
2347 #ifdef ONIG_DEBUG_MATCH
2348 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2349 (int )mem, (int )s);
2350 #endif
2351 null_check_found:
2352 /* empty loop founded, skip next instruction */
2353 switch (*p++) {
2354 case OP_JUMP:
2355 case OP_PUSH:
2356 p += SIZE_RELADDR;
2357 break;
2358 case OP_REPEAT_INC:
2359 case OP_REPEAT_INC_NG:
2360 case OP_REPEAT_INC_SG:
2361 case OP_REPEAT_INC_NG_SG:
2362 p += SIZE_MEMNUM;
2363 break;
2364 default:
2365 goto unexpected_bytecode_error;
2366 break;
2370 MOP_OUT;
2371 continue;
2372 break;
2374 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2375 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2377 int isnull;
2379 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2380 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2381 if (isnull) {
2382 #ifdef ONIG_DEBUG_MATCH
2383 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2384 (int )mem, (int )s);
2385 #endif
2386 if (isnull == -1) goto fail;
2387 goto null_check_found;
2390 MOP_OUT;
2391 continue;
2392 break;
2393 #endif
2395 #ifdef USE_SUBEXP_CALL
2396 case OP_NULL_CHECK_END_MEMST_PUSH:
2397 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2399 int isnull;
2401 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2402 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2403 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2404 #else
2405 STACK_NULL_CHECK_REC(isnull, mem, s);
2406 #endif
2407 if (isnull) {
2408 #ifdef ONIG_DEBUG_MATCH
2409 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2410 (int )mem, (int )s);
2411 #endif
2412 if (isnull == -1) goto fail;
2413 goto null_check_found;
2415 else {
2416 STACK_PUSH_NULL_CHECK_END(mem);
2419 MOP_OUT;
2420 continue;
2421 break;
2422 #endif
2424 case OP_JUMP: MOP_IN(OP_JUMP);
2425 GET_RELADDR_INC(addr, p);
2426 p += addr;
2427 MOP_OUT;
2428 CHECK_INTERRUPT_IN_MATCH_AT;
2429 continue;
2430 break;
2432 case OP_PUSH: MOP_IN(OP_PUSH);
2433 GET_RELADDR_INC(addr, p);
2434 STACK_PUSH_ALT(p + addr, s, sprev);
2435 MOP_OUT;
2436 continue;
2437 break;
2439 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2440 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2441 GET_STATE_CHECK_NUM_INC(mem, p);
2442 STATE_CHECK_VAL(scv, mem);
2443 if (scv) goto fail;
2445 GET_RELADDR_INC(addr, p);
2446 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2447 MOP_OUT;
2448 continue;
2449 break;
2451 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2452 GET_STATE_CHECK_NUM_INC(mem, p);
2453 GET_RELADDR_INC(addr, p);
2454 STATE_CHECK_VAL(scv, mem);
2455 if (scv) {
2456 p += addr;
2458 else {
2459 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2461 MOP_OUT;
2462 continue;
2463 break;
2465 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2466 GET_STATE_CHECK_NUM_INC(mem, p);
2467 STATE_CHECK_VAL(scv, mem);
2468 if (scv) goto fail;
2470 STACK_PUSH_STATE_CHECK(s, mem);
2471 MOP_OUT;
2472 continue;
2473 break;
2474 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2476 case OP_POP: MOP_IN(OP_POP);
2477 STACK_POP_ONE;
2478 MOP_OUT;
2479 continue;
2480 break;
2482 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2483 GET_RELADDR_INC(addr, p);
2484 if (*p == *s && DATA_ENSURE_CHECK1) {
2485 p++;
2486 STACK_PUSH_ALT(p + addr, s, sprev);
2487 MOP_OUT;
2488 continue;
2490 p += (addr + 1);
2491 MOP_OUT;
2492 continue;
2493 break;
2495 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2496 GET_RELADDR_INC(addr, p);
2497 if (*p == *s) {
2498 p++;
2499 STACK_PUSH_ALT(p + addr, s, sprev);
2500 MOP_OUT;
2501 continue;
2503 p++;
2504 MOP_OUT;
2505 continue;
2506 break;
2508 case OP_REPEAT: MOP_IN(OP_REPEAT);
2510 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2511 GET_RELADDR_INC(addr, p);
2513 STACK_ENSURE(1);
2514 repeat_stk[mem] = GET_STACK_INDEX(stk);
2515 STACK_PUSH_REPEAT(mem, p);
2517 if (reg->repeat_range[mem].lower == 0) {
2518 STACK_PUSH_ALT(p + addr, s, sprev);
2521 MOP_OUT;
2522 continue;
2523 break;
2525 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2527 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2528 GET_RELADDR_INC(addr, p);
2530 STACK_ENSURE(1);
2531 repeat_stk[mem] = GET_STACK_INDEX(stk);
2532 STACK_PUSH_REPEAT(mem, p);
2534 if (reg->repeat_range[mem].lower == 0) {
2535 STACK_PUSH_ALT(p, s, sprev);
2536 p += addr;
2539 MOP_OUT;
2540 continue;
2541 break;
2543 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2544 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2545 si = repeat_stk[mem];
2546 stkp = STACK_AT(si);
2548 repeat_inc:
2549 stkp->u.repeat.count++;
2550 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2551 /* end of repeat. Nothing to do. */
2553 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2554 STACK_PUSH_ALT(p, s, sprev);
2555 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2557 else {
2558 p = stkp->u.repeat.pcode;
2560 STACK_PUSH_REPEAT_INC(si);
2561 MOP_OUT;
2562 CHECK_INTERRUPT_IN_MATCH_AT;
2563 continue;
2564 break;
2566 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2567 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2568 STACK_GET_REPEAT(mem, stkp);
2569 si = GET_STACK_INDEX(stkp);
2570 goto repeat_inc;
2571 break;
2573 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2574 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2575 si = repeat_stk[mem];
2576 stkp = STACK_AT(si);
2578 repeat_inc_ng:
2579 stkp->u.repeat.count++;
2580 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2581 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2582 UChar* pcode = stkp->u.repeat.pcode;
2584 STACK_PUSH_REPEAT_INC(si);
2585 STACK_PUSH_ALT(pcode, s, sprev);
2587 else {
2588 p = stkp->u.repeat.pcode;
2589 STACK_PUSH_REPEAT_INC(si);
2592 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2593 STACK_PUSH_REPEAT_INC(si);
2595 MOP_OUT;
2596 CHECK_INTERRUPT_IN_MATCH_AT;
2597 continue;
2598 break;
2600 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2601 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2602 STACK_GET_REPEAT(mem, stkp);
2603 si = GET_STACK_INDEX(stkp);
2604 goto repeat_inc_ng;
2605 break;
2607 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2608 STACK_PUSH_POS(s, sprev);
2609 MOP_OUT;
2610 continue;
2611 break;
2613 case OP_POP_POS: MOP_IN(OP_POP_POS);
2615 STACK_POS_END(stkp);
2616 s = stkp->u.state.pstr;
2617 sprev = stkp->u.state.pstr_prev;
2619 MOP_OUT;
2620 continue;
2621 break;
2623 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2624 GET_RELADDR_INC(addr, p);
2625 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2626 MOP_OUT;
2627 continue;
2628 break;
2630 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2631 STACK_POP_TIL_POS_NOT;
2632 goto fail;
2633 break;
2635 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2636 STACK_PUSH_STOP_BT;
2637 MOP_OUT;
2638 continue;
2639 break;
2641 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2642 STACK_STOP_BT_END;
2643 MOP_OUT;
2644 continue;
2645 break;
2647 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2648 GET_LENGTH_INC(tlen, p);
2649 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2650 if (IS_NULL(s)) goto fail;
2651 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2652 MOP_OUT;
2653 continue;
2654 break;
2656 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2657 GET_RELADDR_INC(addr, p);
2658 GET_LENGTH_INC(tlen, p);
2659 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2660 if (IS_NULL(q)) {
2661 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2662 If you want to change to fail, replace following line. */
2663 p += addr;
2664 /* goto fail; */
2666 else {
2667 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2668 s = q;
2669 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2671 MOP_OUT;
2672 continue;
2673 break;
2675 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2676 STACK_POP_TIL_LOOK_BEHIND_NOT;
2677 goto fail;
2678 break;
2680 #ifdef USE_SUBEXP_CALL
2681 case OP_CALL: MOP_IN(OP_CALL);
2682 GET_ABSADDR_INC(addr, p);
2683 STACK_PUSH_CALL_FRAME(p);
2684 p = reg->p + addr;
2685 MOP_OUT;
2686 continue;
2687 break;
2689 case OP_RETURN: MOP_IN(OP_RETURN);
2690 STACK_RETURN(p);
2691 STACK_PUSH_RETURN;
2692 MOP_OUT;
2693 continue;
2694 break;
2695 #endif
2697 case OP_FINISH:
2698 goto finish;
2699 break;
2701 fail:
2702 MOP_OUT;
2703 /* fall */
2704 case OP_FAIL: MOP_IN(OP_FAIL);
2705 STACK_POP;
2706 p = stk->u.state.pcode;
2707 s = stk->u.state.pstr;
2708 sprev = stk->u.state.pstr_prev;
2710 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2711 if (stk->u.state.state_check != 0) {
2712 stk->type = STK_STATE_CHECK_MARK;
2713 stk++;
2715 #endif
2717 MOP_OUT;
2718 continue;
2719 break;
2721 default:
2722 goto bytecode_error;
2724 } /* end of switch */
2725 sprev = sbegin;
2726 } /* end of while(1) */
2728 finish:
2729 STACK_SAVE;
2730 return best_len;
2732 #ifdef ONIG_DEBUG
2733 stack_error:
2734 STACK_SAVE;
2735 return ONIGERR_STACK_BUG;
2736 #endif
2738 bytecode_error:
2739 STACK_SAVE;
2740 return ONIGERR_UNDEFINED_BYTECODE;
2742 unexpected_bytecode_error:
2743 STACK_SAVE;
2744 return ONIGERR_UNEXPECTED_BYTECODE;
2748 static UChar*
2749 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2750 const UChar* text, const UChar* text_end, UChar* text_range)
2752 UChar *t, *p, *s, *end;
2754 end = (UChar* )text_end;
2755 end -= target_end - target - 1;
2756 if (end > text_range)
2757 end = text_range;
2759 s = (UChar* )text;
2761 if (enc->max_enc_len == enc->min_enc_len) {
2762 int n = enc->max_enc_len;
2764 while (s < end) {
2765 if (*s == *target) {
2766 p = s + 1;
2767 t = target + 1;
2768 if (target_end == t || memcmp(t, p, target_end - t) == 0)
2769 return s;
2771 s += n;
2773 return (UChar*)NULL;
2775 while (s < end) {
2776 if (*s == *target) {
2777 p = s + 1;
2778 t = target + 1;
2779 if (target_end == t || memcmp(t, p, target_end - t) == 0)
2780 return s;
2782 s += enclen(enc, s, end);
2785 return (UChar* )NULL;
2788 static int
2789 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2790 const UChar* t, const UChar* tend,
2791 const UChar* p, const UChar* end)
2793 int lowlen;
2794 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2796 while (t < tend) {
2797 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2798 q = lowbuf;
2799 while (lowlen > 0) {
2800 if (*t++ != *q++) return 0;
2801 lowlen--;
2805 return 1;
2808 static UChar*
2809 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2810 UChar* target, UChar* target_end,
2811 const UChar* text, const UChar* text_end, UChar* text_range)
2813 UChar *s, *end;
2815 end = (UChar* )text_end;
2816 end -= target_end - target - 1;
2817 if (end > text_range)
2818 end = text_range;
2820 s = (UChar* )text;
2822 while (s < end) {
2823 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2824 s, text_end))
2825 return s;
2827 s += enclen(enc, s, text_end);
2830 return (UChar* )NULL;
2833 static UChar*
2834 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2835 const UChar* text, const UChar* adjust_text,
2836 const UChar* text_end, const UChar* text_start)
2838 UChar *t, *p, *s;
2840 s = (UChar* )text_end;
2841 s -= (target_end - target);
2842 if (s > text_start)
2843 s = (UChar* )text_start;
2844 else
2845 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2847 while (s >= text) {
2848 if (*s == *target) {
2849 p = s + 1;
2850 t = target + 1;
2851 while (t < target_end) {
2852 if (*t != *p++)
2853 break;
2854 t++;
2856 if (t == target_end)
2857 return s;
2859 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2862 return (UChar* )NULL;
2865 static UChar*
2866 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2867 UChar* target, UChar* target_end,
2868 const UChar* text, const UChar* adjust_text,
2869 const UChar* text_end, const UChar* text_start)
2871 UChar *s;
2873 s = (UChar* )text_end;
2874 s -= (target_end - target);
2875 if (s > text_start)
2876 s = (UChar* )text_start;
2877 else
2878 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2880 while (s >= text) {
2881 if (str_lower_case_match(enc, case_fold_flag,
2882 target, target_end, s, text_end))
2883 return s;
2885 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2888 return (UChar* )NULL;
2891 static UChar*
2892 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2893 const UChar* text, const UChar* text_end,
2894 const UChar* text_range)
2896 const UChar *s, *se, *t, *p, *end;
2897 const UChar *tail;
2898 int skip, tlen1;
2900 #ifdef ONIG_DEBUG_SEARCH
2901 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2902 (int )text, (int )text_end, (int )text_range);
2903 #endif
2905 tail = target_end - 1;
2906 tlen1 = tail - target;
2907 end = text_range;
2908 if (end + tlen1 > text_end)
2909 end = text_end - tlen1;
2911 s = text;
2913 if (IS_NULL(reg->int_map)) {
2914 while (s < end) {
2915 p = se = s + tlen1;
2916 t = tail;
2917 while (t >= target && *p == *t) {
2918 p--; t--;
2920 if (t < target) return (UChar* )s;
2922 skip = reg->map[*se];
2923 t = s;
2924 do {
2925 s += enclen(reg->enc, s, end);
2926 } while ((s - t) < skip && s < end);
2929 else {
2930 while (s < end) {
2931 p = se = s + tlen1;
2932 t = tail;
2933 while (t >= target && *p == *t) {
2934 p--; t--;
2936 if (t < target) return (UChar* )s;
2938 skip = reg->int_map[*se];
2939 t = s;
2940 do {
2941 s += enclen(reg->enc, s, end);
2942 } while ((s - t) < skip && s < end);
2946 return (UChar* )NULL;
2949 static UChar*
2950 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2951 const UChar* text, const UChar* text_end, const UChar* text_range)
2953 const UChar *s, *t, *p, *end;
2954 const UChar *tail;
2956 end = text_range + (target_end - target) - 1;
2957 if (end > text_end)
2958 end = text_end;
2960 tail = target_end - 1;
2961 s = text + (target_end - target) - 1;
2962 if (IS_NULL(reg->int_map)) {
2963 while (s < end) {
2964 p = s;
2965 t = tail;
2966 while (t >= target && *p == *t) {
2967 p--; t--;
2969 if (t < target) return (UChar* )(p + 1);
2970 s += reg->map[*s];
2973 else { /* see int_map[] */
2974 while (s < end) {
2975 p = s;
2976 t = tail;
2977 while (t >= target && *p == *t) {
2978 p--; t--;
2980 if (t < target) return (UChar* )(p + 1);
2981 s += reg->int_map[*s];
2984 return (UChar* )NULL;
2987 static int
2988 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2989 int** skip)
2992 int i, len;
2994 if (IS_NULL(*skip)) {
2995 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2996 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2999 len = end - s;
3000 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3001 (*skip)[i] = len;
3003 for (i = len - 1; i > 0; i--)
3004 (*skip)[s[i]] = i;
3006 return 0;
3009 static UChar*
3010 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3011 const UChar* text, const UChar* adjust_text,
3012 const UChar* text_end, const UChar* text_start)
3014 const UChar *s, *t, *p;
3016 s = text_end - (target_end - target);
3017 if (text_start < s)
3018 s = text_start;
3019 else
3020 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3022 while (s >= text) {
3023 p = s;
3024 t = target;
3025 while (t < target_end && *p == *t) {
3026 p++; t++;
3028 if (t == target_end)
3029 return (UChar* )s;
3031 s -= reg->int_map_backward[*s];
3032 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3035 return (UChar* )NULL;
3038 static UChar*
3039 map_search(OnigEncoding enc, UChar map[],
3040 const UChar* text, const UChar* text_range)
3042 const UChar *s = text;
3044 while (s < text_range) {
3045 if (map[*s]) return (UChar* )s;
3047 s += enclen(enc, s, text_range);
3049 return (UChar* )NULL;
3052 static UChar*
3053 map_search_backward(OnigEncoding enc, UChar map[],
3054 const UChar* text, const UChar* adjust_text,
3055 const UChar* text_start)
3057 const UChar *s = text_start;
3059 while (s >= text) {
3060 if (map[*s]) return (UChar* )s;
3062 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3064 return (UChar* )NULL;
3067 extern int
3068 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3069 OnigOptionType option)
3071 int r;
3072 UChar *prev;
3073 OnigMatchArg msa;
3075 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3076 start:
3077 THREAD_ATOMIC_START;
3078 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3079 ONIG_STATE_INC(reg);
3080 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3081 onig_chain_reduce(reg);
3082 ONIG_STATE_INC(reg);
3085 else {
3086 int n;
3088 THREAD_ATOMIC_END;
3089 n = 0;
3090 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3091 if (++n > THREAD_PASS_LIMIT_COUNT)
3092 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3093 THREAD_PASS;
3095 goto start;
3097 THREAD_ATOMIC_END;
3098 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3100 MATCH_ARG_INIT(msa, option, region, at);
3101 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3103 int offset = at - str;
3104 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3106 #endif
3108 if (region
3109 #ifdef USE_POSIX_API_REGION_OPTION
3110 && !IS_POSIX_REGION(option)
3111 #endif
3113 r = onig_region_resize_clear(region, reg->num_mem + 1);
3115 else
3116 r = 0;
3118 if (r == 0) {
3119 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3120 r = match_at(reg, str, end,
3121 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3122 end,
3123 #endif
3124 at, prev, &msa);
3127 MATCH_ARG_FREE(msa);
3128 ONIG_STATE_DEC_THREAD(reg);
3129 return r;
3132 static int
3133 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3134 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3136 UChar *p, *pprev = (UChar* )NULL;
3138 #ifdef ONIG_DEBUG_SEARCH
3139 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3140 (int )str, (int )end, (int )s, (int )range);
3141 #endif
3143 p = s;
3144 if (reg->dmin > 0) {
3145 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3146 p += reg->dmin;
3148 else {
3149 UChar *q = p + reg->dmin;
3150 while (p < q) p += enclen(reg->enc, p, end);
3154 retry:
3155 switch (reg->optimize) {
3156 case ONIG_OPTIMIZE_EXACT:
3157 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3158 break;
3159 case ONIG_OPTIMIZE_EXACT_IC:
3160 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3161 reg->exact, reg->exact_end, p, end, range);
3162 break;
3164 case ONIG_OPTIMIZE_EXACT_BM:
3165 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3166 break;
3168 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3169 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3170 break;
3172 case ONIG_OPTIMIZE_MAP:
3173 p = map_search(reg->enc, reg->map, p, range);
3174 break;
3177 if (p && p < range) {
3178 if (p - reg->dmin < s) {
3179 retry_gate:
3180 pprev = p;
3181 p += enclen(reg->enc, p, end);
3182 goto retry;
3185 if (reg->sub_anchor) {
3186 UChar* prev;
3188 switch (reg->sub_anchor) {
3189 case ANCHOR_BEGIN_LINE:
3190 if (!ON_STR_BEGIN(p)) {
3191 prev = onigenc_get_prev_char_head(reg->enc,
3192 (pprev ? pprev : str), p);
3193 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3194 goto retry_gate;
3196 break;
3198 case ANCHOR_END_LINE:
3199 if (ON_STR_END(p)) {
3200 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3201 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3202 (pprev ? pprev : str), p);
3203 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3204 goto retry_gate;
3205 #endif
3207 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3208 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3209 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3210 #endif
3212 goto retry_gate;
3213 break;
3217 if (reg->dmax == 0) {
3218 *low = p;
3219 if (low_prev) {
3220 if (*low > s)
3221 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3222 else
3223 *low_prev = onigenc_get_prev_char_head(reg->enc,
3224 (pprev ? pprev : str), p);
3227 else {
3228 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3229 *low = p - reg->dmax;
3230 if (*low > s) {
3231 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3232 *low, (const UChar** )low_prev);
3233 if (low_prev && IS_NULL(*low_prev))
3234 *low_prev = onigenc_get_prev_char_head(reg->enc,
3235 (pprev ? pprev : s), *low);
3237 else {
3238 if (low_prev)
3239 *low_prev = onigenc_get_prev_char_head(reg->enc,
3240 (pprev ? pprev : str), *low);
3244 /* no needs to adjust *high, *high is used as range check only */
3245 *high = p - reg->dmin;
3247 #ifdef ONIG_DEBUG_SEARCH
3248 fprintf(stderr,
3249 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3250 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3251 #endif
3252 return 1; /* success */
3255 return 0; /* fail */
3258 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3259 int** skip));
3261 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3263 static int
3264 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3265 UChar* s, const UChar* range, UChar* adjrange,
3266 UChar** low, UChar** high)
3268 int r;
3269 UChar *p;
3271 range += reg->dmin;
3272 p = s;
3274 retry:
3275 switch (reg->optimize) {
3276 case ONIG_OPTIMIZE_EXACT:
3277 exact_method:
3278 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3279 range, adjrange, end, p);
3280 break;
3282 case ONIG_OPTIMIZE_EXACT_IC:
3283 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3284 reg->exact, reg->exact_end,
3285 range, adjrange, end, p);
3286 break;
3288 case ONIG_OPTIMIZE_EXACT_BM:
3289 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3290 if (IS_NULL(reg->int_map_backward)) {
3291 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3292 goto exact_method;
3294 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3295 &(reg->int_map_backward));
3296 if (r) return r;
3298 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3299 end, p);
3300 break;
3302 case ONIG_OPTIMIZE_MAP:
3303 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3304 break;
3307 if (p) {
3308 if (reg->sub_anchor) {
3309 UChar* prev;
3311 switch (reg->sub_anchor) {
3312 case ANCHOR_BEGIN_LINE:
3313 if (!ON_STR_BEGIN(p)) {
3314 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3315 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3316 p = prev;
3317 goto retry;
3320 break;
3322 case ANCHOR_END_LINE:
3323 if (ON_STR_END(p)) {
3324 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3325 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3326 if (IS_NULL(prev)) goto fail;
3327 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3328 p = prev;
3329 goto retry;
3331 #endif
3333 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3334 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3335 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3336 #endif
3338 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3339 if (IS_NULL(p)) goto fail;
3340 goto retry;
3342 break;
3346 /* no needs to adjust *high, *high is used as range check only */
3347 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3348 *low = p - reg->dmax;
3349 *high = p - reg->dmin;
3350 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3353 #ifdef ONIG_DEBUG_SEARCH
3354 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3355 (int )(*low - str), (int )(*high - str));
3356 #endif
3357 return 1; /* success */
3360 fail:
3361 #ifdef ONIG_DEBUG_SEARCH
3362 fprintf(stderr, "backward_search_range: fail.\n");
3363 #endif
3364 return 0; /* fail */
3368 extern int
3369 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3370 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3372 int r;
3373 UChar *s, *prev;
3374 OnigMatchArg msa;
3375 const UChar *orig_start = start;
3376 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3377 const UChar *orig_range = range;
3378 #endif
3380 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3381 start:
3382 THREAD_ATOMIC_START;
3383 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3384 ONIG_STATE_INC(reg);
3385 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3386 onig_chain_reduce(reg);
3387 ONIG_STATE_INC(reg);
3390 else {
3391 int n;
3393 THREAD_ATOMIC_END;
3394 n = 0;
3395 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3396 if (++n > THREAD_PASS_LIMIT_COUNT)
3397 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3398 THREAD_PASS;
3400 goto start;
3402 THREAD_ATOMIC_END;
3403 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3405 #ifdef ONIG_DEBUG_SEARCH
3406 fprintf(stderr,
3407 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3408 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3409 #endif
3411 if (region
3412 #ifdef USE_POSIX_API_REGION_OPTION
3413 && !IS_POSIX_REGION(option)
3414 #endif
3416 r = onig_region_resize_clear(region, reg->num_mem + 1);
3417 if (r) goto finish_no_msa;
3420 if (start > end || start < str) goto mismatch_no_msa;
3423 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3424 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3425 #define MATCH_AND_RETURN_CHECK(upper_range) \
3426 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3427 if (r != ONIG_MISMATCH) {\
3428 if (r >= 0) {\
3429 if (! IS_FIND_LONGEST(reg->options)) {\
3430 goto match;\
3433 else goto finish; /* error */ \
3435 #else
3436 #define MATCH_AND_RETURN_CHECK(upper_range) \
3437 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3438 if (r != ONIG_MISMATCH) {\
3439 if (r >= 0) {\
3440 goto match;\
3442 else goto finish; /* error */ \
3444 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3445 #else
3446 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3447 #define MATCH_AND_RETURN_CHECK(none) \
3448 r = match_at(reg, str, end, s, prev, &msa);\
3449 if (r != ONIG_MISMATCH) {\
3450 if (r >= 0) {\
3451 if (! IS_FIND_LONGEST(reg->options)) {\
3452 goto match;\
3455 else goto finish; /* error */ \
3457 #else
3458 #define MATCH_AND_RETURN_CHECK(none) \
3459 r = match_at(reg, str, end, s, prev, &msa);\
3460 if (r != ONIG_MISMATCH) {\
3461 if (r >= 0) {\
3462 goto match;\
3464 else goto finish; /* error */ \
3466 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3467 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3470 /* anchor optimize: resume search range */
3471 if (reg->anchor != 0 && str < end) {
3472 UChar *min_semi_end, *max_semi_end;
3474 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3475 /* search start-position only */
3476 begin_position:
3477 if (range > start)
3478 range = start + 1;
3479 else
3480 range = start;
3482 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3483 /* search str-position only */
3484 if (range > start) {
3485 if (start != str) goto mismatch_no_msa;
3486 range = str + 1;
3488 else {
3489 if (range <= str) {
3490 start = str;
3491 range = str;
3493 else
3494 goto mismatch_no_msa;
3497 else if (reg->anchor & ANCHOR_END_BUF) {
3498 min_semi_end = max_semi_end = (UChar* )end;
3500 end_buf:
3501 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3502 goto mismatch_no_msa;
3504 if (range > start) {
3505 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3506 start = min_semi_end - reg->anchor_dmax;
3507 if (start < end)
3508 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3509 else { /* match with empty at end */
3510 start = onigenc_get_prev_char_head(reg->enc, str, end);
3513 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3514 range = max_semi_end - reg->anchor_dmin + 1;
3517 if (start >= range) goto mismatch_no_msa;
3519 else {
3520 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3521 range = min_semi_end - reg->anchor_dmax;
3523 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3524 start = max_semi_end - reg->anchor_dmin;
3525 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3527 if (range > start) goto mismatch_no_msa;
3530 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3531 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3533 max_semi_end = (UChar* )end;
3534 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3535 min_semi_end = pre_end;
3537 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3538 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3539 if (IS_NOT_NULL(pre_end) &&
3540 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3541 min_semi_end = pre_end;
3543 #endif
3544 if (min_semi_end > str && start <= min_semi_end) {
3545 goto end_buf;
3548 else {
3549 min_semi_end = (UChar* )end;
3550 goto end_buf;
3553 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3554 goto begin_position;
3557 else if (str == end) { /* empty string */
3558 static const UChar* address_for_empty_string = (UChar* )"";
3560 #ifdef ONIG_DEBUG_SEARCH
3561 fprintf(stderr, "onig_search: empty string.\n");
3562 #endif
3564 if (reg->threshold_len == 0) {
3565 start = end = str = address_for_empty_string;
3566 s = (UChar* )start;
3567 prev = (UChar* )NULL;
3569 MATCH_ARG_INIT(msa, option, region, start);
3570 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571 msa.state_check_buff = (void* )0;
3572 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3573 #endif
3574 MATCH_AND_RETURN_CHECK(end);
3575 goto mismatch;
3577 goto mismatch_no_msa;
3580 #ifdef ONIG_DEBUG_SEARCH
3581 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3582 (int )(end - str), (int )(start - str), (int )(range - str));
3583 #endif
3585 MATCH_ARG_INIT(msa, option, region, orig_start);
3586 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3588 int offset = (MIN(start, range) - str);
3589 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3591 #endif
3593 s = (UChar* )start;
3594 if (range > start) { /* forward search */
3595 if (s > str)
3596 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3597 else
3598 prev = (UChar* )NULL;
3600 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3601 UChar *sch_range, *low, *high, *low_prev;
3603 sch_range = (UChar* )range;
3604 if (reg->dmax != 0) {
3605 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3606 sch_range = (UChar* )end;
3607 else {
3608 sch_range += reg->dmax;
3609 if (sch_range > end) sch_range = (UChar* )end;
3613 if ((end - start) < reg->threshold_len)
3614 goto mismatch;
3616 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3617 do {
3618 if (! forward_search_range(reg, str, end, s, sch_range,
3619 &low, &high, &low_prev)) goto mismatch;
3620 if (s < low) {
3621 s = low;
3622 prev = low_prev;
3624 while (s <= high) {
3625 MATCH_AND_RETURN_CHECK(orig_range);
3626 prev = s;
3627 s += enclen(reg->enc, s, end);
3629 } while (s < range);
3630 goto mismatch;
3632 else { /* check only. */
3633 if (! forward_search_range(reg, str, end, s, sch_range,
3634 &low, &high, (UChar** )NULL)) goto mismatch;
3636 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3637 do {
3638 MATCH_AND_RETURN_CHECK(orig_range);
3639 prev = s;
3640 s += enclen(reg->enc, s, end);
3642 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3643 prev = s;
3644 s += enclen(reg->enc, s, end);
3646 } while (s < range);
3647 goto mismatch;
3652 do {
3653 MATCH_AND_RETURN_CHECK(orig_range);
3654 prev = s;
3655 s += enclen(reg->enc, s, end);
3656 } while (s < range);
3658 if (s == range) { /* because empty match with /$/. */
3659 MATCH_AND_RETURN_CHECK(orig_range);
3662 else { /* backward search */
3663 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3664 if (orig_start < end)
3665 orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
3666 #endif
3668 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3669 UChar *low, *high, *adjrange, *sch_start;
3671 if (range < end)
3672 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3673 else
3674 adjrange = (UChar* )end;
3676 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3677 (end - range) >= reg->threshold_len) {
3678 do {
3679 sch_start = s + reg->dmax;
3680 if (sch_start > end) sch_start = (UChar* )end;
3681 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3682 &low, &high) <= 0)
3683 goto mismatch;
3685 if (s > high)
3686 s = high;
3688 while (s >= low) {
3689 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3690 MATCH_AND_RETURN_CHECK(orig_start);
3691 s = prev;
3693 } while (s >= range);
3694 goto mismatch;
3696 else { /* check only. */
3697 if ((end - range) < reg->threshold_len) goto mismatch;
3699 sch_start = s;
3700 if (reg->dmax != 0) {
3701 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3702 sch_start = (UChar* )end;
3703 else {
3704 sch_start += reg->dmax;
3705 if (sch_start > end) sch_start = (UChar* )end;
3706 else
3707 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3708 start, sch_start);
3711 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3712 &low, &high) <= 0) goto mismatch;
3716 do {
3717 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3718 MATCH_AND_RETURN_CHECK(orig_start);
3719 s = prev;
3720 } while (s >= range);
3723 mismatch:
3724 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3725 if (IS_FIND_LONGEST(reg->options)) {
3726 if (msa.best_len >= 0) {
3727 s = msa.best_s;
3728 goto match;
3731 #endif
3732 r = ONIG_MISMATCH;
3734 finish:
3735 MATCH_ARG_FREE(msa);
3736 ONIG_STATE_DEC_THREAD(reg);
3738 /* If result is mismatch and no FIND_NOT_EMPTY option,
3739 then the region is not setted in match_at(). */
3740 if (IS_FIND_NOT_EMPTY(reg->options) && region
3741 #ifdef USE_POSIX_API_REGION_OPTION
3742 && !IS_POSIX_REGION(option)
3743 #endif
3745 onig_region_clear(region);
3748 #ifdef ONIG_DEBUG
3749 if (r != ONIG_MISMATCH)
3750 fprintf(stderr, "onig_search: error %d\n", r);
3751 #endif
3752 return r;
3754 mismatch_no_msa:
3755 r = ONIG_MISMATCH;
3756 finish_no_msa:
3757 ONIG_STATE_DEC_THREAD(reg);
3758 #ifdef ONIG_DEBUG
3759 if (r != ONIG_MISMATCH)
3760 fprintf(stderr, "onig_search: error %d\n", r);
3761 #endif
3762 return r;
3764 match:
3765 ONIG_STATE_DEC_THREAD(reg);
3766 MATCH_ARG_FREE(msa);
3767 return s - str;
3770 extern OnigEncoding
3771 onig_get_encoding(regex_t* reg)
3773 return reg->enc;
3776 extern OnigOptionType
3777 onig_get_options(regex_t* reg)
3779 return reg->options;
3782 extern OnigCaseFoldType
3783 onig_get_case_fold_flag(regex_t* reg)
3785 return reg->case_fold_flag;
3788 extern OnigSyntaxType*
3789 onig_get_syntax(regex_t* reg)
3791 return reg->syntax;
3794 extern int
3795 onig_number_of_captures(regex_t* reg)
3797 return reg->num_mem;
3800 extern int
3801 onig_number_of_capture_histories(regex_t* reg)
3803 #ifdef USE_CAPTURE_HISTORY
3804 int i, n;
3806 n = 0;
3807 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3808 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3809 n++;
3811 return n;
3812 #else
3813 return 0;
3814 #endif
3817 extern void
3818 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3820 *to = *from;