* process.c (rb_spawn_internal): new function to specify
[ruby-svn.git] / regexec.c
blob47fd67140e0a98edcf656498bf30f958cd60e553
1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include "regint.h"
32 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
43 static void
44 history_tree_clear(OnigCaptureTreeNode* node)
46 int i;
48 if (IS_NOT_NULL(node)) {
49 for (i = 0; i < node->num_childs; i++) {
50 if (IS_NOT_NULL(node->childs[i])) {
51 history_tree_free(node->childs[i]);
54 for (i = 0; i < node->allocated; i++) {
55 node->childs[i] = (OnigCaptureTreeNode* )0;
57 node->num_childs = 0;
58 node->beg = ONIG_REGION_NOTPOS;
59 node->end = ONIG_REGION_NOTPOS;
60 node->group = -1;
64 static void
65 history_tree_free(OnigCaptureTreeNode* node)
67 history_tree_clear(node);
68 xfree(node);
71 static void
72 history_root_free(OnigRegion* r)
74 if (IS_NOT_NULL(r->history_root)) {
75 history_tree_free(r->history_root);
76 r->history_root = (OnigCaptureTreeNode* )0;
80 static OnigCaptureTreeNode*
81 history_node_new(void)
83 OnigCaptureTreeNode* node;
85 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86 CHECK_NULL_RETURN(node);
87 node->childs = (OnigCaptureTreeNode** )0;
88 node->allocated = 0;
89 node->num_childs = 0;
90 node->group = -1;
91 node->beg = ONIG_REGION_NOTPOS;
92 node->end = ONIG_REGION_NOTPOS;
94 return node;
97 static int
98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
102 if (parent->num_childs >= parent->allocated) {
103 int n, i;
105 if (IS_NULL(parent->childs)) {
106 n = HISTORY_TREE_INIT_ALLOC_SIZE;
107 parent->childs =
108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
110 else {
111 n = parent->allocated * 2;
112 parent->childs =
113 (OnigCaptureTreeNode** )xrealloc(parent->childs,
114 sizeof(OnigCaptureTreeNode*) * n);
116 CHECK_NULL_RETURN_MEMERR(parent->childs);
117 for (i = parent->allocated; i < n; i++) {
118 parent->childs[i] = (OnigCaptureTreeNode* )0;
120 parent->allocated = n;
123 parent->childs[parent->num_childs] = child;
124 parent->num_childs++;
125 return 0;
128 static OnigCaptureTreeNode*
129 history_tree_clone(OnigCaptureTreeNode* node)
131 int i;
132 OnigCaptureTreeNode *clone, *child;
134 clone = history_node_new();
135 CHECK_NULL_RETURN(clone);
137 clone->beg = node->beg;
138 clone->end = node->end;
139 for (i = 0; i < node->num_childs; i++) {
140 child = history_tree_clone(node->childs[i]);
141 if (IS_NULL(child)) {
142 history_tree_free(clone);
143 return (OnigCaptureTreeNode* )0;
145 history_tree_add_child(clone, child);
148 return clone;
151 extern OnigCaptureTreeNode*
152 onig_get_capture_tree(OnigRegion* region)
154 return region->history_root;
156 #endif /* USE_CAPTURE_HISTORY */
158 extern void
159 onig_region_clear(OnigRegion* region)
161 int i;
163 for (i = 0; i < region->num_regs; i++) {
164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region);
168 #endif
171 extern int
172 onig_region_resize(OnigRegion* region, int n)
174 region->num_regs = n;
176 if (n < ONIG_NREGION)
177 n = ONIG_NREGION;
179 if (region->allocated == 0) {
180 region->beg = (int* )xmalloc(n * sizeof(int));
181 region->end = (int* )xmalloc(n * sizeof(int));
183 if (region->beg == 0 || region->end == 0)
184 return ONIGERR_MEMORY;
186 region->allocated = n;
188 else if (region->allocated < n) {
189 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
190 region->end = (int* )xrealloc(region->end, n * sizeof(int));
192 if (region->beg == 0 || region->end == 0)
193 return ONIGERR_MEMORY;
195 region->allocated = n;
198 return 0;
201 static int
202 onig_region_resize_clear(OnigRegion* region, int n)
204 int r;
206 r = onig_region_resize(region, n);
207 if (r != 0) return r;
208 onig_region_clear(region);
209 return 0;
212 extern int
213 onig_region_set(OnigRegion* region, int at, int beg, int end)
215 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
217 if (at >= region->allocated) {
218 int r = onig_region_resize(region, at + 1);
219 if (r < 0) return r;
222 region->beg[at] = beg;
223 region->end[at] = end;
224 return 0;
227 extern void
228 onig_region_init(OnigRegion* region)
230 region->num_regs = 0;
231 region->allocated = 0;
232 region->beg = (int* )0;
233 region->end = (int* )0;
234 region->history_root = (OnigCaptureTreeNode* )0;
237 extern OnigRegion*
238 onig_region_new(void)
240 OnigRegion* r;
242 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
243 onig_region_init(r);
244 return r;
247 extern void
248 onig_region_free(OnigRegion* r, int free_self)
250 if (r) {
251 if (r->allocated > 0) {
252 if (r->beg) xfree(r->beg);
253 if (r->end) xfree(r->end);
254 r->allocated = 0;
256 #ifdef USE_CAPTURE_HISTORY
257 history_root_free(r);
258 #endif
259 if (free_self) xfree(r);
263 extern void
264 onig_region_copy(OnigRegion* to, OnigRegion* from)
266 #define RREGC_SIZE (sizeof(int) * from->num_regs)
267 int i;
269 if (to == from) return;
271 if (to->allocated == 0) {
272 if (from->num_regs > 0) {
273 to->beg = (int* )xmalloc(RREGC_SIZE);
274 to->end = (int* )xmalloc(RREGC_SIZE);
275 to->allocated = from->num_regs;
278 else if (to->allocated < from->num_regs) {
279 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
280 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
281 to->allocated = from->num_regs;
284 for (i = 0; i < from->num_regs; i++) {
285 to->beg[i] = from->beg[i];
286 to->end[i] = from->end[i];
288 to->num_regs = from->num_regs;
290 #ifdef USE_CAPTURE_HISTORY
291 history_root_free(to);
293 if (IS_NOT_NULL(from->history_root)) {
294 to->history_root = history_tree_clone(from->history_root);
296 #endif
300 /** stack **/
301 #define INVALID_STACK_INDEX -1
303 /* stack type */
304 /* used by normal-POP */
305 #define STK_ALT 0x0001
306 #define STK_LOOK_BEHIND_NOT 0x0002
307 #define STK_POS_NOT 0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START 0x0100
310 #define STK_MEM_END 0x8200
311 #define STK_REPEAT_INC 0x0300
312 #define STK_STATE_CHECK_MARK 0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START 0x3000
315 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
316 #define STK_MEM_END_MARK 0x8400
317 #define STK_POS 0x0500 /* used when POP-POS */
318 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
319 #define STK_REPEAT 0x0700
320 #define STK_CALL_FRAME 0x0800
321 #define STK_RETURN 0x0900
322 #define STK_VOID 0x0a00 /* for fill a blank */
324 /* stack type check mask */
325 #define STK_MASK_POP_USED 0x00ff
326 #define STK_MASK_TO_VOID_TARGET 0x10ff
327 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
331 (msa).stack_p = (void* )0;\
332 (msa).options = (arg_option);\
333 (msa).region = (arg_region);\
334 (msa).start = (arg_start);\
335 (msa).best_len = ONIG_MISMATCH;\
336 } while(0)
337 #else
338 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
339 (msa).stack_p = (void* )0;\
340 (msa).options = (arg_option);\
341 (msa).region = (arg_region);\
342 (msa).start = (arg_start);\
343 } while(0)
344 #endif
346 #ifdef USE_COMBINATION_EXPLOSION_CHECK
348 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
350 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
351 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
352 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
353 offset = ((offset) * (state_num)) >> 3;\
354 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
355 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
356 (msa).state_check_buff = (void* )xmalloc(size);\
357 else \
358 (msa).state_check_buff = (void* )xalloca(size);\
359 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
360 (size_t )(size - (offset))); \
361 (msa).state_check_buff_size = size;\
363 else {\
364 (msa).state_check_buff = (void* )0;\
365 (msa).state_check_buff_size = 0;\
368 else {\
369 (msa).state_check_buff = (void* )0;\
370 (msa).state_check_buff_size = 0;\
372 } while(0)
374 #define MATCH_ARG_FREE(msa) do {\
375 if ((msa).stack_p) xfree((msa).stack_p);\
376 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
377 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
379 } while(0)
380 #else
381 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
382 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
383 #endif
387 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
388 if (msa->stack_p) {\
389 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
390 stk_alloc = (OnigStackType* )(msa->stack_p);\
391 stk_base = stk_alloc;\
392 stk = stk_base;\
393 stk_end = stk_base + msa->stack_n;\
395 else {\
396 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
397 + sizeof(OnigStackType) * (stack_num));\
398 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
399 stk_base = stk_alloc;\
400 stk = stk_base;\
401 stk_end = stk_base + (stack_num);\
403 } while(0)
405 #define STACK_SAVE do{\
406 if (stk_base != stk_alloc) {\
407 msa->stack_p = stk_base;\
408 msa->stack_n = stk_end - stk_base;\
410 } while(0)
412 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
414 extern unsigned int
415 onig_get_match_stack_limit_size(void)
417 return MatchStackLimitSize;
420 extern int
421 onig_set_match_stack_limit_size(unsigned int size)
423 MatchStackLimitSize = size;
424 return 0;
427 static int
428 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
429 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
431 unsigned int n;
432 OnigStackType *x, *stk_base, *stk_end, *stk;
434 stk_base = *arg_stk_base;
435 stk_end = *arg_stk_end;
436 stk = *arg_stk;
438 n = stk_end - stk_base;
439 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
440 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
441 if (IS_NULL(x)) {
442 STACK_SAVE;
443 return ONIGERR_MEMORY;
445 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
446 n *= 2;
448 else {
449 n *= 2;
450 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
451 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
452 return ONIGERR_MATCH_STACK_LIMIT_OVER;
453 else
454 n = MatchStackLimitSize;
456 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
457 if (IS_NULL(x)) {
458 STACK_SAVE;
459 return ONIGERR_MEMORY;
462 *arg_stk = x + (stk - stk_base);
463 *arg_stk_base = x;
464 *arg_stk_end = x + n;
465 return 0;
468 #define STACK_ENSURE(n) do {\
469 if (stk_end - stk < (n)) {\
470 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
471 if (r != 0) { STACK_SAVE; return r; } \
473 } while(0)
475 #define STACK_AT(index) (stk_base + (index))
476 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
478 #define STACK_PUSH_TYPE(stack_type) do {\
479 STACK_ENSURE(1);\
480 stk->type = (stack_type);\
481 STACK_INC;\
482 } while(0)
484 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
486 #ifdef USE_COMBINATION_EXPLOSION_CHECK
487 #define STATE_CHECK_POS(s,snum) \
488 (((s) - str) * num_comb_exp_check + ((snum) - 1))
489 #define STATE_CHECK_VAL(v,snum) do {\
490 if (state_check_buff != NULL) {\
491 int x = STATE_CHECK_POS(s,snum);\
492 (v) = state_check_buff[x/8] & (1<<(x%8));\
494 else (v) = 0;\
495 } while(0)
498 #define ELSE_IF_STATE_CHECK_MARK(stk) \
499 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
500 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
501 state_check_buff[x/8] |= (1<<(x%8)); \
504 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
505 STACK_ENSURE(1);\
506 stk->type = (stack_type);\
507 stk->u.state.pcode = (pat);\
508 stk->u.state.pstr = (s);\
509 stk->u.state.pstr_prev = (sprev);\
510 stk->u.state.state_check = 0;\
511 STACK_INC;\
512 } while(0)
514 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
515 stk->type = (stack_type);\
516 stk->u.state.pcode = (pat);\
517 stk->u.state.state_check = 0;\
518 STACK_INC;\
519 } while(0)
521 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
522 STACK_ENSURE(1);\
523 stk->type = STK_ALT;\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.pstr = (s);\
526 stk->u.state.pstr_prev = (sprev);\
527 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
528 STACK_INC;\
529 } while(0)
531 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
532 if (state_check_buff != NULL) {\
533 STACK_ENSURE(1);\
534 stk->type = STK_STATE_CHECK_MARK;\
535 stk->u.state.pstr = (s);\
536 stk->u.state.state_check = (snum);\
537 STACK_INC;\
539 } while(0)
541 #else /* USE_COMBINATION_EXPLOSION_CHECK */
543 #define ELSE_IF_STATE_CHECK_MARK(stk)
545 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
546 STACK_ENSURE(1);\
547 stk->type = (stack_type);\
548 stk->u.state.pcode = (pat);\
549 stk->u.state.pstr = (s);\
550 stk->u.state.pstr_prev = (sprev);\
551 STACK_INC;\
552 } while(0)
554 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
557 STACK_INC;\
558 } while(0)
559 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
561 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
562 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
563 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
564 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
565 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
566 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
568 #define STACK_PUSH_REPEAT(id, pat) do {\
569 STACK_ENSURE(1);\
570 stk->type = STK_REPEAT;\
571 stk->u.repeat.num = (id);\
572 stk->u.repeat.pcode = (pat);\
573 stk->u.repeat.count = 0;\
574 STACK_INC;\
575 } while(0)
577 #define STACK_PUSH_REPEAT_INC(sindex) do {\
578 STACK_ENSURE(1);\
579 stk->type = STK_REPEAT_INC;\
580 stk->u.repeat_inc.si = (sindex);\
581 STACK_INC;\
582 } while(0)
584 #define STACK_PUSH_MEM_START(mnum, s) do {\
585 STACK_ENSURE(1);\
586 stk->type = STK_MEM_START;\
587 stk->u.mem.num = (mnum);\
588 stk->u.mem.pstr = (s);\
589 stk->u.mem.start = mem_start_stk[mnum];\
590 stk->u.mem.end = mem_end_stk[mnum];\
591 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
592 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
593 STACK_INC;\
594 } while(0)
596 #define STACK_PUSH_MEM_END(mnum, s) do {\
597 STACK_ENSURE(1);\
598 stk->type = STK_MEM_END;\
599 stk->u.mem.num = (mnum);\
600 stk->u.mem.pstr = (s);\
601 stk->u.mem.start = mem_start_stk[mnum];\
602 stk->u.mem.end = mem_end_stk[mnum];\
603 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
604 STACK_INC;\
605 } while(0)
607 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
608 STACK_ENSURE(1);\
609 stk->type = STK_MEM_END_MARK;\
610 stk->u.mem.num = (mnum);\
611 STACK_INC;\
612 } while(0)
614 #define STACK_GET_MEM_START(mnum, k) do {\
615 int level = 0;\
616 k = stk;\
617 while (k > stk_base) {\
618 k--;\
619 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
620 && k->u.mem.num == (mnum)) {\
621 level++;\
623 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
624 if (level == 0) break;\
625 level--;\
628 } while(0)
630 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
631 int level = 0;\
632 while (k < stk) {\
633 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
634 if (level == 0) (start) = k->u.mem.pstr;\
635 level++;\
637 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
638 level--;\
639 if (level == 0) {\
640 (end) = k->u.mem.pstr;\
641 break;\
644 k++;\
646 } while(0)
648 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
649 STACK_ENSURE(1);\
650 stk->type = STK_NULL_CHECK_START;\
651 stk->u.null_check.num = (cnum);\
652 stk->u.null_check.pstr = (s);\
653 STACK_INC;\
654 } while(0)
656 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
657 STACK_ENSURE(1);\
658 stk->type = STK_NULL_CHECK_END;\
659 stk->u.null_check.num = (cnum);\
660 STACK_INC;\
661 } while(0)
663 #define STACK_PUSH_CALL_FRAME(pat) do {\
664 STACK_ENSURE(1);\
665 stk->type = STK_CALL_FRAME;\
666 stk->u.call_frame.ret_addr = (pat);\
667 STACK_INC;\
668 } while(0)
670 #define STACK_PUSH_RETURN do {\
671 STACK_ENSURE(1);\
672 stk->type = STK_RETURN;\
673 STACK_INC;\
674 } while(0)
677 #ifdef ONIG_DEBUG
678 #define STACK_BASE_CHECK(p, at) \
679 if ((p) < stk_base) {\
680 fprintf(stderr, "at %s\n", at);\
681 goto stack_error;\
683 #else
684 #define STACK_BASE_CHECK(p, at)
685 #endif
687 #define STACK_POP_ONE do {\
688 stk--;\
689 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
690 } while(0)
692 #define STACK_POP do {\
693 switch (pop_level) {\
694 case STACK_POP_LEVEL_FREE:\
695 while (1) {\
696 stk--;\
697 STACK_BASE_CHECK(stk, "STACK_POP"); \
698 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
699 ELSE_IF_STATE_CHECK_MARK(stk);\
701 break;\
702 case STACK_POP_LEVEL_MEM_START:\
703 while (1) {\
704 stk--;\
705 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 else if (stk->type == STK_MEM_START) {\
708 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
709 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
711 ELSE_IF_STATE_CHECK_MARK(stk);\
713 break;\
714 default:\
715 while (1) {\
716 stk--;\
717 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
718 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
719 else if (stk->type == STK_MEM_START) {\
720 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
721 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
723 else if (stk->type == STK_REPEAT_INC) {\
724 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
726 else if (stk->type == STK_MEM_END) {\
727 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
728 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
730 ELSE_IF_STATE_CHECK_MARK(stk);\
732 break;\
734 } while(0)
736 #define STACK_POP_TIL_POS_NOT do {\
737 while (1) {\
738 stk--;\
739 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
740 if (stk->type == STK_POS_NOT) break;\
741 else if (stk->type == STK_MEM_START) {\
742 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
743 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
745 else if (stk->type == STK_REPEAT_INC) {\
746 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
748 else if (stk->type == STK_MEM_END) {\
749 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
750 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
752 ELSE_IF_STATE_CHECK_MARK(stk);\
754 } while(0)
756 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
757 while (1) {\
758 stk--;\
759 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
760 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
761 else if (stk->type == STK_MEM_START) {\
762 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
763 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
765 else if (stk->type == STK_REPEAT_INC) {\
766 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
768 else if (stk->type == STK_MEM_END) {\
769 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
770 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
772 ELSE_IF_STATE_CHECK_MARK(stk);\
774 } while(0)
776 #define STACK_POS_END(k) do {\
777 k = stk;\
778 while (1) {\
779 k--;\
780 STACK_BASE_CHECK(k, "STACK_POS_END"); \
781 if (IS_TO_VOID_TARGET(k)) {\
782 k->type = STK_VOID;\
784 else if (k->type == STK_POS) {\
785 k->type = STK_VOID;\
786 break;\
789 } while(0)
791 #define STACK_STOP_BT_END do {\
792 OnigStackType *k = stk;\
793 while (1) {\
794 k--;\
795 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
796 if (IS_TO_VOID_TARGET(k)) {\
797 k->type = STK_VOID;\
799 else if (k->type == STK_STOP_BT) {\
800 k->type = STK_VOID;\
801 break;\
804 } while(0)
806 #define STACK_NULL_CHECK(isnull,id,s) do {\
807 OnigStackType* k = stk;\
808 while (1) {\
809 k--;\
810 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
811 if (k->type == STK_NULL_CHECK_START) {\
812 if (k->u.null_check.num == (id)) {\
813 (isnull) = (k->u.null_check.pstr == (s));\
814 break;\
818 } while(0)
820 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
821 int level = 0;\
822 OnigStackType* k = stk;\
823 while (1) {\
824 k--;\
825 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
826 if (k->type == STK_NULL_CHECK_START) {\
827 if (k->u.null_check.num == (id)) {\
828 if (level == 0) {\
829 (isnull) = (k->u.null_check.pstr == (s));\
830 break;\
832 else level--;\
835 else if (k->type == STK_NULL_CHECK_END) {\
836 level++;\
839 } while(0)
841 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
842 OnigStackType* k = stk;\
843 while (1) {\
844 k--;\
845 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
846 if (k->type == STK_NULL_CHECK_START) {\
847 if (k->u.null_check.num == (id)) {\
848 if (k->u.null_check.pstr != (s)) {\
849 (isnull) = 0;\
850 break;\
852 else {\
853 UChar* endp;\
854 (isnull) = 1;\
855 while (k < stk) {\
856 if (k->type == STK_MEM_START) {\
857 if (k->u.mem.end == INVALID_STACK_INDEX) {\
858 (isnull) = 0; break;\
860 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
861 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
862 else\
863 endp = (UChar* )k->u.mem.end;\
864 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
865 (isnull) = 0; break;\
867 else if (endp != s) {\
868 (isnull) = -1; /* empty, but position changed */ \
871 k++;\
873 break;\
878 } while(0)
880 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
881 int level = 0;\
882 OnigStackType* k = stk;\
883 while (1) {\
884 k--;\
885 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
886 if (k->type == STK_NULL_CHECK_START) {\
887 if (k->u.null_check.num == (id)) {\
888 if (level == 0) {\
889 if (k->u.null_check.pstr != (s)) {\
890 (isnull) = 0;\
891 break;\
893 else {\
894 UChar* endp;\
895 (isnull) = 1;\
896 while (k < stk) {\
897 if (k->type == STK_MEM_START) {\
898 if (k->u.mem.end == INVALID_STACK_INDEX) {\
899 (isnull) = 0; break;\
901 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
902 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
903 else\
904 endp = (UChar* )k->u.mem.end;\
905 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
906 (isnull) = 0; break;\
908 else if (endp != s) {\
909 (isnull) = -1; /* empty, but position changed */ \
912 k++;\
914 break;\
917 else {\
918 level--;\
922 else if (k->type == STK_NULL_CHECK_END) {\
923 if (k->u.null_check.num == (id)) level++;\
926 } while(0)
928 #define STACK_GET_REPEAT(id, k) do {\
929 int level = 0;\
930 k = stk;\
931 while (1) {\
932 k--;\
933 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
934 if (k->type == STK_REPEAT) {\
935 if (level == 0) {\
936 if (k->u.repeat.num == (id)) {\
937 break;\
941 else if (k->type == STK_CALL_FRAME) level--;\
942 else if (k->type == STK_RETURN) level++;\
944 } while(0)
946 #define STACK_RETURN(addr) do {\
947 int level = 0;\
948 OnigStackType* k = stk;\
949 while (1) {\
950 k--;\
951 STACK_BASE_CHECK(k, "STACK_RETURN"); \
952 if (k->type == STK_CALL_FRAME) {\
953 if (level == 0) {\
954 (addr) = k->u.call_frame.ret_addr;\
955 break;\
957 else level--;\
959 else if (k->type == STK_RETURN)\
960 level++;\
962 } while(0)
965 #define STRING_CMP(s1,s2,len) do {\
966 while (len-- > 0) {\
967 if (*s1++ != *s2++) goto fail;\
969 } while(0)
971 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
972 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
973 goto fail; \
974 } while(0)
976 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
977 UChar* s1, UChar** ps2, int mblen)
979 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
980 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
981 UChar *p1, *p2, *end1, *s2, *end2;
982 int len1, len2;
984 s2 = *ps2;
985 end1 = s1 + mblen;
986 end2 = s2 + mblen;
987 while (s1 < end1) {
988 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
989 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
990 if (len1 != len2) return 0;
991 p1 = buf1;
992 p2 = buf2;
993 while (len1-- > 0) {
994 if (*p1 != *p2) return 0;
995 p1++;
996 p2++;
1000 *ps2 = s2;
1001 return 1;
1004 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1005 is_fail = 0;\
1006 while (len-- > 0) {\
1007 if (*s1++ != *s2++) {\
1008 is_fail = 1; break;\
1011 } while(0)
1013 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1014 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1015 is_fail = 1; \
1016 else \
1017 is_fail = 0; \
1018 } while(0)
1021 #define IS_EMPTY_STR (str == end)
1022 #define ON_STR_BEGIN(s) ((s) == str)
1023 #define ON_STR_END(s) ((s) == end)
1024 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1025 #define DATA_ENSURE_CHECK1 (s < right_range)
1026 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1027 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1028 #else
1029 #define DATA_ENSURE_CHECK1 (s < end)
1030 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1031 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1032 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1035 #ifdef USE_CAPTURE_HISTORY
1036 static int
1037 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1038 OnigStackType* stk_top, UChar* str, regex_t* reg)
1040 int n, r;
1041 OnigCaptureTreeNode* child;
1042 OnigStackType* k = *kp;
1044 while (k < stk_top) {
1045 if (k->type == STK_MEM_START) {
1046 n = k->u.mem.num;
1047 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1048 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1049 child = history_node_new();
1050 CHECK_NULL_RETURN_MEMERR(child);
1051 child->group = n;
1052 child->beg = (int )(k->u.mem.pstr - str);
1053 r = history_tree_add_child(node, child);
1054 if (r != 0) return r;
1055 *kp = (k + 1);
1056 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1057 if (r != 0) return r;
1059 k = *kp;
1060 child->end = (int )(k->u.mem.pstr - str);
1063 else if (k->type == STK_MEM_END) {
1064 if (k->u.mem.num == node->group) {
1065 node->end = (int )(k->u.mem.pstr - str);
1066 *kp = k;
1067 return 0;
1070 k++;
1073 return 1; /* 1: root node ending. */
1075 #endif
1077 #ifdef USE_BACKREF_WITH_LEVEL
1078 static int mem_is_in_memp(int mem, int num, UChar* memp)
1080 int i;
1081 MemNumType m;
1083 for (i = 0; i < num; i++) {
1084 GET_MEMNUM_INC(m, memp);
1085 if (mem == (int )m) return 1;
1087 return 0;
1090 static int backref_match_at_nested_level(regex_t* reg
1091 , OnigStackType* top, OnigStackType* stk_base
1092 , int ignore_case, int case_fold_flag
1093 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1095 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1096 int level;
1097 OnigStackType* k;
1099 level = 0;
1100 k = top;
1101 k--;
1102 while (k >= stk_base) {
1103 if (k->type == STK_CALL_FRAME) {
1104 level--;
1106 else if (k->type == STK_RETURN) {
1107 level++;
1109 else if (level == nest) {
1110 if (k->type == STK_MEM_START) {
1111 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1112 pstart = k->u.mem.pstr;
1113 if (pend != NULL_UCHARP) {
1114 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1115 p = pstart;
1116 ss = *s;
1118 if (ignore_case != 0) {
1119 if (string_cmp_ic(reg->enc, case_fold_flag,
1120 pstart, &ss, (int )(pend - pstart)) == 0)
1121 return 0; /* or goto next_mem; */
1123 else {
1124 while (p < pend) {
1125 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1129 *s = ss;
1130 return 1;
1134 else if (k->type == STK_MEM_END) {
1135 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1136 pend = k->u.mem.pstr;
1140 k--;
1143 return 0;
1145 #endif /* USE_BACKREF_WITH_LEVEL */
1148 #ifdef ONIG_DEBUG_STATISTICS
1150 #define USE_TIMEOFDAY
1152 #ifdef USE_TIMEOFDAY
1153 #ifdef HAVE_SYS_TIME_H
1154 #include <sys/time.h>
1155 #endif
1156 #ifdef HAVE_UNISTD_H
1157 #include <unistd.h>
1158 #endif
1159 static struct timeval ts, te;
1160 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1161 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1162 (((te).tv_sec - (ts).tv_sec)*1000000))
1163 #else
1164 #ifdef HAVE_SYS_TIMES_H
1165 #include <sys/times.h>
1166 #endif
1167 static struct tms ts, te;
1168 #define GETTIME(t) times(&(t))
1169 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1170 #endif
1172 static int OpCounter[256];
1173 static int OpPrevCounter[256];
1174 static unsigned long OpTime[256];
1175 static int OpCurr = OP_FINISH;
1176 static int OpPrevTarget = OP_FAIL;
1177 static int MaxStackDepth = 0;
1179 #define MOP_IN(opcode) do {\
1180 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1181 OpCurr = opcode;\
1182 OpCounter[opcode]++;\
1183 GETTIME(ts);\
1184 } while(0)
1186 #define MOP_OUT do {\
1187 GETTIME(te);\
1188 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1189 } while(0)
1191 extern void
1192 onig_statistics_init(void)
1194 int i;
1195 for (i = 0; i < 256; i++) {
1196 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1198 MaxStackDepth = 0;
1201 extern void
1202 onig_print_statistics(FILE* f)
1204 int i;
1205 fprintf(f, " count prev time\n");
1206 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1207 fprintf(f, "%8d: %8d: %10ld: %s\n",
1208 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1210 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1213 #define STACK_INC do {\
1214 stk++;\
1215 if (stk - stk_base > MaxStackDepth) \
1216 MaxStackDepth = stk - stk_base;\
1217 } while(0)
1219 #else
1220 #define STACK_INC stk++
1222 #define MOP_IN(opcode)
1223 #define MOP_OUT
1224 #endif
1227 /* matching region of POSIX API */
1228 typedef int regoff_t;
1230 typedef struct {
1231 regoff_t rm_so;
1232 regoff_t rm_eo;
1233 } posix_regmatch_t;
1235 /* match data(str - end) from position (sstart). */
1236 /* if sstart == str then set sprev to NULL. */
1237 static int
1238 match_at(regex_t* reg, const UChar* str, const UChar* end,
1239 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1240 const UChar* right_range,
1241 #endif
1242 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1244 static UChar FinishCode[] = { OP_FINISH };
1246 int i, n, num_mem, best_len, pop_level;
1247 LengthType tlen, tlen2;
1248 MemNumType mem;
1249 RelAddrType addr;
1250 OnigOptionType option = reg->options;
1251 OnigEncoding encode = reg->enc;
1252 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1253 UChar *s, *q, *sbegin;
1254 UChar *p = reg->p;
1255 char *alloca_base;
1256 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1257 OnigStackType *stkp; /* used as any purpose. */
1258 OnigStackIndex si;
1259 OnigStackIndex *repeat_stk;
1260 OnigStackIndex *mem_start_stk, *mem_end_stk;
1261 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1262 int scv;
1263 unsigned char* state_check_buff = msa->state_check_buff;
1264 int num_comb_exp_check = reg->num_comb_exp_check;
1265 #endif
1266 n = reg->num_repeat + reg->num_mem * 2;
1268 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1269 pop_level = reg->stack_pop_level;
1270 num_mem = reg->num_mem;
1271 repeat_stk = (OnigStackIndex* )alloca_base;
1273 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1274 mem_end_stk = mem_start_stk + num_mem;
1275 mem_start_stk--; /* for index start from 1,
1276 mem_start_stk[1]..mem_start_stk[num_mem] */
1277 mem_end_stk--; /* for index start from 1,
1278 mem_end_stk[1]..mem_end_stk[num_mem] */
1279 for (i = 1; i <= num_mem; i++) {
1280 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1283 #ifdef ONIG_DEBUG_MATCH
1284 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1285 (int )str, (int )end, (int )sstart, (int )sprev);
1286 fprintf(stderr, "size: %d, start offset: %d\n",
1287 (int )(end - str), (int )(sstart - str));
1288 #endif
1290 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1291 best_len = ONIG_MISMATCH;
1292 s = (UChar* )sstart;
1293 while (1) {
1294 #ifdef ONIG_DEBUG_MATCH
1296 UChar *q, *bp, buf[50];
1297 int len;
1298 fprintf(stderr, "%4d> \"", (int )(s - str));
1299 bp = buf;
1300 for (i = 0, q = s; i < 7 && q < end; i++) {
1301 len = enclen(encode, q);
1302 while (len-- > 0) *bp++ = *q++;
1304 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1305 else { xmemcpy(bp, "\"", 1); bp += 1; }
1306 *bp = 0;
1307 fputs((char* )buf, stderr);
1308 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1309 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1310 fprintf(stderr, "\n");
1312 #endif
1314 sbegin = s;
1315 switch (*p++) {
1316 case OP_END: MOP_IN(OP_END);
1317 n = s - sstart;
1318 if (n > best_len) {
1319 OnigRegion* region;
1320 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1321 if (IS_FIND_LONGEST(option)) {
1322 if (n > msa->best_len) {
1323 msa->best_len = n;
1324 msa->best_s = (UChar* )sstart;
1326 else
1327 goto end_best_len;
1329 #endif
1330 best_len = n;
1331 region = msa->region;
1332 if (region) {
1333 #ifdef USE_POSIX_API_REGION_OPTION
1334 if (IS_POSIX_REGION(msa->options)) {
1335 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1337 rmt[0].rm_so = sstart - str;
1338 rmt[0].rm_eo = s - str;
1339 for (i = 1; i <= num_mem; i++) {
1340 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1341 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1342 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1343 else
1344 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1346 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1347 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1348 : (UChar* )((void* )mem_end_stk[i])) - str;
1350 else {
1351 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1355 else {
1356 #endif /* USE_POSIX_API_REGION_OPTION */
1357 region->beg[0] = sstart - str;
1358 region->end[0] = s - str;
1359 for (i = 1; i <= num_mem; i++) {
1360 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1361 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1362 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1363 else
1364 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1366 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1367 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1368 : (UChar* )((void* )mem_end_stk[i])) - str;
1370 else {
1371 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1375 #ifdef USE_CAPTURE_HISTORY
1376 if (reg->capture_history != 0) {
1377 int r;
1378 OnigCaptureTreeNode* node;
1380 if (IS_NULL(region->history_root)) {
1381 region->history_root = node = history_node_new();
1382 CHECK_NULL_RETURN_MEMERR(node);
1384 else {
1385 node = region->history_root;
1386 history_tree_clear(node);
1389 node->group = 0;
1390 node->beg = sstart - str;
1391 node->end = s - str;
1393 stkp = stk_base;
1394 r = make_capture_history_tree(region->history_root, &stkp,
1395 stk, (UChar* )str, reg);
1396 if (r < 0) {
1397 best_len = r; /* error code */
1398 goto finish;
1401 #endif /* USE_CAPTURE_HISTORY */
1402 #ifdef USE_POSIX_API_REGION_OPTION
1403 } /* else IS_POSIX_REGION() */
1404 #endif
1405 } /* if (region) */
1406 } /* n > best_len */
1408 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1409 end_best_len:
1410 #endif
1411 MOP_OUT;
1413 if (IS_FIND_CONDITION(option)) {
1414 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1415 best_len = ONIG_MISMATCH;
1416 goto fail; /* for retry */
1418 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1419 goto fail; /* for retry */
1423 /* default behavior: return first-matching result. */
1424 goto finish;
1425 break;
1427 case OP_EXACT1: MOP_IN(OP_EXACT1);
1428 #if 0
1429 DATA_ENSURE(1);
1430 if (*p != *s) goto fail;
1431 p++; s++;
1432 #endif
1433 if (*p != *s++) goto fail;
1434 DATA_ENSURE(0);
1435 p++;
1436 MOP_OUT;
1437 break;
1439 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1441 int len;
1442 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1444 DATA_ENSURE(1);
1445 len = ONIGENC_MBC_CASE_FOLD(encode,
1446 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1447 case_fold_flag,
1448 &s, end, lowbuf);
1449 DATA_ENSURE(0);
1450 q = lowbuf;
1451 while (len-- > 0) {
1452 if (*p != *q) {
1453 goto fail;
1455 p++; q++;
1458 MOP_OUT;
1459 break;
1461 case OP_EXACT2: MOP_IN(OP_EXACT2);
1462 DATA_ENSURE(2);
1463 if (*p != *s) goto fail;
1464 p++; s++;
1465 if (*p != *s) goto fail;
1466 sprev = s;
1467 p++; s++;
1468 MOP_OUT;
1469 continue;
1470 break;
1472 case OP_EXACT3: MOP_IN(OP_EXACT3);
1473 DATA_ENSURE(3);
1474 if (*p != *s) goto fail;
1475 p++; s++;
1476 if (*p != *s) goto fail;
1477 p++; s++;
1478 if (*p != *s) goto fail;
1479 sprev = s;
1480 p++; s++;
1481 MOP_OUT;
1482 continue;
1483 break;
1485 case OP_EXACT4: MOP_IN(OP_EXACT4);
1486 DATA_ENSURE(4);
1487 if (*p != *s) goto fail;
1488 p++; s++;
1489 if (*p != *s) goto fail;
1490 p++; s++;
1491 if (*p != *s) goto fail;
1492 p++; s++;
1493 if (*p != *s) goto fail;
1494 sprev = s;
1495 p++; s++;
1496 MOP_OUT;
1497 continue;
1498 break;
1500 case OP_EXACT5: MOP_IN(OP_EXACT5);
1501 DATA_ENSURE(5);
1502 if (*p != *s) goto fail;
1503 p++; s++;
1504 if (*p != *s) goto fail;
1505 p++; s++;
1506 if (*p != *s) goto fail;
1507 p++; s++;
1508 if (*p != *s) goto fail;
1509 p++; s++;
1510 if (*p != *s) goto fail;
1511 sprev = s;
1512 p++; s++;
1513 MOP_OUT;
1514 continue;
1515 break;
1517 case OP_EXACTN: MOP_IN(OP_EXACTN);
1518 GET_LENGTH_INC(tlen, p);
1519 DATA_ENSURE(tlen);
1520 while (tlen-- > 0) {
1521 if (*p++ != *s++) goto fail;
1523 sprev = s - 1;
1524 MOP_OUT;
1525 continue;
1526 break;
1528 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1530 int len;
1531 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1533 GET_LENGTH_INC(tlen, p);
1534 endp = p + tlen;
1536 while (p < endp) {
1537 sprev = s;
1538 DATA_ENSURE(1);
1539 len = ONIGENC_MBC_CASE_FOLD(encode,
1540 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1541 case_fold_flag,
1542 &s, end, lowbuf);
1543 DATA_ENSURE(0);
1544 q = lowbuf;
1545 while (len-- > 0) {
1546 if (*p != *q) goto fail;
1547 p++; q++;
1552 MOP_OUT;
1553 continue;
1554 break;
1556 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1557 DATA_ENSURE(2);
1558 if (*p != *s) goto fail;
1559 p++; s++;
1560 if (*p != *s) goto fail;
1561 p++; s++;
1562 MOP_OUT;
1563 break;
1565 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1566 DATA_ENSURE(4);
1567 if (*p != *s) goto fail;
1568 p++; s++;
1569 if (*p != *s) goto fail;
1570 p++; s++;
1571 sprev = s;
1572 if (*p != *s) goto fail;
1573 p++; s++;
1574 if (*p != *s) goto fail;
1575 p++; s++;
1576 MOP_OUT;
1577 continue;
1578 break;
1580 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1581 DATA_ENSURE(6);
1582 if (*p != *s) goto fail;
1583 p++; s++;
1584 if (*p != *s) goto fail;
1585 p++; s++;
1586 if (*p != *s) goto fail;
1587 p++; s++;
1588 if (*p != *s) goto fail;
1589 p++; s++;
1590 sprev = s;
1591 if (*p != *s) goto fail;
1592 p++; s++;
1593 if (*p != *s) goto fail;
1594 p++; s++;
1595 MOP_OUT;
1596 continue;
1597 break;
1599 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1600 GET_LENGTH_INC(tlen, p);
1601 DATA_ENSURE(tlen * 2);
1602 while (tlen-- > 0) {
1603 if (*p != *s) goto fail;
1604 p++; s++;
1605 if (*p != *s) goto fail;
1606 p++; s++;
1608 sprev = s - 2;
1609 MOP_OUT;
1610 continue;
1611 break;
1613 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1614 GET_LENGTH_INC(tlen, p);
1615 DATA_ENSURE(tlen * 3);
1616 while (tlen-- > 0) {
1617 if (*p != *s) goto fail;
1618 p++; s++;
1619 if (*p != *s) goto fail;
1620 p++; s++;
1621 if (*p != *s) goto fail;
1622 p++; s++;
1624 sprev = s - 3;
1625 MOP_OUT;
1626 continue;
1627 break;
1629 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1630 GET_LENGTH_INC(tlen, p); /* mb-len */
1631 GET_LENGTH_INC(tlen2, p); /* string len */
1632 tlen2 *= tlen;
1633 DATA_ENSURE(tlen2);
1634 while (tlen2-- > 0) {
1635 if (*p != *s) goto fail;
1636 p++; s++;
1638 sprev = s - tlen;
1639 MOP_OUT;
1640 continue;
1641 break;
1643 case OP_CCLASS: MOP_IN(OP_CCLASS);
1644 DATA_ENSURE(1);
1645 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1646 p += SIZE_BITSET;
1647 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
1648 MOP_OUT;
1649 break;
1651 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1652 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
1654 cclass_mb:
1655 GET_LENGTH_INC(tlen, p);
1657 OnigCodePoint code;
1658 UChar *ss;
1659 int mb_len;
1661 DATA_ENSURE(1);
1662 mb_len = enclen(encode, s, end);
1663 DATA_ENSURE(mb_len);
1664 ss = s;
1665 s += mb_len;
1666 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1668 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1669 if (! onig_is_in_code_range(p, code)) goto fail;
1670 #else
1671 q = p;
1672 ALIGNMENT_RIGHT(q);
1673 if (! onig_is_in_code_range(q, code)) goto fail;
1674 #endif
1676 p += tlen;
1677 MOP_OUT;
1678 break;
1680 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1681 DATA_ENSURE(1);
1682 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1683 p += SIZE_BITSET;
1684 goto cclass_mb;
1686 else {
1687 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1688 goto fail;
1690 p += SIZE_BITSET;
1691 GET_LENGTH_INC(tlen, p);
1692 p += tlen;
1693 s++;
1695 MOP_OUT;
1696 break;
1698 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1699 DATA_ENSURE(1);
1700 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1701 p += SIZE_BITSET;
1702 s += enclen(encode, s, end);
1703 MOP_OUT;
1704 break;
1706 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1707 DATA_ENSURE(1);
1708 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1709 s++;
1710 GET_LENGTH_INC(tlen, p);
1711 p += tlen;
1712 goto cc_mb_not_success;
1715 cclass_mb_not:
1716 GET_LENGTH_INC(tlen, p);
1718 OnigCodePoint code;
1719 UChar *ss;
1720 int mb_len = enclen(encode, s, end);
1722 if (! DATA_ENSURE_CHECK(mb_len)) {
1723 DATA_ENSURE(1);
1724 s = (UChar* )end;
1725 p += tlen;
1726 goto cc_mb_not_success;
1729 ss = s;
1730 s += mb_len;
1731 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1733 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1734 if (onig_is_in_code_range(p, code)) goto fail;
1735 #else
1736 q = p;
1737 ALIGNMENT_RIGHT(q);
1738 if (onig_is_in_code_range(q, code)) goto fail;
1739 #endif
1741 p += tlen;
1743 cc_mb_not_success:
1744 MOP_OUT;
1745 break;
1747 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1748 DATA_ENSURE(1);
1749 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
1750 p += SIZE_BITSET;
1751 goto cclass_mb_not;
1753 else {
1754 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1755 goto fail;
1757 p += SIZE_BITSET;
1758 GET_LENGTH_INC(tlen, p);
1759 p += tlen;
1760 s++;
1762 MOP_OUT;
1763 break;
1765 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1767 OnigCodePoint code;
1768 void *node;
1769 int mb_len;
1770 UChar *ss;
1772 DATA_ENSURE(1);
1773 GET_POINTER_INC(node, p);
1774 mb_len = enclen(encode, s, end);
1775 ss = s;
1776 s += mb_len;
1777 DATA_ENSURE(0);
1778 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1779 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1781 MOP_OUT;
1782 break;
1784 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1785 DATA_ENSURE(1);
1786 n = enclen(encode, s, end);
1787 DATA_ENSURE(n);
1788 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1789 s += n;
1790 MOP_OUT;
1791 break;
1793 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1794 DATA_ENSURE(1);
1795 n = enclen(encode, s, end);
1796 DATA_ENSURE(n);
1797 s += n;
1798 MOP_OUT;
1799 break;
1801 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1802 while (DATA_ENSURE_CHECK1) {
1803 STACK_PUSH_ALT(p, s, sprev);
1804 n = enclen(encode, s, end);
1805 DATA_ENSURE(n);
1806 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1807 sprev = s;
1808 s += n;
1810 MOP_OUT;
1811 break;
1813 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1814 while (DATA_ENSURE_CHECK1) {
1815 STACK_PUSH_ALT(p, s, sprev);
1816 n = enclen(encode, s, end);
1817 if (n > 1) {
1818 DATA_ENSURE(n);
1819 sprev = s;
1820 s += n;
1822 else {
1823 sprev = s;
1824 s++;
1827 MOP_OUT;
1828 break;
1830 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1831 while (DATA_ENSURE_CHECK1) {
1832 if (*p == *s) {
1833 STACK_PUSH_ALT(p + 1, s, sprev);
1835 n = enclen(encode, s, end);
1836 DATA_ENSURE(n);
1837 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1838 sprev = s;
1839 s += n;
1841 p++;
1842 MOP_OUT;
1843 break;
1845 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1846 while (DATA_ENSURE_CHECK1) {
1847 if (*p == *s) {
1848 STACK_PUSH_ALT(p + 1, s, sprev);
1850 n = enclen(encode, s, end);
1851 if (n > 1) {
1852 DATA_ENSURE(n);
1853 sprev = s;
1854 s += n;
1856 else {
1857 sprev = s;
1858 s++;
1861 p++;
1862 MOP_OUT;
1863 break;
1865 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1866 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1867 GET_STATE_CHECK_NUM_INC(mem, p);
1868 while (DATA_ENSURE_CHECK1) {
1869 STATE_CHECK_VAL(scv, mem);
1870 if (scv) goto fail;
1872 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1873 n = enclen(encode, s);
1874 DATA_ENSURE(n);
1875 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1876 sprev = s;
1877 s += n;
1879 MOP_OUT;
1880 break;
1882 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1883 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1885 GET_STATE_CHECK_NUM_INC(mem, p);
1886 while (DATA_ENSURE_CHECK1) {
1887 STATE_CHECK_VAL(scv, mem);
1888 if (scv) goto fail;
1890 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1891 n = enclen(encode, s);
1892 if (n > 1) {
1893 DATA_ENSURE(n);
1894 sprev = s;
1895 s += n;
1897 else {
1898 sprev = s;
1899 s++;
1902 MOP_OUT;
1903 break;
1904 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1906 case OP_WORD: MOP_IN(OP_WORD);
1907 DATA_ENSURE(1);
1908 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1909 goto fail;
1911 s += enclen(encode, s, end);
1912 MOP_OUT;
1913 break;
1915 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1916 DATA_ENSURE(1);
1917 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1918 goto fail;
1920 s += enclen(encode, s, end);
1921 MOP_OUT;
1922 break;
1924 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1925 if (ON_STR_BEGIN(s)) {
1926 DATA_ENSURE(1);
1927 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1928 goto fail;
1930 else if (ON_STR_END(s)) {
1931 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1932 goto fail;
1934 else {
1935 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1936 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1937 goto fail;
1939 MOP_OUT;
1940 continue;
1941 break;
1943 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1944 if (ON_STR_BEGIN(s)) {
1945 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1946 goto fail;
1948 else if (ON_STR_END(s)) {
1949 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1950 goto fail;
1952 else {
1953 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1954 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1955 goto fail;
1957 MOP_OUT;
1958 continue;
1959 break;
1961 #ifdef USE_WORD_BEGIN_END
1962 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1963 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1964 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1965 MOP_OUT;
1966 continue;
1969 goto fail;
1970 break;
1972 case OP_WORD_END: MOP_IN(OP_WORD_END);
1973 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1974 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1975 MOP_OUT;
1976 continue;
1979 goto fail;
1980 break;
1981 #endif
1983 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1984 if (! ON_STR_BEGIN(s)) goto fail;
1986 MOP_OUT;
1987 continue;
1988 break;
1990 case OP_END_BUF: MOP_IN(OP_END_BUF);
1991 if (! ON_STR_END(s)) goto fail;
1993 MOP_OUT;
1994 continue;
1995 break;
1997 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
1998 if (ON_STR_BEGIN(s)) {
1999 if (IS_NOTBOL(msa->options)) goto fail;
2000 MOP_OUT;
2001 continue;
2003 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2004 MOP_OUT;
2005 continue;
2007 goto fail;
2008 break;
2010 case OP_END_LINE: MOP_IN(OP_END_LINE);
2011 if (ON_STR_END(s)) {
2012 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2013 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2014 #endif
2015 if (IS_NOTEOL(msa->options)) goto fail;
2016 MOP_OUT;
2017 continue;
2018 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2020 #endif
2022 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2023 MOP_OUT;
2024 continue;
2026 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2027 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2028 MOP_OUT;
2029 continue;
2031 #endif
2032 goto fail;
2033 break;
2035 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2036 if (ON_STR_END(s)) {
2037 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2038 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2039 #endif
2040 if (IS_NOTEOL(msa->options)) goto fail;
2041 MOP_OUT;
2042 continue;
2043 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2045 #endif
2047 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2048 ON_STR_END(s + enclen(encode, s, end))) {
2049 MOP_OUT;
2050 continue;
2052 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2053 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2054 UChar* ss = s + enclen(encode, s);
2055 ss += enclen(encode, ss);
2056 if (ON_STR_END(ss)) {
2057 MOP_OUT;
2058 continue;
2061 #endif
2062 goto fail;
2063 break;
2065 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2066 if (s != msa->start)
2067 goto fail;
2069 MOP_OUT;
2070 continue;
2071 break;
2073 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2074 GET_MEMNUM_INC(mem, p);
2075 STACK_PUSH_MEM_START(mem, s);
2076 MOP_OUT;
2077 continue;
2078 break;
2080 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2081 GET_MEMNUM_INC(mem, p);
2082 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2083 MOP_OUT;
2084 continue;
2085 break;
2087 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2088 GET_MEMNUM_INC(mem, p);
2089 STACK_PUSH_MEM_END(mem, s);
2090 MOP_OUT;
2091 continue;
2092 break;
2094 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2095 GET_MEMNUM_INC(mem, p);
2096 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2097 MOP_OUT;
2098 continue;
2099 break;
2101 #ifdef USE_SUBEXP_CALL
2102 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2103 GET_MEMNUM_INC(mem, p);
2104 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2105 STACK_PUSH_MEM_END(mem, s);
2106 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2107 MOP_OUT;
2108 continue;
2109 break;
2111 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2112 GET_MEMNUM_INC(mem, p);
2113 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2114 STACK_GET_MEM_START(mem, stkp);
2116 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2117 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2118 else
2119 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2121 STACK_PUSH_MEM_END_MARK(mem);
2122 MOP_OUT;
2123 continue;
2124 break;
2125 #endif
2127 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2128 mem = 1;
2129 goto backref;
2130 break;
2132 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2133 mem = 2;
2134 goto backref;
2135 break;
2137 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2138 GET_MEMNUM_INC(mem, p);
2139 backref:
2141 int len;
2142 UChar *pstart, *pend;
2144 /* if you want to remove following line,
2145 you should check in parse and compile time. */
2146 if (mem > num_mem) goto fail;
2147 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2148 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2150 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2151 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2152 else
2153 pstart = (UChar* )((void* )mem_start_stk[mem]);
2155 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2156 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2157 : (UChar* )((void* )mem_end_stk[mem]));
2158 n = pend - pstart;
2159 DATA_ENSURE(n);
2160 sprev = s;
2161 STRING_CMP(pstart, s, n);
2162 while (sprev + (len = enclen(encode, sprev, end)) < s)
2163 sprev += len;
2165 MOP_OUT;
2166 continue;
2168 break;
2170 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2171 GET_MEMNUM_INC(mem, p);
2173 int len;
2174 UChar *pstart, *pend;
2176 /* if you want to remove following line,
2177 you should check in parse and compile time. */
2178 if (mem > num_mem) goto fail;
2179 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2180 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2182 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2183 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2184 else
2185 pstart = (UChar* )((void* )mem_start_stk[mem]);
2187 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2188 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2189 : (UChar* )((void* )mem_end_stk[mem]));
2190 n = pend - pstart;
2191 DATA_ENSURE(n);
2192 sprev = s;
2193 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2194 while (sprev + (len = enclen(encode, sprev, end)) < s)
2195 sprev += len;
2197 MOP_OUT;
2198 continue;
2200 break;
2202 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2204 int len, is_fail;
2205 UChar *pstart, *pend, *swork;
2207 GET_LENGTH_INC(tlen, p);
2208 for (i = 0; i < tlen; i++) {
2209 GET_MEMNUM_INC(mem, p);
2211 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2212 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2214 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2215 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2216 else
2217 pstart = (UChar* )((void* )mem_start_stk[mem]);
2219 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2220 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2221 : (UChar* )((void* )mem_end_stk[mem]));
2222 n = pend - pstart;
2223 DATA_ENSURE(n);
2224 sprev = s;
2225 swork = s;
2226 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2227 if (is_fail) continue;
2228 s = swork;
2229 while (sprev + (len = enclen(encode, sprev, end)) < s)
2230 sprev += len;
2232 p += (SIZE_MEMNUM * (tlen - i - 1));
2233 break; /* success */
2235 if (i == tlen) goto fail;
2236 MOP_OUT;
2237 continue;
2239 break;
2241 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2243 int len, is_fail;
2244 UChar *pstart, *pend, *swork;
2246 GET_LENGTH_INC(tlen, p);
2247 for (i = 0; i < tlen; i++) {
2248 GET_MEMNUM_INC(mem, p);
2250 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2251 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2253 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2254 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2255 else
2256 pstart = (UChar* )((void* )mem_start_stk[mem]);
2258 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2259 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2260 : (UChar* )((void* )mem_end_stk[mem]));
2261 n = pend - pstart;
2262 DATA_ENSURE(n);
2263 sprev = s;
2264 swork = s;
2265 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2266 if (is_fail) continue;
2267 s = swork;
2268 while (sprev + (len = enclen(encode, sprev, end)) < s)
2269 sprev += len;
2271 p += (SIZE_MEMNUM * (tlen - i - 1));
2272 break; /* success */
2274 if (i == tlen) goto fail;
2275 MOP_OUT;
2276 continue;
2278 break;
2280 #ifdef USE_BACKREF_WITH_LEVEL
2281 case OP_BACKREF_WITH_LEVEL:
2283 int len;
2284 OnigOptionType ic;
2285 LengthType level;
2287 GET_OPTION_INC(ic, p);
2288 GET_LENGTH_INC(level, p);
2289 GET_LENGTH_INC(tlen, p);
2291 sprev = s;
2292 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2293 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2294 while (sprev + (len = enclen(encode, sprev, end)) < s)
2295 sprev += len;
2297 p += (SIZE_MEMNUM * tlen);
2299 else
2300 goto fail;
2302 MOP_OUT;
2303 continue;
2306 break;
2307 #endif
2309 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2310 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2311 GET_OPTION_INC(option, p);
2312 STACK_PUSH_ALT(p, s, sprev);
2313 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2314 MOP_OUT;
2315 continue;
2316 break;
2318 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2319 GET_OPTION_INC(option, p);
2320 MOP_OUT;
2321 continue;
2322 break;
2323 #endif
2325 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2326 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2327 STACK_PUSH_NULL_CHECK_START(mem, s);
2328 MOP_OUT;
2329 continue;
2330 break;
2332 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2334 int isnull;
2336 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2337 STACK_NULL_CHECK(isnull, mem, s);
2338 if (isnull) {
2339 #ifdef ONIG_DEBUG_MATCH
2340 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2341 (int )mem, (int )s);
2342 #endif
2343 null_check_found:
2344 /* empty loop founded, skip next instruction */
2345 switch (*p++) {
2346 case OP_JUMP:
2347 case OP_PUSH:
2348 p += SIZE_RELADDR;
2349 break;
2350 case OP_REPEAT_INC:
2351 case OP_REPEAT_INC_NG:
2352 case OP_REPEAT_INC_SG:
2353 case OP_REPEAT_INC_NG_SG:
2354 p += SIZE_MEMNUM;
2355 break;
2356 default:
2357 goto unexpected_bytecode_error;
2358 break;
2362 MOP_OUT;
2363 continue;
2364 break;
2366 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2367 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2369 int isnull;
2371 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2372 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2373 if (isnull) {
2374 #ifdef ONIG_DEBUG_MATCH
2375 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2376 (int )mem, (int )s);
2377 #endif
2378 if (isnull == -1) goto fail;
2379 goto null_check_found;
2382 MOP_OUT;
2383 continue;
2384 break;
2385 #endif
2387 #ifdef USE_SUBEXP_CALL
2388 case OP_NULL_CHECK_END_MEMST_PUSH:
2389 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2391 int isnull;
2393 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2394 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2395 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2396 #else
2397 STACK_NULL_CHECK_REC(isnull, mem, s);
2398 #endif
2399 if (isnull) {
2400 #ifdef ONIG_DEBUG_MATCH
2401 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2402 (int )mem, (int )s);
2403 #endif
2404 if (isnull == -1) goto fail;
2405 goto null_check_found;
2407 else {
2408 STACK_PUSH_NULL_CHECK_END(mem);
2411 MOP_OUT;
2412 continue;
2413 break;
2414 #endif
2416 case OP_JUMP: MOP_IN(OP_JUMP);
2417 GET_RELADDR_INC(addr, p);
2418 p += addr;
2419 MOP_OUT;
2420 CHECK_INTERRUPT_IN_MATCH_AT;
2421 continue;
2422 break;
2424 case OP_PUSH: MOP_IN(OP_PUSH);
2425 GET_RELADDR_INC(addr, p);
2426 STACK_PUSH_ALT(p + addr, s, sprev);
2427 MOP_OUT;
2428 continue;
2429 break;
2431 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2432 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2433 GET_STATE_CHECK_NUM_INC(mem, p);
2434 STATE_CHECK_VAL(scv, mem);
2435 if (scv) goto fail;
2437 GET_RELADDR_INC(addr, p);
2438 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2439 MOP_OUT;
2440 continue;
2441 break;
2443 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2444 GET_STATE_CHECK_NUM_INC(mem, p);
2445 GET_RELADDR_INC(addr, p);
2446 STATE_CHECK_VAL(scv, mem);
2447 if (scv) {
2448 p += addr;
2450 else {
2451 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2453 MOP_OUT;
2454 continue;
2455 break;
2457 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2458 GET_STATE_CHECK_NUM_INC(mem, p);
2459 STATE_CHECK_VAL(scv, mem);
2460 if (scv) goto fail;
2462 STACK_PUSH_STATE_CHECK(s, mem);
2463 MOP_OUT;
2464 continue;
2465 break;
2466 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2468 case OP_POP: MOP_IN(OP_POP);
2469 STACK_POP_ONE;
2470 MOP_OUT;
2471 continue;
2472 break;
2474 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2475 GET_RELADDR_INC(addr, p);
2476 if (*p == *s && DATA_ENSURE_CHECK1) {
2477 p++;
2478 STACK_PUSH_ALT(p + addr, s, sprev);
2479 MOP_OUT;
2480 continue;
2482 p += (addr + 1);
2483 MOP_OUT;
2484 continue;
2485 break;
2487 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2488 GET_RELADDR_INC(addr, p);
2489 if (*p == *s) {
2490 p++;
2491 STACK_PUSH_ALT(p + addr, s, sprev);
2492 MOP_OUT;
2493 continue;
2495 p++;
2496 MOP_OUT;
2497 continue;
2498 break;
2500 case OP_REPEAT: MOP_IN(OP_REPEAT);
2502 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2503 GET_RELADDR_INC(addr, p);
2505 STACK_ENSURE(1);
2506 repeat_stk[mem] = GET_STACK_INDEX(stk);
2507 STACK_PUSH_REPEAT(mem, p);
2509 if (reg->repeat_range[mem].lower == 0) {
2510 STACK_PUSH_ALT(p + addr, s, sprev);
2513 MOP_OUT;
2514 continue;
2515 break;
2517 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2519 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2520 GET_RELADDR_INC(addr, p);
2522 STACK_ENSURE(1);
2523 repeat_stk[mem] = GET_STACK_INDEX(stk);
2524 STACK_PUSH_REPEAT(mem, p);
2526 if (reg->repeat_range[mem].lower == 0) {
2527 STACK_PUSH_ALT(p, s, sprev);
2528 p += addr;
2531 MOP_OUT;
2532 continue;
2533 break;
2535 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2536 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2537 si = repeat_stk[mem];
2538 stkp = STACK_AT(si);
2540 repeat_inc:
2541 stkp->u.repeat.count++;
2542 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2543 /* end of repeat. Nothing to do. */
2545 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2546 STACK_PUSH_ALT(p, s, sprev);
2547 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2549 else {
2550 p = stkp->u.repeat.pcode;
2552 STACK_PUSH_REPEAT_INC(si);
2553 MOP_OUT;
2554 CHECK_INTERRUPT_IN_MATCH_AT;
2555 continue;
2556 break;
2558 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2559 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2560 STACK_GET_REPEAT(mem, stkp);
2561 si = GET_STACK_INDEX(stkp);
2562 goto repeat_inc;
2563 break;
2565 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2566 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2567 si = repeat_stk[mem];
2568 stkp = STACK_AT(si);
2570 repeat_inc_ng:
2571 stkp->u.repeat.count++;
2572 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2573 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2574 UChar* pcode = stkp->u.repeat.pcode;
2576 STACK_PUSH_REPEAT_INC(si);
2577 STACK_PUSH_ALT(pcode, s, sprev);
2579 else {
2580 p = stkp->u.repeat.pcode;
2581 STACK_PUSH_REPEAT_INC(si);
2584 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2585 STACK_PUSH_REPEAT_INC(si);
2587 MOP_OUT;
2588 CHECK_INTERRUPT_IN_MATCH_AT;
2589 continue;
2590 break;
2592 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2593 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2594 STACK_GET_REPEAT(mem, stkp);
2595 si = GET_STACK_INDEX(stkp);
2596 goto repeat_inc_ng;
2597 break;
2599 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2600 STACK_PUSH_POS(s, sprev);
2601 MOP_OUT;
2602 continue;
2603 break;
2605 case OP_POP_POS: MOP_IN(OP_POP_POS);
2607 STACK_POS_END(stkp);
2608 s = stkp->u.state.pstr;
2609 sprev = stkp->u.state.pstr_prev;
2611 MOP_OUT;
2612 continue;
2613 break;
2615 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2616 GET_RELADDR_INC(addr, p);
2617 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2618 MOP_OUT;
2619 continue;
2620 break;
2622 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2623 STACK_POP_TIL_POS_NOT;
2624 goto fail;
2625 break;
2627 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2628 STACK_PUSH_STOP_BT;
2629 MOP_OUT;
2630 continue;
2631 break;
2633 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2634 STACK_STOP_BT_END;
2635 MOP_OUT;
2636 continue;
2637 break;
2639 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2640 GET_LENGTH_INC(tlen, p);
2641 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2642 if (IS_NULL(s)) goto fail;
2643 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2644 MOP_OUT;
2645 continue;
2646 break;
2648 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2649 GET_RELADDR_INC(addr, p);
2650 GET_LENGTH_INC(tlen, p);
2651 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2652 if (IS_NULL(q)) {
2653 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2654 If you want to change to fail, replace following line. */
2655 p += addr;
2656 /* goto fail; */
2658 else {
2659 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2660 s = q;
2661 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2663 MOP_OUT;
2664 continue;
2665 break;
2667 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2668 STACK_POP_TIL_LOOK_BEHIND_NOT;
2669 goto fail;
2670 break;
2672 #ifdef USE_SUBEXP_CALL
2673 case OP_CALL: MOP_IN(OP_CALL);
2674 GET_ABSADDR_INC(addr, p);
2675 STACK_PUSH_CALL_FRAME(p);
2676 p = reg->p + addr;
2677 MOP_OUT;
2678 continue;
2679 break;
2681 case OP_RETURN: MOP_IN(OP_RETURN);
2682 STACK_RETURN(p);
2683 STACK_PUSH_RETURN;
2684 MOP_OUT;
2685 continue;
2686 break;
2687 #endif
2689 case OP_FINISH:
2690 goto finish;
2691 break;
2693 fail:
2694 MOP_OUT;
2695 /* fall */
2696 case OP_FAIL: MOP_IN(OP_FAIL);
2697 STACK_POP;
2698 p = stk->u.state.pcode;
2699 s = stk->u.state.pstr;
2700 sprev = stk->u.state.pstr_prev;
2702 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2703 if (stk->u.state.state_check != 0) {
2704 stk->type = STK_STATE_CHECK_MARK;
2705 stk++;
2707 #endif
2709 MOP_OUT;
2710 continue;
2711 break;
2713 default:
2714 goto bytecode_error;
2716 } /* end of switch */
2717 sprev = sbegin;
2718 } /* end of while(1) */
2720 finish:
2721 STACK_SAVE;
2722 return best_len;
2724 #ifdef ONIG_DEBUG
2725 stack_error:
2726 STACK_SAVE;
2727 return ONIGERR_STACK_BUG;
2728 #endif
2730 bytecode_error:
2731 STACK_SAVE;
2732 return ONIGERR_UNDEFINED_BYTECODE;
2734 unexpected_bytecode_error:
2735 STACK_SAVE;
2736 return ONIGERR_UNEXPECTED_BYTECODE;
2740 static UChar*
2741 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2742 const UChar* text, const UChar* text_end, UChar* text_range)
2744 UChar *t, *p, *s, *end;
2746 end = (UChar* )text_end;
2747 end -= target_end - target - 1;
2748 if (end > text_range)
2749 end = text_range;
2751 s = (UChar* )text;
2753 while (s < end) {
2754 if (*s == *target) {
2755 p = s + 1;
2756 t = target + 1;
2757 while (t < target_end) {
2758 if (*t != *p++)
2759 break;
2760 t++;
2762 if (t == target_end)
2763 return s;
2765 s += enclen(enc, s, end);
2768 return (UChar* )NULL;
2771 static int
2772 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2773 const UChar* t, const UChar* tend,
2774 const UChar* p, const UChar* end)
2776 int lowlen;
2777 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2779 while (t < tend) {
2780 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2781 q = lowbuf;
2782 while (lowlen > 0) {
2783 if (*t++ != *q++) return 0;
2784 lowlen--;
2788 return 1;
2791 static UChar*
2792 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2793 UChar* target, UChar* target_end,
2794 const UChar* text, const UChar* text_end, UChar* text_range)
2796 UChar *s, *end;
2798 end = (UChar* )text_end;
2799 end -= target_end - target - 1;
2800 if (end > text_range)
2801 end = text_range;
2803 s = (UChar* )text;
2805 while (s < end) {
2806 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2807 s, text_end))
2808 return s;
2810 s += enclen(enc, s, text_end);
2813 return (UChar* )NULL;
2816 static UChar*
2817 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2818 const UChar* text, const UChar* adjust_text,
2819 const UChar* text_end, const UChar* text_start)
2821 UChar *t, *p, *s;
2823 s = (UChar* )text_end;
2824 s -= (target_end - target);
2825 if (s > text_start)
2826 s = (UChar* )text_start;
2827 else
2828 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2830 while (s >= text) {
2831 if (*s == *target) {
2832 p = s + 1;
2833 t = target + 1;
2834 while (t < target_end) {
2835 if (*t != *p++)
2836 break;
2837 t++;
2839 if (t == target_end)
2840 return s;
2842 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2845 return (UChar* )NULL;
2848 static UChar*
2849 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2850 UChar* target, UChar* target_end,
2851 const UChar* text, const UChar* adjust_text,
2852 const UChar* text_end, const UChar* text_start)
2854 UChar *s;
2856 s = (UChar* )text_end;
2857 s -= (target_end - target);
2858 if (s > text_start)
2859 s = (UChar* )text_start;
2860 else
2861 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2863 while (s >= text) {
2864 if (str_lower_case_match(enc, case_fold_flag,
2865 target, target_end, s, text_end))
2866 return s;
2868 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2871 return (UChar* )NULL;
2874 static UChar*
2875 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2876 const UChar* text, const UChar* text_end,
2877 const UChar* text_range)
2879 const UChar *s, *se, *t, *p, *end;
2880 const UChar *tail;
2881 int skip, tlen1;
2883 #ifdef ONIG_DEBUG_SEARCH
2884 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2885 (int )text, (int )text_end, (int )text_range);
2886 #endif
2888 tail = target_end - 1;
2889 tlen1 = tail - target;
2890 end = text_range;
2891 if (end + tlen1 > text_end)
2892 end = text_end - tlen1;
2894 s = text;
2896 if (IS_NULL(reg->int_map)) {
2897 while (s < end) {
2898 p = se = s + tlen1;
2899 t = tail;
2900 while (t >= target && *p == *t) {
2901 p--; t--;
2903 if (t < target) return (UChar* )s;
2905 skip = reg->map[*se];
2906 t = s;
2907 do {
2908 s += enclen(reg->enc, s, end);
2909 } while ((s - t) < skip && s < end);
2912 else {
2913 while (s < end) {
2914 p = se = s + tlen1;
2915 t = tail;
2916 while (t >= target && *p == *t) {
2917 p--; t--;
2919 if (t < target) return (UChar* )s;
2921 skip = reg->int_map[*se];
2922 t = s;
2923 do {
2924 s += enclen(reg->enc, s, end);
2925 } while ((s - t) < skip && s < end);
2929 return (UChar* )NULL;
2932 static UChar*
2933 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2934 const UChar* text, const UChar* text_end, const UChar* text_range)
2936 const UChar *s, *t, *p, *end;
2937 const UChar *tail;
2939 end = text_range + (target_end - target) - 1;
2940 if (end > text_end)
2941 end = text_end;
2943 tail = target_end - 1;
2944 s = text + (target_end - target) - 1;
2945 if (IS_NULL(reg->int_map)) {
2946 while (s < end) {
2947 p = s;
2948 t = tail;
2949 while (t >= target && *p == *t) {
2950 p--; t--;
2952 if (t < target) return (UChar* )(p + 1);
2953 s += reg->map[*s];
2956 else { /* see int_map[] */
2957 while (s < end) {
2958 p = s;
2959 t = tail;
2960 while (t >= target && *p == *t) {
2961 p--; t--;
2963 if (t < target) return (UChar* )(p + 1);
2964 s += reg->int_map[*s];
2967 return (UChar* )NULL;
2970 static int
2971 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2972 int** skip)
2975 int i, len;
2977 if (IS_NULL(*skip)) {
2978 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2979 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2982 len = end - s;
2983 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
2984 (*skip)[i] = len;
2986 for (i = len - 1; i > 0; i--)
2987 (*skip)[s[i]] = i;
2989 return 0;
2992 static UChar*
2993 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
2994 const UChar* text, const UChar* adjust_text,
2995 const UChar* text_end, const UChar* text_start)
2997 const UChar *s, *t, *p;
2999 s = text_end - (target_end - target);
3000 if (text_start < s)
3001 s = text_start;
3002 else
3003 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3005 while (s >= text) {
3006 p = s;
3007 t = target;
3008 while (t < target_end && *p == *t) {
3009 p++; t++;
3011 if (t == target_end)
3012 return (UChar* )s;
3014 s -= reg->int_map_backward[*s];
3015 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3018 return (UChar* )NULL;
3021 static UChar*
3022 map_search(OnigEncoding enc, UChar map[],
3023 const UChar* text, const UChar* text_range)
3025 const UChar *s = text;
3027 while (s < text_range) {
3028 if (map[*s]) return (UChar* )s;
3030 s += enclen(enc, s, text_range);
3032 return (UChar* )NULL;
3035 static UChar*
3036 map_search_backward(OnigEncoding enc, UChar map[],
3037 const UChar* text, const UChar* adjust_text,
3038 const UChar* text_start)
3040 const UChar *s = text_start;
3042 while (s >= text) {
3043 if (map[*s]) return (UChar* )s;
3045 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3047 return (UChar* )NULL;
3050 extern int
3051 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3052 OnigOptionType option)
3054 int r;
3055 UChar *prev;
3056 OnigMatchArg msa;
3058 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3059 start:
3060 THREAD_ATOMIC_START;
3061 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3062 ONIG_STATE_INC(reg);
3063 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3064 onig_chain_reduce(reg);
3065 ONIG_STATE_INC(reg);
3068 else {
3069 int n;
3071 THREAD_ATOMIC_END;
3072 n = 0;
3073 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3074 if (++n > THREAD_PASS_LIMIT_COUNT)
3075 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3076 THREAD_PASS;
3078 goto start;
3080 THREAD_ATOMIC_END;
3081 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3083 MATCH_ARG_INIT(msa, option, region, at);
3084 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3086 int offset = at - str;
3087 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3089 #endif
3091 if (region
3092 #ifdef USE_POSIX_API_REGION_OPTION
3093 && !IS_POSIX_REGION(option)
3094 #endif
3096 r = onig_region_resize_clear(region, reg->num_mem + 1);
3098 else
3099 r = 0;
3101 if (r == 0) {
3102 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3103 r = match_at(reg, str, end,
3104 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3105 end,
3106 #endif
3107 at, prev, &msa);
3110 MATCH_ARG_FREE(msa);
3111 ONIG_STATE_DEC_THREAD(reg);
3112 return r;
3115 static int
3116 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3117 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3119 UChar *p, *pprev = (UChar* )NULL;
3121 #ifdef ONIG_DEBUG_SEARCH
3122 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3123 (int )str, (int )end, (int )s, (int )range);
3124 #endif
3126 p = s;
3127 if (reg->dmin > 0) {
3128 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3129 p += reg->dmin;
3131 else {
3132 UChar *q = p + reg->dmin;
3133 while (p < q) p += enclen(reg->enc, p, end);
3137 retry:
3138 switch (reg->optimize) {
3139 case ONIG_OPTIMIZE_EXACT:
3140 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3141 break;
3142 case ONIG_OPTIMIZE_EXACT_IC:
3143 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3144 reg->exact, reg->exact_end, p, end, range);
3145 break;
3147 case ONIG_OPTIMIZE_EXACT_BM:
3148 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3149 break;
3151 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3152 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3153 break;
3155 case ONIG_OPTIMIZE_MAP:
3156 p = map_search(reg->enc, reg->map, p, range);
3157 break;
3160 if (p && p < range) {
3161 if (p - reg->dmin < s) {
3162 retry_gate:
3163 pprev = p;
3164 p += enclen(reg->enc, p, end);
3165 goto retry;
3168 if (reg->sub_anchor) {
3169 UChar* prev;
3171 switch (reg->sub_anchor) {
3172 case ANCHOR_BEGIN_LINE:
3173 if (!ON_STR_BEGIN(p)) {
3174 prev = onigenc_get_prev_char_head(reg->enc,
3175 (pprev ? pprev : str), p);
3176 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3177 goto retry_gate;
3179 break;
3181 case ANCHOR_END_LINE:
3182 if (ON_STR_END(p)) {
3183 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3184 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3185 (pprev ? pprev : str), p);
3186 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3187 goto retry_gate;
3188 #endif
3190 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3191 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3192 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3193 #endif
3195 goto retry_gate;
3196 break;
3200 if (reg->dmax == 0) {
3201 *low = p;
3202 if (low_prev) {
3203 if (*low > s)
3204 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3205 else
3206 *low_prev = onigenc_get_prev_char_head(reg->enc,
3207 (pprev ? pprev : str), p);
3210 else {
3211 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3212 *low = p - reg->dmax;
3213 if (*low > s) {
3214 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3215 *low, (const UChar** )low_prev);
3216 if (low_prev && IS_NULL(*low_prev))
3217 *low_prev = onigenc_get_prev_char_head(reg->enc,
3218 (pprev ? pprev : s), *low);
3220 else {
3221 if (low_prev)
3222 *low_prev = onigenc_get_prev_char_head(reg->enc,
3223 (pprev ? pprev : str), *low);
3227 /* no needs to adjust *high, *high is used as range check only */
3228 *high = p - reg->dmin;
3230 #ifdef ONIG_DEBUG_SEARCH
3231 fprintf(stderr,
3232 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3233 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3234 #endif
3235 return 1; /* success */
3238 return 0; /* fail */
3241 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3242 int** skip));
3244 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3246 static int
3247 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3248 UChar* s, const UChar* range, UChar* adjrange,
3249 UChar** low, UChar** high)
3251 int r;
3252 UChar *p;
3254 range += reg->dmin;
3255 p = s;
3257 retry:
3258 switch (reg->optimize) {
3259 case ONIG_OPTIMIZE_EXACT:
3260 exact_method:
3261 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3262 range, adjrange, end, p);
3263 break;
3265 case ONIG_OPTIMIZE_EXACT_IC:
3266 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3267 reg->exact, reg->exact_end,
3268 range, adjrange, end, p);
3269 break;
3271 case ONIG_OPTIMIZE_EXACT_BM:
3272 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3273 if (IS_NULL(reg->int_map_backward)) {
3274 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3275 goto exact_method;
3277 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3278 &(reg->int_map_backward));
3279 if (r) return r;
3281 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3282 end, p);
3283 break;
3285 case ONIG_OPTIMIZE_MAP:
3286 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3287 break;
3290 if (p) {
3291 if (reg->sub_anchor) {
3292 UChar* prev;
3294 switch (reg->sub_anchor) {
3295 case ANCHOR_BEGIN_LINE:
3296 if (!ON_STR_BEGIN(p)) {
3297 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3298 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3299 p = prev;
3300 goto retry;
3303 break;
3305 case ANCHOR_END_LINE:
3306 if (ON_STR_END(p)) {
3307 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3308 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3309 if (IS_NULL(prev)) goto fail;
3310 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3311 p = prev;
3312 goto retry;
3314 #endif
3316 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3317 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3318 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3319 #endif
3321 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3322 if (IS_NULL(p)) goto fail;
3323 goto retry;
3325 break;
3329 /* no needs to adjust *high, *high is used as range check only */
3330 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3331 *low = p - reg->dmax;
3332 *high = p - reg->dmin;
3333 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3336 #ifdef ONIG_DEBUG_SEARCH
3337 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3338 (int )(*low - str), (int )(*high - str));
3339 #endif
3340 return 1; /* success */
3343 fail:
3344 #ifdef ONIG_DEBUG_SEARCH
3345 fprintf(stderr, "backward_search_range: fail.\n");
3346 #endif
3347 return 0; /* fail */
3351 extern int
3352 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3353 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3355 int r;
3356 UChar *s, *prev;
3357 OnigMatchArg msa;
3358 const UChar *orig_start = start;
3359 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3360 const UChar *orig_range = range;
3361 #endif
3363 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3364 start:
3365 THREAD_ATOMIC_START;
3366 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3367 ONIG_STATE_INC(reg);
3368 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3369 onig_chain_reduce(reg);
3370 ONIG_STATE_INC(reg);
3373 else {
3374 int n;
3376 THREAD_ATOMIC_END;
3377 n = 0;
3378 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3379 if (++n > THREAD_PASS_LIMIT_COUNT)
3380 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3381 THREAD_PASS;
3383 goto start;
3385 THREAD_ATOMIC_END;
3386 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3388 #ifdef ONIG_DEBUG_SEARCH
3389 fprintf(stderr,
3390 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3391 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3392 #endif
3394 if (region
3395 #ifdef USE_POSIX_API_REGION_OPTION
3396 && !IS_POSIX_REGION(option)
3397 #endif
3399 r = onig_region_resize_clear(region, reg->num_mem + 1);
3400 if (r) goto finish_no_msa;
3403 if (start > end || start < str) goto mismatch_no_msa;
3406 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3407 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3408 #define MATCH_AND_RETURN_CHECK(upper_range) \
3409 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3410 if (r != ONIG_MISMATCH) {\
3411 if (r >= 0) {\
3412 if (! IS_FIND_LONGEST(reg->options)) {\
3413 goto match;\
3416 else goto finish; /* error */ \
3418 #else
3419 #define MATCH_AND_RETURN_CHECK(upper_range) \
3420 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3421 if (r != ONIG_MISMATCH) {\
3422 if (r >= 0) {\
3423 goto match;\
3425 else goto finish; /* error */ \
3427 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3428 #else
3429 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3430 #define MATCH_AND_RETURN_CHECK(none) \
3431 r = match_at(reg, str, end, s, prev, &msa);\
3432 if (r != ONIG_MISMATCH) {\
3433 if (r >= 0) {\
3434 if (! IS_FIND_LONGEST(reg->options)) {\
3435 goto match;\
3438 else goto finish; /* error */ \
3440 #else
3441 #define MATCH_AND_RETURN_CHECK(none) \
3442 r = match_at(reg, str, end, s, prev, &msa);\
3443 if (r != ONIG_MISMATCH) {\
3444 if (r >= 0) {\
3445 goto match;\
3447 else goto finish; /* error */ \
3449 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3450 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3453 /* anchor optimize: resume search range */
3454 if (reg->anchor != 0 && str < end) {
3455 UChar *min_semi_end, *max_semi_end;
3457 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3458 /* search start-position only */
3459 begin_position:
3460 if (range > start)
3461 range = start + 1;
3462 else
3463 range = start;
3465 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3466 /* search str-position only */
3467 if (range > start) {
3468 if (start != str) goto mismatch_no_msa;
3469 range = str + 1;
3471 else {
3472 if (range <= str) {
3473 start = str;
3474 range = str;
3476 else
3477 goto mismatch_no_msa;
3480 else if (reg->anchor & ANCHOR_END_BUF) {
3481 min_semi_end = max_semi_end = (UChar* )end;
3483 end_buf:
3484 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3485 goto mismatch_no_msa;
3487 if (range > start) {
3488 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3489 start = min_semi_end - reg->anchor_dmax;
3490 if (start < end)
3491 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3492 else { /* match with empty at end */
3493 start = onigenc_get_prev_char_head(reg->enc, str, end);
3496 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3497 range = max_semi_end - reg->anchor_dmin + 1;
3500 if (start >= range) goto mismatch_no_msa;
3502 else {
3503 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3504 range = min_semi_end - reg->anchor_dmax;
3506 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3507 start = max_semi_end - reg->anchor_dmin;
3508 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3510 if (range > start) goto mismatch_no_msa;
3513 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3514 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3516 max_semi_end = (UChar* )end;
3517 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3518 min_semi_end = pre_end;
3520 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3521 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3522 if (IS_NOT_NULL(pre_end) &&
3523 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3524 min_semi_end = pre_end;
3526 #endif
3527 if (min_semi_end > str && start <= min_semi_end) {
3528 goto end_buf;
3531 else {
3532 min_semi_end = (UChar* )end;
3533 goto end_buf;
3536 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3537 goto begin_position;
3540 else if (str == end) { /* empty string */
3541 static const UChar* address_for_empty_string = (UChar* )"";
3543 #ifdef ONIG_DEBUG_SEARCH
3544 fprintf(stderr, "onig_search: empty string.\n");
3545 #endif
3547 if (reg->threshold_len == 0) {
3548 start = end = str = address_for_empty_string;
3549 s = (UChar* )start;
3550 prev = (UChar* )NULL;
3552 MATCH_ARG_INIT(msa, option, region, start);
3553 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3554 msa.state_check_buff = (void* )0;
3555 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3556 #endif
3557 MATCH_AND_RETURN_CHECK(end);
3558 goto mismatch;
3560 goto mismatch_no_msa;
3563 #ifdef ONIG_DEBUG_SEARCH
3564 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3565 (int )(end - str), (int )(start - str), (int )(range - str));
3566 #endif
3568 MATCH_ARG_INIT(msa, option, region, orig_start);
3569 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571 int offset = (MIN(start, range) - str);
3572 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3574 #endif
3576 s = (UChar* )start;
3577 if (range > start) { /* forward search */
3578 if (s > str)
3579 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3580 else
3581 prev = (UChar* )NULL;
3583 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3584 UChar *sch_range, *low, *high, *low_prev;
3586 sch_range = (UChar* )range;
3587 if (reg->dmax != 0) {
3588 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3589 sch_range = (UChar* )end;
3590 else {
3591 sch_range += reg->dmax;
3592 if (sch_range > end) sch_range = (UChar* )end;
3596 if ((end - start) < reg->threshold_len)
3597 goto mismatch;
3599 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3600 do {
3601 if (! forward_search_range(reg, str, end, s, sch_range,
3602 &low, &high, &low_prev)) goto mismatch;
3603 if (s < low) {
3604 s = low;
3605 prev = low_prev;
3607 while (s <= high) {
3608 MATCH_AND_RETURN_CHECK(orig_range);
3609 prev = s;
3610 s += enclen(reg->enc, s, end);
3612 } while (s < range);
3613 goto mismatch;
3615 else { /* check only. */
3616 if (! forward_search_range(reg, str, end, s, sch_range,
3617 &low, &high, (UChar** )NULL)) goto mismatch;
3619 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3620 do {
3621 MATCH_AND_RETURN_CHECK(orig_range);
3622 prev = s;
3623 s += enclen(reg->enc, s, end);
3625 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3626 prev = s;
3627 s += enclen(reg->enc, s, end);
3629 } while (s < range);
3630 goto mismatch;
3635 do {
3636 MATCH_AND_RETURN_CHECK(orig_range);
3637 prev = s;
3638 s += enclen(reg->enc, s, end);
3639 } while (s < range);
3641 if (s == range) { /* because empty match with /$/. */
3642 MATCH_AND_RETURN_CHECK(orig_range);
3645 else { /* backward search */
3646 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3647 if (orig_start < end)
3648 orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
3649 #endif
3651 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3652 UChar *low, *high, *adjrange, *sch_start;
3654 if (range < end)
3655 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3656 else
3657 adjrange = (UChar* )end;
3659 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3660 (end - range) >= reg->threshold_len) {
3661 do {
3662 sch_start = s + reg->dmax;
3663 if (sch_start > end) sch_start = (UChar* )end;
3664 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3665 &low, &high) <= 0)
3666 goto mismatch;
3668 if (s > high)
3669 s = high;
3671 while (s >= low) {
3672 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3673 MATCH_AND_RETURN_CHECK(orig_start);
3674 s = prev;
3676 } while (s >= range);
3677 goto mismatch;
3679 else { /* check only. */
3680 if ((end - range) < reg->threshold_len) goto mismatch;
3682 sch_start = s;
3683 if (reg->dmax != 0) {
3684 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3685 sch_start = (UChar* )end;
3686 else {
3687 sch_start += reg->dmax;
3688 if (sch_start > end) sch_start = (UChar* )end;
3689 else
3690 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3691 start, sch_start);
3694 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3695 &low, &high) <= 0) goto mismatch;
3699 do {
3700 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3701 MATCH_AND_RETURN_CHECK(orig_start);
3702 s = prev;
3703 } while (s >= range);
3706 mismatch:
3707 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3708 if (IS_FIND_LONGEST(reg->options)) {
3709 if (msa.best_len >= 0) {
3710 s = msa.best_s;
3711 goto match;
3714 #endif
3715 r = ONIG_MISMATCH;
3717 finish:
3718 MATCH_ARG_FREE(msa);
3719 ONIG_STATE_DEC_THREAD(reg);
3721 /* If result is mismatch and no FIND_NOT_EMPTY option,
3722 then the region is not setted in match_at(). */
3723 if (IS_FIND_NOT_EMPTY(reg->options) && region
3724 #ifdef USE_POSIX_API_REGION_OPTION
3725 && !IS_POSIX_REGION(option)
3726 #endif
3728 onig_region_clear(region);
3731 #ifdef ONIG_DEBUG
3732 if (r != ONIG_MISMATCH)
3733 fprintf(stderr, "onig_search: error %d\n", r);
3734 #endif
3735 return r;
3737 mismatch_no_msa:
3738 r = ONIG_MISMATCH;
3739 finish_no_msa:
3740 ONIG_STATE_DEC_THREAD(reg);
3741 #ifdef ONIG_DEBUG
3742 if (r != ONIG_MISMATCH)
3743 fprintf(stderr, "onig_search: error %d\n", r);
3744 #endif
3745 return r;
3747 match:
3748 ONIG_STATE_DEC_THREAD(reg);
3749 MATCH_ARG_FREE(msa);
3750 return s - str;
3753 extern OnigEncoding
3754 onig_get_encoding(regex_t* reg)
3756 return reg->enc;
3759 extern OnigOptionType
3760 onig_get_options(regex_t* reg)
3762 return reg->options;
3765 extern OnigCaseFoldType
3766 onig_get_case_fold_flag(regex_t* reg)
3768 return reg->case_fold_flag;
3771 extern OnigSyntaxType*
3772 onig_get_syntax(regex_t* reg)
3774 return reg->syntax;
3777 extern int
3778 onig_number_of_captures(regex_t* reg)
3780 return reg->num_mem;
3783 extern int
3784 onig_number_of_capture_histories(regex_t* reg)
3786 #ifdef USE_CAPTURE_HISTORY
3787 int i, n;
3789 n = 0;
3790 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3791 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3792 n++;
3794 return n;
3795 #else
3796 return 0;
3797 #endif
3800 extern void
3801 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3803 *to = *from;