2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #ifdef REGEX_MATCH_VERBOSE
36 /* Extra, hidden flags:
37 {id!} where id > 0 found in the code. */
38 #define REGEX_ID_CHECK 0x100
39 /* When REGEX_NEWLINE && REGEX_MATCH_BEGIN defined, the pattern turn to a normal search,
40 which starts with [\r\n] character range. */
41 #define REGEX_FAKE_MATCH_BEGIN 0x200
42 /* When REGEX_NEWLINE && REGEX_MATCH_END defined, the pattern turn to a normal search,
43 which ends with [\r\n] character range. */
44 #define REGEX_FAKE_MATCH_END 0x400
46 /* --------------------------------------------------------------------- */
47 /* Structures for JIT-ed pattern matching */
48 /* --------------------------------------------------------------------- */
54 /* Number of state descriptors for one term. */
61 sljit_sw (SLJIT_FUNC
*call_init
)(void *next
, void* match
);
63 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
64 struct sljit_function_context context
;
69 /* Variable sized array to contain the handler addresses. */
70 sljit_uw entry_addrs
[1];
75 /* Current and next state array. */
80 /* String character index (ever increasing). */
82 /* Best match found so far (members in priority order). */
86 /* Bool flags (encoded as word). */
88 sljit_sw fast_forward
;
90 struct regex_machine
*machine
;
94 void (SLJIT_FUNC
*call_continue
)(struct regex_match
*match
, const regex_char_t
*input_string
, int length
);
97 /* Variable sized array to contain the state arrays. */
102 ITEM[0] - pointer to the address inside the machine code
103 ITEM[1] - next pointer
104 ITEM[2] - string started from (optional)
105 ITEM[3] - max ID (optional) */
107 /* Register allocation. */
108 /* Current state array (loaded & stored: regex_match->current). */
109 #define R_CURR_STATE SLJIT_S0
110 /* Next state array (loaded & stored: regex_match->next). */
111 #define R_NEXT_STATE SLJIT_S1
112 /* Head (loaded & stored: regex_match->head). */
113 #define R_NEXT_HEAD SLJIT_S2
114 /* String fragment pointer. */
115 #define R_STRING SLJIT_S3
116 /* String fragment length. */
117 #define R_LENGTH SLJIT_S4
118 /* 'struct regex_match*' */
119 #define R_REGEX_MATCH SLJIT_R0
120 /* Current character. */
121 #define R_CURR_CHAR SLJIT_R1
122 /* Temporary register. */
123 #define R_TEMP SLJIT_R2
124 /* Caches the regex_match->best_begin. */
125 #define R_BEST_BEGIN SLJIT_R3
126 /* Current character index. */
127 #define R_CURR_INDEX SLJIT_R4
129 /* --------------------------------------------------------------------- */
130 /* Stack management */
131 /* --------------------------------------------------------------------- */
133 /* Try to allocate 2^n blocks. */
134 #define STACK_FRAGMENT_SIZE (((64 * sizeof(struct stack_item)) - (sizeof(struct stack_fragment_data))) / (sizeof(struct stack_item)))
141 struct stack_fragment_data
{
142 struct stack_fragment
*next
;
143 struct stack_fragment
*prev
;
146 struct stack_fragment
{
147 struct stack_fragment_data data
;
148 struct stack_item items
[STACK_FRAGMENT_SIZE
];
152 struct stack_fragment
*first
;
153 struct stack_fragment
*last
;
158 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
160 static void stack_check(struct stack
*stack
)
162 struct stack_fragment
*curr
;
168 SLJIT_ASSERT(stack
->index
>= 0 && stack
->index
< STACK_FRAGMENT_SIZE
);
170 if (stack
->first
== NULL
) {
171 SLJIT_ASSERT(stack
->first
== NULL
&& stack
->last
== NULL
);
172 SLJIT_ASSERT(stack
->index
== STACK_FRAGMENT_SIZE
- 1 && stack
->count
== 0);
177 if (stack
->last
== NULL
) {
178 SLJIT_ASSERT(stack
->index
== STACK_FRAGMENT_SIZE
- 1 && stack
->count
== 0);
182 SLJIT_ASSERT(stack
->index
>= 0 && stack
->count
>= 0);
184 SLJIT_ASSERT(stack
->first
->data
.prev
== NULL
);
187 if (curr
== stack
->last
)
190 SLJIT_ASSERT(curr
->data
.next
->data
.prev
== curr
);
191 curr
= curr
->data
.next
;
198 static void stack_init(struct stack
*stack
)
202 stack
->index
= STACK_FRAGMENT_SIZE
- 1;
206 static void stack_destroy(struct stack
*stack
)
208 struct stack_fragment
*curr
= stack
->first
;
209 struct stack_fragment
*prev
;
211 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
217 curr
= curr
->data
.next
;
218 SLJIT_FREE(prev
, NULL
);
222 static SLJIT_INLINE
struct stack_item
* stack_top(struct stack
*stack
)
224 SLJIT_ASSERT(stack
->last
);
225 return stack
->last
->items
+ stack
->index
;
228 static int stack_push(struct stack
*stack
, int type
, int value
)
232 if (stack
->index
>= STACK_FRAGMENT_SIZE
) {
234 if (!stack
->last
->data
.next
) {
235 stack
->last
->data
.next
= (struct stack_fragment
*)SLJIT_MALLOC(sizeof(struct stack_fragment
), NULL
);
236 if (!stack
->last
->data
.next
)
238 stack
->last
->data
.next
->data
.next
= NULL
;
239 stack
->last
->data
.next
->data
.prev
= stack
->last
;
241 stack
->last
= stack
->last
->data
.next
;
244 else if (!stack
->first
) {
245 stack
->last
= (struct stack_fragment
*)SLJIT_MALLOC(sizeof(struct stack_fragment
), NULL
);
248 stack
->last
->data
.prev
= NULL
;
249 stack
->last
->data
.next
= NULL
;
250 stack
->first
= stack
->last
;
254 stack
->last
= stack
->first
;
257 stack
->last
->items
[stack
->index
].type
= type
;
258 stack
->last
->items
[stack
->index
].value
= value
;
260 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
266 static struct stack_item
* stack_pop(struct stack
*stack
)
268 struct stack_item
*ret
= stack_top(stack
);
270 if (stack
->index
> 0)
273 stack
->last
= stack
->last
->data
.prev
;
274 stack
->index
= STACK_FRAGMENT_SIZE
- 1;
278 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
284 static SLJIT_INLINE
void stack_clone(struct stack
*src
, struct stack
*dst
)
289 static int stack_push_copy(struct stack
*stack
, int items
, int length
)
291 struct stack_fragment
*frag1
;
292 struct stack_fragment
*frag2
;
296 SLJIT_ASSERT(stack
->count
>= (sljit_uw
)length
&& items
<= length
&& items
> 0);
298 /* Allocate the necessary elements. */
299 counter
= (sljit_uw
)items
;
302 while (counter
> 0) {
303 if (stack
->index
+ counter
>= STACK_FRAGMENT_SIZE
) {
304 SLJIT_ASSERT(counter
>= STACK_FRAGMENT_SIZE
- stack
->index
- 1 + 1);
305 counter
-= STACK_FRAGMENT_SIZE
- stack
->index
- 1 + 1;
307 if (!stack
->last
->data
.next
) {
308 stack
->last
->data
.next
= (struct stack_fragment
*)SLJIT_MALLOC(sizeof(struct stack_fragment
), NULL
);
309 if (!stack
->last
->data
.next
)
311 stack
->last
->data
.next
->data
.next
= NULL
;
312 stack
->last
->data
.next
->data
.prev
= stack
->last
;
314 stack
->last
= stack
->last
->data
.next
;
317 stack
->index
+= counter
;
325 frag2
->items
[ind2
] = frag1
->items
[ind1
];
328 ind1
= STACK_FRAGMENT_SIZE
;
329 frag1
= frag1
->data
.prev
;
332 ind2
= STACK_FRAGMENT_SIZE
;
333 frag2
= frag2
->data
.prev
;
341 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
344 stack
->count
+= (sljit_uw
)items
;
348 /* --------------------------------------------------------------------- */
350 /* --------------------------------------------------------------------- */
365 /* generator only. */
378 struct compiler_common
{
379 /* Temporary stacks. */
384 /* Encoded size of the dfa representation. */
386 /* Number of terms. */
388 /* Number of state descriptors for one term (same as machine->no_states). */
390 /* Number of type_rng_(char|left)-s in the longest character range. */
391 sljit_sw longest_range_size
;
393 /* DFA linear representation (size: dfa_size). */
394 struct stack_item
*dfa_transitions
;
395 /* Term id and search state pairs (size: dfa_size). */
396 struct stack_item
*search_states
;
399 struct sljit_compiler
*compiler
;
400 /* Machine data, which must be kept for later use. */
401 struct regex_machine
*machine
;
402 /* Temporary space for jumps (size: longest_range_size). */
403 struct sljit_jump
**range_jump_list
;
406 static const regex_char_t
* decode_number(const regex_char_t
*regex_string
, int length
, int *result
)
410 SLJIT_ASSERT(length
> 0);
411 if (*regex_string
< '0' || *regex_string
> '9') {
416 while (length
> 0 && *regex_string
>= '0' && *regex_string
<= '9') {
417 value
= value
* 10 + (*regex_string
- '0');
426 static int iterate(struct stack
*stack
, int min
, int max
)
429 struct stack_item
*item
;
434 stack_clone(stack
, &it
);
436 /* Calculate size. */
438 item
= stack_pop(&it
);
439 switch (item
->type
) {
446 case type_qestion_mark
:
459 SLJIT_ASSERT(depth
> 0);
462 count
= (int)it
.count
;
466 SLJIT_ASSERT(depth
> 0);
471 SLJIT_ASSERT(item
->type
!= type_begin
&& item
->type
!= type_end
);
473 count
= (int)it
.count
;
479 if (min
== 0 && max
== 0) {
480 /* {0,0} case, not {0,} case: delete subtree. */
481 stack_clone(&it
, stack
);
482 /* and put an empty bracket expression instead of it. */
483 if (stack_push(stack
, type_open_br
, 0))
484 return REGEX_MEMORY_ERROR
;
485 if (stack_push(stack
, type_close_br
, 0))
486 return REGEX_MEMORY_ERROR
;
490 count
= (int)stack
->count
- count
;
492 /* Put an open bracket before the sequence. */
493 if (stack_push_copy(stack
, 1, count
))
496 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
497 SLJIT_ASSERT(stack_push(&it
, type_open_br
, 0) == 0);
499 stack_push(&it
, type_open_br
, 0);
504 len
= len
* (max
- 1);
506 /* Insert ? operators. */
512 if (stack_push_copy(stack
, count
, count
))
517 if (stack_push_copy(stack
, count
, count
))
519 if (stack_push(stack
, type_qestion_mark
, 0))
520 return REGEX_MEMORY_ERROR
;
526 SLJIT_ASSERT(max
> 0);
529 if (stack_push(stack
, type_qestion_mark
, 0))
530 return REGEX_MEMORY_ERROR
;
534 if (stack_push_copy(stack
, count
, count
))
540 SLJIT_ASSERT(min
> 0);
542 /* Insert + operator. */
545 if (stack_push_copy(stack
, count
, count
))
550 if (stack_push(stack
, type_plus_sign
, 0))
551 return REGEX_MEMORY_ERROR
;
554 /* Close the opened bracket. */
555 if (stack_push(stack
, type_close_br
, 0))
556 return REGEX_MEMORY_ERROR
;
561 static int parse_iterator(const regex_char_t
*regex_string
, int length
, struct stack
*stack
, sljit_uw
*dfa_size
, int begin
)
563 /* We only know that *regex_string == { . */
565 const regex_char_t
*base_from
= regex_string
;
566 const regex_char_t
*from
;
571 /* Decode left value. */
575 if (*regex_string
== ',') {
582 regex_string
= decode_number(regex_string
, length
, &val1
);
585 length
-= (int)(regex_string
- from
);
589 if (*regex_string
== '}') {
594 else if (length
>= 2 && *regex_string
== '!' && regex_string
[1] == '}') {
595 /* Non posix extension. */
596 if (stack_push(stack
, type_id
, val1
))
599 return (int)(regex_string
- base_from
) + 1;
602 if (*regex_string
!= ',')
612 /* Decode right value. */
616 if (*regex_string
== '}')
620 regex_string
= decode_number(regex_string
, length
, &val2
);
621 length
-= (int)(regex_string
- from
);
622 if (val2
< 0 || length
== 0 || *regex_string
!= '}' || val2
< val1
)
625 SLJIT_ASSERT(val1
== 0);
632 if (val1
> 1 || val2
> 1) {
633 val1
= iterate(stack
, val1
, val2
);
636 *dfa_size
+= (sljit_uw
)val1
;
638 else if (val1
== 0 && val2
== 0) {
639 if (stack_push(stack
, type_asterisk
, 0))
643 else if (val1
== 1 && val2
== 0) {
644 if (stack_push(stack
, type_plus_sign
, 0))
648 else if (val1
== 0 && val2
== 1) {
649 if (stack_push(stack
, type_qestion_mark
, 0))
653 else if (val1
== -1) {
654 val1
= iterate(stack
, 0, 0);
657 *dfa_size
-= (sljit_uw
)val1
;
658 SLJIT_ASSERT(*dfa_size
>= 2);
662 SLJIT_ASSERT(val1
== 1 && val2
== 1);
664 return (int)(regex_string
- base_from
);
667 static int parse_char_range(const regex_char_t
*regex_string
, int length
, struct compiler_common
*compiler_common
)
669 struct stack
* stack
= &compiler_common
->stack
;
670 const regex_char_t
*base_from
= regex_string
;
671 regex_char_t left_char
, right_char
, tmp_char
;
679 if (*regex_string
!= '^') {
680 if (stack_push(stack
, type_rng_start
, 0))
690 if (stack_push(stack
, type_rng_start
, 1))
693 /* For both the type_rng_start & type_rng_end. */
694 compiler_common
->dfa_size
+= 2;
696 /* Range must be at least 1 character. */
697 if (*regex_string
== ']') {
700 if (stack_push(stack
, type_rng_char
, ']'))
702 compiler_common
->dfa_size
++;
709 if (*regex_string
== ']')
712 if (*regex_string
!= '\\')
713 left_char
= *regex_string
;
719 left_char
= *regex_string
;
724 /* Is a range here? */
725 if (length
>= 3 && *regex_string
== '-' && *(regex_string
+ 1) != ']') {
729 if (*regex_string
!= '\\')
730 right_char
= *regex_string
;
736 right_char
= *regex_string
;
741 if (left_char
> right_char
) {
742 /* Swap if necessary. */
743 tmp_char
= left_char
;
744 left_char
= right_char
;
745 right_char
= tmp_char
;
748 if (stack_push(stack
, type_rng_left
, left_char
))
750 if (stack_push(stack
, type_rng_right
, right_char
))
752 compiler_common
->dfa_size
+= 2;
755 if (stack_push(stack
, type_rng_char
, left_char
))
757 compiler_common
->dfa_size
++;
761 if (stack_push(stack
, type_rng_end
, 0))
763 return (int)(regex_string
- base_from
);
766 static int parse(const regex_char_t
*regex_string
, int length
, struct compiler_common
*compiler_common
)
768 /* Depth of bracketed expressions. */
770 /* Have we already found a term? '1' if not yet. */
772 /* Cache stack pointer. */
773 struct stack
* stack
= &compiler_common
->stack
;
776 /* Type_begin and type_end. */
777 compiler_common
->dfa_size
= 2;
779 if (stack_push(stack
, type_begin
, 0))
780 return REGEX_MEMORY_ERROR
;
782 if (length
> 0 && *regex_string
== '^') {
783 compiler_common
->flags
|= REGEX_MATCH_BEGIN
;
788 if ((compiler_common
->flags
& (REGEX_MATCH_BEGIN
| REGEX_NEWLINE
)) == (REGEX_MATCH_BEGIN
| REGEX_NEWLINE
)) {
789 /* Replace REGEX_MATCH_BEGIN flag to REGEX_FAKE_MATCH_BEGIN */
790 compiler_common
->flags
&= ~REGEX_MATCH_BEGIN
;
791 compiler_common
->flags
|= REGEX_FAKE_MATCH_BEGIN
;
792 /* and append a new-line search. */
793 if (stack_push(stack
, type_newline
, 0))
794 return REGEX_MEMORY_ERROR
;
795 compiler_common
->dfa_size
++;
796 /* Begin intentionally kept as 1. */
800 switch (*regex_string
) {
805 return REGEX_INVALID_REGEX
;
806 if (stack_push(stack
, type_char
, *regex_string
))
807 return REGEX_MEMORY_ERROR
;
809 compiler_common
->dfa_size
++;
813 if (stack_push(stack
, type_rng_start
, 1))
814 return REGEX_MEMORY_ERROR
;
815 if (compiler_common
->flags
& REGEX_NEWLINE
) {
816 if (stack_push(stack
, type_rng_char
, '\n'))
817 return REGEX_MEMORY_ERROR
;
818 if (stack_push(stack
, type_rng_char
, '\r'))
819 return REGEX_MEMORY_ERROR
;
820 compiler_common
->dfa_size
+= 2;
822 if (stack_push(stack
, type_rng_end
, 1))
823 return REGEX_MEMORY_ERROR
;
825 compiler_common
->dfa_size
+= 2;
830 if (stack_push(stack
, type_open_br
, 0))
831 return REGEX_MEMORY_ERROR
;
837 return REGEX_INVALID_REGEX
;
839 if (stack_push(stack
, type_close_br
, 0))
840 return REGEX_MEMORY_ERROR
;
845 if (stack_push(stack
, type_select
, 0))
846 return REGEX_MEMORY_ERROR
;
848 compiler_common
->dfa_size
+= 2;
853 return REGEX_INVALID_REGEX
;
854 if (stack_push(stack
, type_asterisk
, 0))
855 return REGEX_MEMORY_ERROR
;
856 compiler_common
->dfa_size
+= 2;
862 return REGEX_INVALID_REGEX
;
863 if (stack_push(stack
, (*regex_string
== '+') ? type_plus_sign
: type_qestion_mark
, 0))
864 return REGEX_MEMORY_ERROR
;
865 compiler_common
->dfa_size
++;
869 tmp
= parse_iterator(regex_string
, length
, stack
, &compiler_common
->dfa_size
, begin
);
876 return REGEX_MEMORY_ERROR
;
878 /* Not a valid range expression. */
879 SLJIT_ASSERT(tmp
== -2);
880 if (stack_push(stack
, type_char
, '{'))
881 return REGEX_MEMORY_ERROR
;
882 compiler_common
->dfa_size
++;
887 tmp
= parse_char_range(regex_string
, length
, compiler_common
);
893 return REGEX_MEMORY_ERROR
;
895 SLJIT_ASSERT(tmp
== -2);
896 return REGEX_INVALID_REGEX
;
902 if (length
== 1 && *regex_string
== '$') {
903 compiler_common
->flags
|= REGEX_MATCH_END
;
906 if (stack_push(stack
, type_char
, *regex_string
))
907 return REGEX_MEMORY_ERROR
;
909 compiler_common
->dfa_size
++;
917 return REGEX_INVALID_REGEX
;
919 if ((compiler_common
->flags
& (REGEX_MATCH_END
| REGEX_NEWLINE
)) == (REGEX_MATCH_END
| REGEX_NEWLINE
)) {
920 /* Replace REGEX_MATCH_END flag to REGEX_FAKE_MATCH_END */
921 compiler_common
->flags
&= ~REGEX_MATCH_END
;
922 compiler_common
->flags
|= REGEX_FAKE_MATCH_END
;
923 /* and append a new-line search. */
924 if (stack_push(stack
, type_newline
, 1))
925 return REGEX_MEMORY_ERROR
;
926 compiler_common
->dfa_size
++;
927 /* Begin intentionally kept as 1. */
930 if (stack_push(stack
, type_end
, 0))
931 return REGEX_MEMORY_ERROR
;
933 return REGEX_NO_ERROR
;
936 /* --------------------------------------------------------------------- */
937 /* Generating machine state transitions */
938 /* --------------------------------------------------------------------- */
940 #define PUT_TRANSITION(typ, val) \
943 transitions_ptr->type = typ; \
944 transitions_ptr->value = val; \
947 static struct stack_item
* handle_iteratives(struct stack_item
*transitions_ptr
, struct stack_item
*transitions
, struct stack
*depth
)
949 struct stack_item
*item
;
952 item
= stack_top(depth
);
954 switch (item
->type
) {
956 SLJIT_ASSERT(transitions
[item
->value
].type
== type_branch
);
957 transitions
[item
->value
].value
= (int)(transitions_ptr
- transitions
);
958 PUT_TRANSITION(type_branch
, item
->value
+ 1);
962 SLJIT_ASSERT(transitions
[item
->value
].type
== type_branch
);
963 transitions
[item
->value
].value
= (int)(transitions_ptr
- transitions
);
966 case type_qestion_mark
:
967 PUT_TRANSITION(type_branch
, item
->value
);
971 return transitions_ptr
;
977 static int generate_transitions(struct compiler_common
*compiler_common
)
979 struct stack
*stack
= &compiler_common
->stack
;
980 struct stack
*depth
= &compiler_common
->depth
;
981 struct stack_item
*transitions_ptr
;
982 struct stack_item
*item
;
985 compiler_common
->dfa_transitions
= SLJIT_MALLOC(sizeof(struct stack_item
) * compiler_common
->dfa_size
, NULL
);
986 if (!compiler_common
->dfa_transitions
)
987 return REGEX_MEMORY_ERROR
;
989 /* Go through the items of the stack and generate the necessary branches and jumps (edges of DFA). */
990 transitions_ptr
= compiler_common
->dfa_transitions
+ compiler_common
->dfa_size
;
991 while (stack
->count
> 0) {
992 item
= stack_pop(stack
);
993 switch (item
->type
) {
996 item
= stack_pop(depth
);
997 if (item
->type
== type_select
)
998 PUT_TRANSITION(type_branch
, item
->value
+ 1);
1000 SLJIT_ASSERT(item
->type
== type_close_br
);
1001 if (stack
->count
== 0)
1002 PUT_TRANSITION(type_begin
, 0);
1004 transitions_ptr
= handle_iteratives(transitions_ptr
, compiler_common
->dfa_transitions
, depth
);
1009 if (item
->type
== type_end
)
1010 *--transitions_ptr
= *item
;
1011 if (stack_push(depth
, type_close_br
, (int)(transitions_ptr
- compiler_common
->dfa_transitions
)))
1012 return REGEX_MEMORY_ERROR
;
1016 item
= stack_top(depth
);
1017 if (item
->type
== type_select
) {
1018 SLJIT_ASSERT(compiler_common
->dfa_transitions
[item
->value
].type
== type_jump
);
1019 PUT_TRANSITION(type_branch
, item
->value
+ 1);
1020 PUT_TRANSITION(type_jump
, item
->value
);
1021 item
->value
= (int)(transitions_ptr
- compiler_common
->dfa_transitions
);
1024 SLJIT_ASSERT(item
->type
== type_close_br
);
1025 item
->type
= type_select
;
1026 PUT_TRANSITION(type_jump
, item
->value
);
1027 item
->value
= (int)(transitions_ptr
- compiler_common
->dfa_transitions
);
1032 case type_plus_sign
:
1033 case type_qestion_mark
:
1034 if (item
->type
!= type_qestion_mark
)
1035 PUT_TRANSITION(type_branch
, 0);
1036 if (stack_push(depth
, item
->type
, (int)(transitions_ptr
- compiler_common
->dfa_transitions
)))
1037 return REGEX_MEMORY_ERROR
;
1042 case type_rng_start
:
1043 /* Requires handle_iteratives. */
1044 *--transitions_ptr
= *item
;
1045 transitions_ptr
= handle_iteratives(transitions_ptr
, compiler_common
->dfa_transitions
, depth
);
1049 *--transitions_ptr
= *item
;
1054 SLJIT_ASSERT(compiler_common
->dfa_transitions
== transitions_ptr
);
1055 SLJIT_ASSERT(depth
->count
== 0);
1056 return REGEX_NO_ERROR
;
1059 #undef PUT_TRANSITION
1061 #ifdef REGEX_MATCH_VERBOSE
1063 static void verbose_transitions(struct compiler_common
*compiler_common
)
1065 struct stack_item
*transitions_ptr
= compiler_common
->dfa_transitions
;
1066 struct stack_item
*transitions_end
= transitions_ptr
+ compiler_common
->dfa_size
;
1067 struct stack_item
*search_states_ptr
= compiler_common
->search_states
;
1070 printf("-----------------\nTransitions\n-----------------\n");
1072 while (transitions_ptr
< transitions_end
) {
1073 printf("[%3d] ", pos
++);
1074 if (search_states_ptr
->type
>= 0)
1075 printf("(%3d) ", search_states_ptr
->type
);
1076 switch (transitions_ptr
->type
) {
1078 printf("type_begin\n");
1082 printf("type_end\n");
1086 if (transitions_ptr
->value
>= ' ')
1087 printf("type_char '%c'\n", transitions_ptr
->value
);
1089 printf("type_char 0x%x\n", transitions_ptr
->value
);
1093 printf("type_newline %s\n", transitions_ptr
->value
? "(end)" : "(begin)");
1097 printf("type_id %d\n", transitions_ptr
->value
);
1100 case type_rng_start
:
1101 printf("type_rng_start %s\n", transitions_ptr
->value
? "(invert)" : "(normal)");
1105 printf("type_rng_end\n");
1109 if (transitions_ptr
->value
>= ' ')
1110 printf("type_rng_char '%c'\n", transitions_ptr
->value
);
1112 printf("type_rng_char 0x%x\n", transitions_ptr
->value
);
1116 if (transitions_ptr
->value
>= ' ')
1117 printf("type_rng_left '%c'\n", transitions_ptr
->value
);
1119 printf("type_rng_left 0x%x\n", transitions_ptr
->value
);
1122 case type_rng_right
:
1123 if (transitions_ptr
->value
>= ' ')
1124 printf("type_rng_right '%c'\n", transitions_ptr
->value
);
1126 printf("type_rng_right 0x%x\n", transitions_ptr
->value
);
1130 printf("type_branch -> %d\n", transitions_ptr
->value
);
1134 printf("type_jump -> %d\n", transitions_ptr
->value
);
1138 printf("UNEXPECTED TYPE\n");
1142 search_states_ptr
++;
1145 if (!(compiler_common
->flags
& (REGEX_MATCH_BEGIN
| REGEX_MATCH_END
| REGEX_NEWLINE
| REGEX_ID_CHECK
| REGEX_FAKE_MATCH_BEGIN
| REGEX_FAKE_MATCH_END
)))
1147 if (compiler_common
->flags
& REGEX_MATCH_BEGIN
)
1148 printf("REGEX_MATCH_BEGIN ");
1149 if (compiler_common
->flags
& REGEX_MATCH_END
)
1150 printf("REGEX_MATCH_END ");
1151 if (compiler_common
->flags
& REGEX_NEWLINE
)
1152 printf("REGEX_NEWLINE ");
1153 if (compiler_common
->flags
& REGEX_ID_CHECK
)
1154 printf("REGEX_ID_CHECK ");
1155 if (compiler_common
->flags
& REGEX_FAKE_MATCH_BEGIN
)
1156 printf("REGEX_FAKE_MATCH_BEGIN ");
1157 if (compiler_common
->flags
& REGEX_FAKE_MATCH_END
)
1158 printf("REGEX_FAKE_MATCH_END ");
1159 if (compiler_common
->longest_range_size
> 0)
1160 printf("(longest range: %ld) ", (long)compiler_common
->longest_range_size
);
1166 /* --------------------------------------------------------------------- */
1168 /* --------------------------------------------------------------------- */
1170 static int generate_search_states(struct compiler_common
*compiler_common
)
1172 struct stack_item
*transitions_ptr
= compiler_common
->dfa_transitions
;
1173 struct stack_item
*transitions_end
= transitions_ptr
+ compiler_common
->dfa_size
;
1174 struct stack_item
*search_states_ptr
;
1175 struct stack_item
*rng_start
= NULL
;
1177 compiler_common
->terms_size
= !(compiler_common
->flags
& REGEX_FAKE_MATCH_END
) ? 1 : 2;
1178 compiler_common
->longest_range_size
= 0;
1179 compiler_common
->search_states
= SLJIT_MALLOC(sizeof(struct stack_item
) * compiler_common
->dfa_size
, NULL
);
1180 if (!compiler_common
->search_states
)
1181 return REGEX_MEMORY_ERROR
;
1183 search_states_ptr
= compiler_common
->search_states
;
1184 while (transitions_ptr
< transitions_end
) {
1185 switch (transitions_ptr
->type
) {
1188 search_states_ptr
->type
= 0;
1192 search_states_ptr
->type
= (int)compiler_common
->terms_size
++;
1196 if (transitions_ptr
->value
)
1197 search_states_ptr
->type
= 1;
1199 search_states_ptr
->type
= (int)compiler_common
->terms_size
++;
1200 SLJIT_ASSERT(search_states_ptr
->type
== 1 || search_states_ptr
->type
== 2);
1204 if (transitions_ptr
->value
> 0)
1205 compiler_common
->flags
|= REGEX_ID_CHECK
;
1206 search_states_ptr
->type
= -1;
1209 case type_rng_start
:
1210 search_states_ptr
->type
= (int)compiler_common
->terms_size
;
1211 rng_start
= search_states_ptr
;
1215 search_states_ptr
->type
= (int)compiler_common
->terms_size
++;
1216 /* This is an over estimation. */
1217 if (compiler_common
->longest_range_size
< search_states_ptr
- rng_start
)
1218 compiler_common
->longest_range_size
= search_states_ptr
- rng_start
;
1222 search_states_ptr
->type
= -1;
1225 search_states_ptr
->value
= -1;
1226 search_states_ptr
++;
1229 return REGEX_NO_ERROR
;
1232 static int trace_transitions(int from
, struct compiler_common
*compiler_common
)
1235 struct stack
*stack
= &compiler_common
->stack
;
1236 struct stack
*depth
= &compiler_common
->depth
;
1237 struct stack_item
*dfa_transitions
= compiler_common
->dfa_transitions
;
1238 struct stack_item
*search_states
= compiler_common
->search_states
;
1240 SLJIT_ASSERT(search_states
[from
].type
>= 0);
1244 /* Be prepared for any paths (loops, etc). */
1246 if (dfa_transitions
[from
].type
== type_id
)
1247 if (id
< dfa_transitions
[from
].value
)
1248 id
= dfa_transitions
[from
].value
;
1250 if (search_states
[from
].value
< id
) {
1252 if (search_states
[from
].value
== -1)
1253 if (stack_push(stack
, 0, from
))
1254 return REGEX_MEMORY_ERROR
;
1255 search_states
[from
].value
= id
;
1257 if (dfa_transitions
[from
].type
== type_branch
) {
1258 if (stack_push(depth
, id
, from
))
1259 return REGEX_MEMORY_ERROR
;
1263 else if (dfa_transitions
[from
].type
== type_jump
) {
1264 from
= dfa_transitions
[from
].value
;
1267 else if (search_states
[from
].type
< 0) {
1273 /* Back tracking. */
1274 if (depth
->count
> 0) {
1275 id
= stack_top(depth
)->type
;
1276 from
= dfa_transitions
[stack_pop(depth
)->value
].value
;
1283 /* --------------------------------------------------------------------- */
1284 /* Code generator */
1285 /* --------------------------------------------------------------------- */
1287 #define TERM_OFFSET_OF(index, offs) (((index) * no_states + (offs)) * (sljit_sw)sizeof(sljit_sw))
1288 #define TERM_REL_OFFSET_OF(base, offs) ((base) + ((offs) * (sljit_sw)sizeof(sljit_sw)))
1290 #define EMIT_OP1(type, arg1, arg2, arg3, arg4) \
1291 CHECK(sljit_emit_op1(compiler, type, arg1, arg2, arg3, arg4))
1293 #define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \
1294 CHECK(sljit_emit_op2(compiler, type, arg1, arg2, arg3, arg4, arg5, arg6))
1296 #define EMIT_OP2U(type, arg1, arg2, arg3, arg4) \
1297 CHECK(sljit_emit_op2u(compiler, type, arg1, arg2, arg3, arg4))
1299 #define EMIT_LABEL(label) \
1300 label = sljit_emit_label(compiler); \
1303 #define EMIT_JUMP(jump, type) \
1304 jump = sljit_emit_jump(compiler, type); \
1307 #define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \
1308 jump = sljit_emit_cmp(compiler, type, arg1, arg2, arg3, arg4); \
1311 /* CHECK depends on the use case. */
1313 #define CHECK(exp) \
1314 if (SLJIT_UNLIKELY(exp)) \
1315 return REGEX_MEMORY_ERROR
1317 static int compile_uncond_tran(struct compiler_common
*compiler_common
, int reg
)
1319 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1320 struct stack
*stack
= &compiler_common
->stack
;
1321 struct stack_item
*search_states
= compiler_common
->search_states
;
1322 int flags
= compiler_common
->flags
;
1323 sljit_sw no_states
= compiler_common
->no_states
;
1325 sljit_sw offset
, value
;
1327 if (reg
!= R_CURR_STATE
|| !(compiler_common
->flags
& REGEX_FAKE_MATCH_BEGIN
)) {
1328 CHECK(trace_transitions(0, compiler_common
));
1331 CHECK(trace_transitions(1, compiler_common
));
1334 while (stack
->count
> 0) {
1335 value
= stack_pop(stack
)->value
;
1336 if (search_states
[value
].type
>= 0) {
1337 offset
= TERM_OFFSET_OF(search_states
[value
].type
, 0);
1338 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 1), SLJIT_IMM
, head
);
1342 if (!(flags
& REGEX_MATCH_BEGIN
)) {
1343 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 2), R_TEMP
, 0);
1344 if (flags
& REGEX_ID_CHECK
) {
1345 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 3), SLJIT_IMM
, search_states
[value
].value
);
1348 else if (flags
& REGEX_ID_CHECK
) {
1349 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 2), SLJIT_IMM
, search_states
[value
].value
);
1352 search_states
[value
].value
= -1;
1354 if (reg
== R_NEXT_STATE
) {
1355 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_NEXT_HEAD
, 0);
1357 else if (flags
& REGEX_FAKE_MATCH_BEGIN
) {
1358 SLJIT_ASSERT(compiler_common
->dfa_transitions
[1].type
== type_newline
&& !compiler_common
->dfa_transitions
[1].value
);
1359 offset
= TERM_OFFSET_OF(search_states
[1].type
, 0);
1361 SLJIT_ASSERT(!(flags
& REGEX_MATCH_BEGIN
));
1363 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 1), SLJIT_IMM
, head
);
1366 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 2), SLJIT_IMM
, 1);
1367 if (flags
& REGEX_ID_CHECK
) {
1368 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(reg
), TERM_REL_OFFSET_OF(offset
, 3), SLJIT_IMM
, 0);
1371 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, head
);
1372 return REGEX_NO_ERROR
;
1375 static int compile_cond_tran(struct compiler_common
*compiler_common
, sljit_sw curr_index
)
1377 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1378 struct stack
*stack
= &compiler_common
->stack
;
1379 struct stack_item
*search_states
= compiler_common
->search_states
;
1384 struct sljit_jump
*jump1
;
1385 struct sljit_jump
*jump2
;
1386 struct sljit_jump
*jump3
;
1387 struct sljit_jump
*jump4
;
1388 struct sljit_jump
*jump5
;
1389 struct sljit_label
*label1
;
1391 flags
= compiler_common
->flags
;
1392 no_states
= compiler_common
->no_states
;
1394 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_forward
), SLJIT_IMM
, 0);
1395 if (!(flags
& (REGEX_ID_CHECK
| REGEX_MATCH_BEGIN
))) {
1396 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(curr_index
, 2));
1399 while (stack
->count
> 0) {
1400 value
= stack_pop(stack
)->value
;
1401 if (search_states
[value
].type
>= 0) {
1402 #ifdef REGEX_MATCH_VERBOSE
1403 if (flags
& REGEX_MATCH_VERBOSE
)
1404 printf("-> (%3d:%3d) ", search_states
[value
].type
, search_states
[value
].value
);
1406 offset
= TERM_OFFSET_OF(search_states
[value
].type
, 0);
1408 if (!(flags
& REGEX_ID_CHECK
)) {
1409 if (!(flags
& REGEX_MATCH_BEGIN
)) {
1410 /* Check whether item is inserted. */
1411 EMIT_CMP(jump1
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), SLJIT_IMM
, -1);
1412 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), R_NEXT_HEAD
, 0);
1414 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, offset
);
1416 EMIT_JUMP(jump2
, SLJIT_JUMP
);
1418 /* Check whether old index <= index. */
1420 sljit_set_label(jump1
, label1
);
1422 EMIT_CMP(jump1
, SLJIT_LESS_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1425 sljit_set_label(jump2
, label1
);
1426 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1429 sljit_set_label(jump1
, label1
);
1432 /* Check whether item is inserted. */
1433 EMIT_CMP(jump1
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), SLJIT_IMM
, -1);
1434 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), R_NEXT_HEAD
, 0);
1436 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, offset
);
1439 sljit_set_label(jump1
, label1
);
1443 if (!(flags
& REGEX_MATCH_BEGIN
)) {
1444 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(curr_index
, 2));
1446 /* Check whether item is inserted. */
1447 EMIT_CMP(jump1
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), SLJIT_IMM
, -1);
1448 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), R_NEXT_HEAD
, 0);
1450 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, offset
);
1452 EMIT_JUMP(jump2
, SLJIT_JUMP
);
1454 /* Check whether old index != index. */
1456 sljit_set_label(jump1
, label1
);
1458 EMIT_OP2U(SLJIT_SUB
| SLJIT_SET_Z
| SLJIT_SET_LESS
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1459 EMIT_JUMP(jump1
, SLJIT_LESS
);
1460 EMIT_JUMP(jump3
, SLJIT_NOT_EQUAL
); /* Greater. */
1462 /* Old index == index. */
1463 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(curr_index
, 3));
1464 if (search_states
[value
].value
> 0) {
1465 EMIT_CMP(jump4
, SLJIT_GREATER
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1467 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1469 sljit_set_label(jump4
, label1
);
1472 EMIT_OP2U(SLJIT_SUB
| SLJIT_SET_GREATER_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 3 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1473 EMIT_JUMP(jump4
, SLJIT_GREATER_EQUAL
);
1474 EMIT_JUMP(jump5
, SLJIT_JUMP
);
1476 /* Overwrite index & id. */
1478 sljit_set_label(jump3
, label1
);
1479 sljit_set_label(jump2
, label1
);
1480 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1482 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(curr_index
, 3));
1483 if (search_states
[value
].value
> 0) {
1484 EMIT_CMP(jump3
, SLJIT_GREATER
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1486 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1488 sljit_set_label(jump3
, label1
);
1492 sljit_set_label(jump5
, label1
);
1493 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 3 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1497 sljit_set_label(jump1
, label1
);
1498 sljit_set_label(jump4
, label1
);
1501 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(curr_index
, 2));
1503 if (search_states
[value
].value
> 0) {
1504 EMIT_CMP(jump1
, SLJIT_GREATER
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1506 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_IMM
, search_states
[value
].value
);
1508 sljit_set_label(jump1
, label1
);
1511 /* Check whether item is inserted. */
1512 EMIT_CMP(jump1
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), SLJIT_IMM
, -1);
1513 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ (sljit_sw
)sizeof(sljit_sw
), R_NEXT_HEAD
, 0);
1515 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, offset
);
1517 EMIT_JUMP(jump2
, SLJIT_JUMP
);
1519 /* Check whether old id >= id. */
1521 sljit_set_label(jump1
, label1
);
1523 EMIT_CMP(jump1
, SLJIT_GREATER_EQUAL
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1526 sljit_set_label(jump2
, label1
);
1527 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), offset
+ 2 * (sljit_sw
)sizeof(sljit_sw
), R_TEMP
, 0);
1530 sljit_set_label(jump1
, label1
);
1534 search_states
[value
].value
= -1;
1537 #ifdef REGEX_MATCH_VERBOSE
1538 if (flags
& REGEX_MATCH_VERBOSE
)
1541 return REGEX_NO_ERROR
;
1544 static int compile_end_check(struct compiler_common
*compiler_common
, struct sljit_label
*end_check_label
)
1546 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1547 struct sljit_jump
*jump
;
1548 struct sljit_jump
*clear_states_jump
;
1549 struct sljit_label
*label
;
1550 struct sljit_label
*leave_label
;
1551 struct sljit_label
*begin_loop_label
;
1553 /* Priority order: best_begin > best_end > best_id.
1555 if (new best_begin > old test_begin) do nothing
1556 otherwise we know that new_end > old_end, since R_CURR_INDEX ever increasing
1557 therefore we must overwrite all best_* variables (new_id also contains the highest id for this turn). */
1559 /* Both R_CURR_CHAR and R_BEST_BEGIN used as temporary registers. */
1561 if (!(compiler_common
->flags
& REGEX_MATCH_BEGIN
)) {
1562 EMIT_OP1(SLJIT_MOV
, R_CURR_CHAR
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_REL_OFFSET_OF(0, 2));
1563 EMIT_CMP(jump
, !(compiler_common
->flags
& REGEX_MATCH_NON_GREEDY
) ? SLJIT_LESS
: SLJIT_LESS_EQUAL
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), R_CURR_CHAR
, 0);
1564 sljit_set_label(jump
, end_check_label
);
1566 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), R_CURR_CHAR
, 0);
1567 if (!(compiler_common
->flags
& (REGEX_FAKE_MATCH_BEGIN
| REGEX_FAKE_MATCH_END
))) {
1568 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_end
), R_CURR_INDEX
, 0);
1571 if ((compiler_common
->flags
& (REGEX_FAKE_MATCH_BEGIN
| REGEX_FAKE_MATCH_END
)) == (REGEX_FAKE_MATCH_BEGIN
| REGEX_FAKE_MATCH_END
)) {
1572 EMIT_OP2(SLJIT_SUB
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_end
), R_CURR_INDEX
, 0, SLJIT_IMM
, 2);
1575 EMIT_OP2(SLJIT_SUB
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_end
), R_CURR_INDEX
, 0, SLJIT_IMM
, 1);
1578 if (compiler_common
->flags
& REGEX_ID_CHECK
) {
1579 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_id
), SLJIT_MEM1(R_CURR_STATE
), TERM_REL_OFFSET_OF(0, 3));
1582 EMIT_CMP(clear_states_jump
, SLJIT_LESS
, R_CURR_CHAR
, 0, R_BEST_BEGIN
, 0);
1584 EMIT_LABEL(leave_label
);
1585 EMIT_OP1(SLJIT_MOV
, R_BEST_BEGIN
, 0, R_CURR_CHAR
, 0);
1586 EMIT_JUMP(jump
, SLJIT_JUMP
);
1587 sljit_set_label(jump
, end_check_label
);
1589 /* A loop to clear all states, which are > (or >=) than R_CURR_CHAR. */
1591 sljit_set_label(clear_states_jump
, label
);
1593 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_NEXT_HEAD
, 0);
1594 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, 0);
1596 /* Begin of the loop. */
1597 EMIT_LABEL(begin_loop_label
);
1598 EMIT_CMP(jump
, SLJIT_EQUAL
, R_TEMP
, 0, SLJIT_IMM
, 0);
1599 sljit_set_label(jump
, leave_label
);
1601 EMIT_OP2(SLJIT_ADD
, R_TEMP
, 0, R_TEMP
, 0, R_CURR_STATE
, 0);
1602 EMIT_OP1(SLJIT_MOV
, R_BEST_BEGIN
, 0, SLJIT_MEM1(R_TEMP
), sizeof(sljit_sw
));
1603 EMIT_CMP(clear_states_jump
, !(compiler_common
->flags
& REGEX_MATCH_NON_GREEDY
) ? SLJIT_GREATER
: SLJIT_GREATER_EQUAL
, SLJIT_MEM1(R_TEMP
), 2 * sizeof(sljit_sw
), R_CURR_CHAR
, 0);
1605 /* Case 1: keep this case. */
1606 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_TEMP
), sizeof(sljit_sw
), R_NEXT_HEAD
, 0);
1607 EMIT_OP2(SLJIT_SUB
, R_NEXT_HEAD
, 0, R_TEMP
, 0, R_CURR_STATE
, 0);
1609 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_BEST_BEGIN
, 0);
1610 EMIT_JUMP(jump
, SLJIT_JUMP
);
1611 sljit_set_label(jump
, begin_loop_label
);
1613 /* Case 2: remove this case. */
1615 sljit_set_label(clear_states_jump
, label
);
1617 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_TEMP
), sizeof(sljit_sw
), SLJIT_IMM
, -1);
1619 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_BEST_BEGIN
, 0);
1620 EMIT_JUMP(jump
, SLJIT_JUMP
);
1621 sljit_set_label(jump
, begin_loop_label
);
1624 EMIT_OP1(SLJIT_MOV
, R_BEST_BEGIN
, 0, SLJIT_IMM
, 0);
1625 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), SLJIT_IMM
, 0);
1626 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_end
), R_CURR_INDEX
, 0);
1627 if (compiler_common
->flags
& REGEX_ID_CHECK
) {
1628 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_id
), SLJIT_MEM1(R_CURR_STATE
), TERM_REL_OFFSET_OF(0, 2));
1630 EMIT_JUMP(jump
, SLJIT_JUMP
);
1631 sljit_set_label(jump
, end_check_label
);
1633 return REGEX_NO_ERROR
;
1636 static int compile_leave_fast_forward(struct compiler_common
*compiler_common
, struct sljit_label
*fast_forward_label
)
1638 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1639 struct stack
*stack
= &compiler_common
->stack
;
1640 struct stack_item
*dfa_transitions
= compiler_common
->dfa_transitions
;
1641 struct stack_item
*search_states
= compiler_common
->search_states
;
1643 struct sljit_jump
*jump
;
1644 int init_range
= 1, prev_value
= 0;
1646 while (stack
->count
> 0) {
1647 ind
= stack_pop(stack
)->value
;
1648 search_states
[ind
].value
= -1;
1649 if (search_states
[ind
].type
>= 0) {
1650 if (dfa_transitions
[ind
].type
== type_char
) {
1651 EMIT_CMP(jump
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, dfa_transitions
[ind
].value
);
1652 sljit_set_label(jump
, fast_forward_label
);
1654 else if (dfa_transitions
[ind
].type
== type_rng_start
) {
1655 SLJIT_ASSERT(!dfa_transitions
[ind
].value
);
1657 while (dfa_transitions
[ind
].type
!= type_rng_end
) {
1658 if (dfa_transitions
[ind
].type
== type_rng_char
) {
1659 EMIT_CMP(jump
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, dfa_transitions
[ind
].value
);
1660 sljit_set_label(jump
, fast_forward_label
);
1663 SLJIT_ASSERT(dfa_transitions
[ind
].type
== type_rng_left
);
1665 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_CURR_CHAR
, 0);
1668 if (dfa_transitions
[ind
].value
!= prev_value
) {
1669 /* Best compatibility to all archs. */
1670 prev_value
-= dfa_transitions
[ind
].value
;
1671 if (prev_value
< 0) {
1672 EMIT_OP2(SLJIT_SUB
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, -prev_value
);
1675 EMIT_OP2(SLJIT_ADD
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, prev_value
);
1677 prev_value
= dfa_transitions
[ind
].value
;
1679 EMIT_CMP(jump
, SLJIT_LESS_EQUAL
, R_TEMP
, 0, SLJIT_IMM
, dfa_transitions
[ind
+ 1].value
- dfa_transitions
[ind
].value
);
1680 sljit_set_label(jump
, fast_forward_label
);
1687 SLJIT_ASSERT(dfa_transitions
[ind
].type
== type_newline
);
1688 EMIT_CMP(jump
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, '\n');
1689 sljit_set_label(jump
, fast_forward_label
);
1690 EMIT_CMP(jump
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, '\r');
1691 sljit_set_label(jump
, fast_forward_label
);
1695 return REGEX_NO_ERROR
;
1698 static int compile_newline_check(struct compiler_common
*compiler_common
, sljit_sw ind
)
1700 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1701 struct sljit_jump
*jump1
;
1702 struct sljit_jump
*jump2
;
1703 struct sljit_label
*label
;
1707 /* Check whether a new-line character is found. */
1708 EMIT_CMP(jump1
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, '\n');
1709 EMIT_CMP(jump2
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, '\r');
1711 no_states
= compiler_common
->no_states
;
1712 offset
= TERM_OFFSET_OF(compiler_common
->search_states
[ind
].type
, 1);
1713 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), offset
);
1714 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_CURR_STATE
), offset
, SLJIT_IMM
, -1);
1715 CHECK(sljit_emit_ijump(compiler
, SLJIT_JUMP
, SLJIT_MEM2(R_CURR_STATE
, R_TEMP
), 0));
1718 sljit_set_label(jump1
, label
);
1719 sljit_set_label(jump2
, label
);
1720 return REGEX_NO_ERROR
;
1725 #define CHECK(exp) \
1726 if (SLJIT_UNLIKELY(exp)) \
1729 static SLJIT_INLINE
void range_set_label(struct sljit_jump
**range_jump_list
, struct sljit_label
*label
)
1731 while (*range_jump_list
) {
1732 sljit_set_label(*range_jump_list
, label
);
1737 static sljit_sw
compile_range_check(struct compiler_common
*compiler_common
, sljit_sw ind
)
1739 struct sljit_compiler
*compiler
= compiler_common
->compiler
;
1740 struct stack_item
*dfa_transitions
= compiler_common
->dfa_transitions
;
1741 struct sljit_jump
**range_jump_list
= compiler_common
->range_jump_list
;
1742 int invert
= dfa_transitions
[ind
].value
;
1743 struct sljit_label
*label
;
1746 int init_range
= 1, prev_value
= 0;
1750 while (dfa_transitions
[ind
].type
!= type_rng_end
) {
1751 if (dfa_transitions
[ind
].type
== type_rng_char
) {
1752 EMIT_CMP(*range_jump_list
, SLJIT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, dfa_transitions
[ind
].value
);
1756 SLJIT_ASSERT(dfa_transitions
[ind
].type
== type_rng_left
);
1758 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_CURR_CHAR
, 0);
1761 if (dfa_transitions
[ind
].value
!= prev_value
) {
1762 /* Best compatibility to all archs. */
1763 prev_value
-= dfa_transitions
[ind
].value
;
1764 if (prev_value
< 0) {
1765 EMIT_OP2(SLJIT_SUB
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, -prev_value
);
1768 EMIT_OP2(SLJIT_ADD
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, prev_value
);
1770 prev_value
= dfa_transitions
[ind
].value
;
1772 EMIT_CMP(*range_jump_list
, SLJIT_LESS_EQUAL
, R_TEMP
, 0, SLJIT_IMM
, dfa_transitions
[ind
+ 1].value
- dfa_transitions
[ind
].value
);
1779 *range_jump_list
= NULL
;
1782 no_states
= compiler_common
->no_states
;
1783 offset
= TERM_OFFSET_OF(compiler_common
->search_states
[ind
].type
, 1);
1784 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), offset
);
1785 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_CURR_STATE
), offset
, SLJIT_IMM
, -1);
1786 CHECK(sljit_emit_ijump(compiler
, SLJIT_JUMP
, SLJIT_MEM2(R_CURR_STATE
, R_TEMP
), 0));
1789 range_set_label(compiler_common
->range_jump_list
, label
);
1790 /* Clears the jump list. */
1791 *compiler_common
->range_jump_list
= NULL
;
1796 #undef TERM_OFFSET_OF
1804 /* --------------------------------------------------------------------- */
1806 /* --------------------------------------------------------------------- */
1808 #define TERM_OFFSET_OF(ind, offs) (((ind) * compiler_common.no_states + (offs)) * (sljit_sw)sizeof(sljit_sw))
1810 #define EMIT_OP1(type, arg1, arg2, arg3, arg4) \
1811 CHECK(sljit_emit_op1(compiler_common.compiler, type, arg1, arg2, arg3, arg4))
1813 #define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \
1814 CHECK(sljit_emit_op2(compiler_common.compiler, type, arg1, arg2, arg3, arg4, arg5, arg6))
1816 #define EMIT_LABEL(label) \
1817 label = sljit_emit_label(compiler_common.compiler); \
1820 #define EMIT_JUMP(jump, type) \
1821 jump = sljit_emit_jump(compiler_common.compiler, type); \
1824 #define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \
1825 jump = sljit_emit_cmp(compiler_common.compiler, type, arg1, arg2, arg3, arg4); \
1828 /* A do {} while(0) expression helps to avoid goto statements. */
1829 #define BEGIN_GUARD \
1835 #define CHECK(exp) \
1836 if (SLJIT_UNLIKELY(exp)) \
1839 struct regex_machine
* regex_compile(const regex_char_t
*regex_string
, int length
, int re_flags
, int *error
)
1841 struct compiler_common compiler_common
;
1843 int error_code
, done
, suggest_fast_forward
;
1844 /* ID of an empty match (-1 if not reachable). */
1847 struct sljit_jump
*jump
;
1848 struct sljit_jump
*best_match_found_jump
;
1849 struct sljit_jump
*fast_forward_jump
= NULL
;
1850 struct sljit_jump
*length_is_zero_jump
;
1851 struct sljit_jump
*end_check_jump
= NULL
;
1852 struct sljit_jump
*best_match_check_jump
= NULL
;
1853 struct sljit_jump
*non_greedy_end_jump
= NULL
;
1854 struct sljit_label
*label
;
1855 struct sljit_label
*end_check_label
= NULL
;
1856 struct sljit_label
*start_label
;
1857 struct sljit_label
*fast_forward_label
;
1858 struct sljit_label
*fast_forward_return_label
;
1861 *error
= REGEX_NO_ERROR
;
1862 #ifdef REGEX_MATCH_VERBOSE
1863 compiler_common
.flags
= re_flags
& (REGEX_MATCH_BEGIN
| REGEX_MATCH_END
| REGEX_MATCH_NON_GREEDY
| REGEX_NEWLINE
| REGEX_MATCH_VERBOSE
);
1865 compiler_common
.flags
= re_flags
& (REGEX_MATCH_BEGIN
| REGEX_MATCH_END
| REGEX_MATCH_NON_GREEDY
| REGEX_NEWLINE
);
1868 /* Step 1: parsing (Left->Right).
1869 Syntax check and AST generator. */
1870 error_code
= parse(regex_string
, length
, &compiler_common
);
1872 stack_destroy(&compiler_common
.stack
);
1874 *error
= error_code
;
1878 /* Step 2: generating branches (Right->Left). */
1879 error_code
= generate_transitions(&compiler_common
);
1880 stack_destroy(&compiler_common
.stack
);
1881 stack_destroy(&compiler_common
.depth
);
1883 if (compiler_common
.dfa_transitions
)
1884 SLJIT_FREE(compiler_common
.dfa_transitions
, NULL
);
1886 *error
= error_code
;
1890 /* Step 3: Generate necessary data for depth-first search (Left->Right). */
1891 error_code
= generate_search_states(&compiler_common
);
1893 SLJIT_FREE(compiler_common
.dfa_transitions
, NULL
);
1895 *error
= error_code
;
1899 #ifdef REGEX_MATCH_VERBOSE
1900 if (compiler_common
.flags
& REGEX_MATCH_VERBOSE
)
1901 verbose_transitions(&compiler_common
);
1904 /* Step 4: Left->Right generate code. */
1905 stack_init(&compiler_common
.stack
);
1906 stack_init(&compiler_common
.depth
);
1908 compiler_common
.machine
= NULL
;
1909 compiler_common
.compiler
= NULL
;
1910 compiler_common
.range_jump_list
= NULL
;
1914 compiler_common
.machine
= (struct regex_machine
*)SLJIT_MALLOC(sizeof(struct regex_machine
) + (sljit_uw
)(compiler_common
.terms_size
- 1) * sizeof(sljit_uw
), NULL
);
1915 CHECK(!compiler_common
.machine
);
1917 compiler_common
.compiler
= sljit_create_compiler(NULL
, NULL
);
1918 CHECK(!compiler_common
.compiler
);
1920 if (compiler_common
.longest_range_size
> 0) {
1921 compiler_common
.range_jump_list
= (struct sljit_jump
**)SLJIT_MALLOC(sizeof(struct sljit_jump
*) * (sljit_uw
)compiler_common
.longest_range_size
, NULL
);
1922 CHECK(!compiler_common
.range_jump_list
);
1925 if ((compiler_common
.flags
& REGEX_ID_CHECK
) && !(compiler_common
.flags
& REGEX_MATCH_BEGIN
))
1926 compiler_common
.no_states
= 4;
1927 else if (!(compiler_common
.flags
& REGEX_ID_CHECK
) && (compiler_common
.flags
& REGEX_MATCH_BEGIN
))
1928 compiler_common
.no_states
= 2;
1930 compiler_common
.no_states
= 3;
1932 compiler_common
.machine
->flags
= compiler_common
.flags
;
1933 compiler_common
.machine
->no_states
= compiler_common
.no_states
;
1934 compiler_common
.machine
->size
= compiler_common
.machine
->no_states
* compiler_common
.terms_size
;
1936 /* Study the regular expression. */
1937 empty_match_id
= -1;
1938 suggest_fast_forward
= 1;
1939 if (!(compiler_common
.flags
& REGEX_FAKE_MATCH_BEGIN
)) {
1940 CHECK(trace_transitions(0, &compiler_common
));
1941 while (compiler_common
.stack
.count
> 0) {
1942 ind
= stack_pop(&compiler_common
.stack
)->value
;
1943 if (compiler_common
.search_states
[ind
].type
== 0) {
1944 SLJIT_ASSERT(compiler_common
.dfa_transitions
[ind
].type
== type_end
);
1945 suggest_fast_forward
= 0;
1946 empty_match_id
= compiler_common
.search_states
[ind
].value
;
1948 else if (compiler_common
.search_states
[ind
].type
> 0) {
1949 SLJIT_ASSERT(compiler_common
.dfa_transitions
[ind
].type
!= type_end
);
1950 if (compiler_common
.dfa_transitions
[ind
].type
== type_rng_start
&& compiler_common
.dfa_transitions
[ind
].value
)
1951 suggest_fast_forward
= 0;
1953 compiler_common
.search_states
[ind
].value
= -1;
1957 SLJIT_ASSERT(compiler_common
.dfa_transitions
[1].type
== type_newline
);
1958 CHECK(trace_transitions(1, &compiler_common
));
1959 while (compiler_common
.stack
.count
> 0) {
1960 ind
= stack_pop(&compiler_common
.stack
)->value
;
1961 if (compiler_common
.search_states
[ind
].type
== 0) {
1962 SLJIT_ASSERT(compiler_common
.dfa_transitions
[ind
].type
== type_end
);
1963 suggest_fast_forward
= 0;
1964 empty_match_id
= compiler_common
.search_states
[ind
].value
;
1966 compiler_common
.search_states
[ind
].value
= -1;
1970 /* Step 4.1: Generate entry. */
1971 CHECK(sljit_emit_enter(compiler_common
.compiler
, 0, SLJIT_ARGS3(VOID
, P
, P
, 32), 5, 5, 0, 0, 0));
1973 /* Copy arguments to their place. */
1974 EMIT_OP1(SLJIT_MOV
, R_REGEX_MATCH
, 0, SLJIT_S0
, 0);
1975 EMIT_OP1(SLJIT_MOV
, R_STRING
, 0, SLJIT_S1
, 0);
1976 EMIT_OP2(SLJIT_ADD
, R_LENGTH
, 0, SLJIT_S2
, 0, SLJIT_IMM
, 1);
1978 /* Init global registers. */
1979 EMIT_OP1(SLJIT_MOV
, R_CURR_STATE
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, current
));
1980 EMIT_OP1(SLJIT_MOV
, R_NEXT_STATE
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, next
));
1981 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, head
));
1982 EMIT_OP1(SLJIT_MOV
, R_BEST_BEGIN
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_begin
));
1983 EMIT_OP1(SLJIT_MOV
, R_CURR_INDEX
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, index
));
1985 /* Check whether the best match has already found in a previous frame. */
1986 EMIT_CMP(jump
, SLJIT_EQUAL
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_quit
), SLJIT_IMM
, 0);
1987 EMIT_JUMP(best_match_found_jump
, SLJIT_JUMP
);
1989 #ifdef REGEX_MATCH_VERBOSE
1990 if (compiler_common
.flags
& REGEX_MATCH_VERBOSE
)
1991 printf("\n-----------------\nTrace\n-----------------\n");
1994 /* Step 4.2: Generate code for state 0. */
1996 sljit_emit_op0(compiler_common
.compiler
, SLJIT_ENDBR
);
1997 compiler_common
.machine
->entry_addrs
[0] = (sljit_uw
)label
;
1999 /* Swapping current and next. */
2000 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_CURR_STATE
, 0);
2001 EMIT_OP1(SLJIT_MOV
, R_CURR_STATE
, 0, R_NEXT_STATE
, 0);
2002 EMIT_OP1(SLJIT_MOV
, R_NEXT_STATE
, 0, R_TEMP
, 0);
2004 /* Checking whether the best case needs to be updated. */
2005 if (!(compiler_common
.flags
& REGEX_MATCH_END
)) {
2006 EMIT_CMP(end_check_jump
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_CURR_STATE
), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM
, -1);
2007 EMIT_LABEL(end_check_label
);
2009 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_NEXT_STATE
), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM
, -1);
2010 EMIT_OP2(SLJIT_ADD
, R_CURR_INDEX
, 0, R_CURR_INDEX
, 0, SLJIT_IMM
, 1);
2012 /* Checking whether best case has already found. */
2013 if (!(compiler_common
.flags
& REGEX_MATCH_END
) || (compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2014 if (!(compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2015 /* We can bail out if no more active states remain and R_BEST_BEGIN != -1. */
2016 EMIT_CMP(best_match_check_jump
, SLJIT_NOT_EQUAL
, R_BEST_BEGIN
, 0, SLJIT_IMM
, -1);
2019 /* We can bail out if no more active states remain (regardless of R_BEST_BEGIN). */
2020 EMIT_CMP(best_match_check_jump
, SLJIT_EQUAL
, R_NEXT_HEAD
, 0, SLJIT_IMM
, 0);
2024 EMIT_LABEL(start_label
);
2025 sljit_set_label(jump
, start_label
);
2027 if (!(compiler_common
.flags
& REGEX_MATCH_BEGIN
) && suggest_fast_forward
) {
2028 EMIT_CMP(fast_forward_jump
, SLJIT_NOT_EQUAL
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_forward
), SLJIT_IMM
, 0);
2031 /* Loading the next character. */
2032 EMIT_OP2(SLJIT_SUB
| SLJIT_SET_Z
, R_LENGTH
, 0, R_LENGTH
, 0, SLJIT_IMM
, 1);
2033 EMIT_JUMP(length_is_zero_jump
, SLJIT_EQUAL
);
2035 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_STRING
, 0);
2036 #ifdef REGEX_USE_8BIT_CHARS
2037 EMIT_OP1(SLJIT_MOV_U8
, R_CURR_CHAR
, 0, SLJIT_MEM1(R_TEMP
), 0);
2038 EMIT_OP2(SLJIT_ADD
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, 1);
2040 EMIT_OP1(SLJIT_MOV_UH
, R_CURR_CHAR
, 0, SLJIT_MEM1(R_TEMP
), 0);
2041 EMIT_OP2(SLJIT_ADD
, R_TEMP
, 0, R_TEMP
, 0, SLJIT_IMM
, 2);
2043 EMIT_OP1(SLJIT_MOV
, R_STRING
, 0, R_TEMP
, 0);
2045 #ifdef REGEX_MATCH_VERBOSE
2046 if (compiler_common
.flags
& REGEX_MATCH_VERBOSE
) {
2047 printf("(%3d): ", 0);
2048 CHECK(trace_transitions(0, &compiler_common
));
2049 while (compiler_common
.stack
.count
> 0) {
2050 ind
= stack_pop(&compiler_common
.stack
)->value
;
2051 if (compiler_common
.search_states
[ind
].type
>= 0)
2052 printf("-> (%3d:%3d) ", compiler_common
.search_states
[ind
].type
, compiler_common
.search_states
[ind
].value
);
2053 compiler_common
.search_states
[ind
].value
= -1;
2059 EMIT_LABEL(fast_forward_return_label
);
2060 if (!(compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2061 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_forward
), SLJIT_IMM
, 1);
2062 if (!(compiler_common
.flags
& REGEX_MATCH_END
)) {
2063 EMIT_CMP(jump
, SLJIT_NOT_EQUAL
, R_BEST_BEGIN
, 0, SLJIT_IMM
, -1);
2066 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_CURR_INDEX
, 0);
2067 CHECK(compile_uncond_tran(&compiler_common
, R_NEXT_STATE
));
2068 /* And branching to the first state. */
2069 CHECK(sljit_emit_ijump(compiler_common
.compiler
, SLJIT_JUMP
, SLJIT_MEM2(R_CURR_STATE
, R_TEMP
), 0));
2071 if (!(compiler_common
.flags
& REGEX_MATCH_END
)) {
2073 sljit_set_label(jump
, label
);
2076 /* This is the case where we only have to reset the R_NEXT_HEAD. */
2077 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, R_NEXT_HEAD
, 0);
2078 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, 0);
2079 CHECK(sljit_emit_ijump(compiler_common
.compiler
, SLJIT_JUMP
, SLJIT_MEM2(R_CURR_STATE
, R_TEMP
), 0));
2081 /* Fast-forward loop. */
2082 if (fast_forward_jump
) {
2083 /* Quit from fast-forward loop. */
2084 EMIT_LABEL(fast_forward_label
);
2085 EMIT_OP2(SLJIT_SUB
, R_TEMP
, 0, R_NEXT_HEAD
, 0, SLJIT_IMM
, 1);
2086 EMIT_OP1(SLJIT_MOV
, R_LENGTH
, 0, R_NEXT_STATE
, 0);
2087 EMIT_OP1(SLJIT_MOV
, R_STRING
, 0, R_CURR_STATE
, 0);
2088 EMIT_OP1(SLJIT_MOV
, R_CURR_INDEX
, 0, R_NEXT_HEAD
, 0);
2089 EMIT_OP1(SLJIT_MOV
, R_NEXT_STATE
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, next
));
2090 EMIT_OP1(SLJIT_MOV
, R_CURR_STATE
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, current
));
2091 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, head
));
2093 /* Update the start field of the locations. */
2094 CHECK(trace_transitions(0, &compiler_common
));
2095 while (compiler_common
.stack
.count
> 0) {
2096 ind
= stack_pop(&compiler_common
.stack
)->value
;
2097 if (compiler_common
.search_states
[ind
].type
>= 0) {
2098 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(compiler_common
.search_states
[ind
].type
, 2), R_TEMP
, 0);
2100 compiler_common
.search_states
[ind
].value
= -1;
2102 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_forward
), SLJIT_IMM
, 0);
2103 EMIT_JUMP(jump
, SLJIT_JUMP
);
2104 sljit_set_label(jump
, fast_forward_return_label
);
2106 /* Start fast-forward. */
2108 sljit_set_label(fast_forward_jump
, label
);
2110 /* Moving everything to registers. */
2111 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, next
), R_NEXT_STATE
, 0);
2112 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, current
), R_CURR_STATE
, 0);
2113 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, head
), R_NEXT_HEAD
, 0);
2114 EMIT_OP1(SLJIT_MOV
, R_NEXT_STATE
, 0, R_LENGTH
, 0);
2115 EMIT_OP1(SLJIT_MOV
, R_CURR_STATE
, 0, R_STRING
, 0);
2116 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, R_CURR_INDEX
, 0);
2118 /* Fast forward mainloop. */
2120 EMIT_OP2(SLJIT_SUB
| SLJIT_SET_Z
, R_NEXT_STATE
, 0, R_NEXT_STATE
, 0, SLJIT_IMM
, 1);
2121 EMIT_JUMP(fast_forward_jump
, SLJIT_EQUAL
);
2123 #ifdef REGEX_USE_8BIT_CHARS
2124 EMIT_OP1(SLJIT_MOV_U8
, R_CURR_CHAR
, 0, SLJIT_MEM1(R_CURR_STATE
), 0);
2125 EMIT_OP2(SLJIT_ADD
, R_CURR_STATE
, 0, R_CURR_STATE
, 0, SLJIT_IMM
, 1);
2127 EMIT_OP1(SLJIT_MOV_UH
, R_CURR_CHAR
, 0, SLJIT_MEM1(R_CURR_STATE
), 0);
2128 EMIT_OP2(SLJIT_ADD
, R_CURR_STATE
, 0, R_CURR_STATE
, 0, SLJIT_IMM
, 2);
2131 CHECK(trace_transitions(0, &compiler_common
));
2132 CHECK(compile_leave_fast_forward(&compiler_common
, fast_forward_label
));
2134 EMIT_OP2(SLJIT_ADD
, R_NEXT_HEAD
, 0, R_NEXT_HEAD
, 0, SLJIT_IMM
, 1);
2135 EMIT_JUMP(jump
, SLJIT_JUMP
);
2136 sljit_set_label(jump
, label
);
2138 /* String is finished. */
2140 sljit_set_label(fast_forward_jump
, label
);
2141 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, index
), R_NEXT_HEAD
, 0);
2142 EMIT_JUMP(fast_forward_jump
, SLJIT_JUMP
);
2146 if (end_check_jump
) {
2148 sljit_set_label(end_check_jump
, label
);
2150 if (!(compiler_common
.flags
& REGEX_MATCH_NON_GREEDY
) || !(compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2151 CHECK(compile_end_check(&compiler_common
, end_check_label
));
2154 /* Since we leave, we do not need to update the R_BEST_BEGIN. */
2155 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), SLJIT_IMM
, 0);
2156 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_end
), R_CURR_INDEX
, 0);
2157 if (compiler_common
.flags
& REGEX_ID_CHECK
) {
2158 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, best_id
), SLJIT_MEM1(R_CURR_STATE
), TERM_REL_OFFSET_OF(0, 2));
2160 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_quit
), SLJIT_IMM
, 1);
2161 EMIT_JUMP(non_greedy_end_jump
, SLJIT_JUMP
);
2166 if (best_match_check_jump
) {
2168 sljit_set_label(best_match_check_jump
, label
);
2170 if (!(compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2171 EMIT_CMP(jump
, SLJIT_NOT_EQUAL
, R_NEXT_HEAD
, 0, SLJIT_IMM
, 0);
2172 sljit_set_label(jump
, start_label
);
2174 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, fast_quit
), SLJIT_IMM
, 1);
2177 /* Leaving matching and storing the necessary values. */
2179 sljit_set_label(length_is_zero_jump
, label
);
2180 if (non_greedy_end_jump
)
2181 sljit_set_label(non_greedy_end_jump
, label
);
2183 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, index
), R_CURR_INDEX
, 0);
2184 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, head
), R_NEXT_HEAD
, 0);
2185 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, next
), R_NEXT_STATE
, 0);
2186 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_REGEX_MATCH
), SLJIT_OFFSETOF(struct regex_match
, current
), R_CURR_STATE
, 0);
2188 /* Exit from JIT. */
2190 sljit_set_label(best_match_found_jump
, label
);
2191 if (fast_forward_jump
)
2192 sljit_set_label(fast_forward_jump
, label
);
2193 CHECK(sljit_emit_return_void(compiler_common
.compiler
));
2195 for (ind
= 1; ind
< (sljit_sw
)compiler_common
.dfa_size
- 1; ind
++) {
2196 if (compiler_common
.search_states
[ind
].type
>= 0) {
2197 SLJIT_ASSERT(compiler_common
.search_states
[ind
].type
< compiler_common
.terms_size
);
2199 sljit_emit_op0(compiler_common
.compiler
, SLJIT_ENDBR
);
2200 compiler_common
.machine
->entry_addrs
[compiler_common
.search_states
[ind
].type
] = (sljit_uw
)label
;
2202 if (compiler_common
.dfa_transitions
[ind
].type
== type_char
) {
2203 EMIT_CMP(jump
, SLJIT_NOT_EQUAL
, R_CURR_CHAR
, 0, SLJIT_IMM
, compiler_common
.dfa_transitions
[ind
].value
);
2205 else if (compiler_common
.dfa_transitions
[ind
].type
== type_rng_start
) {
2206 ind
= compile_range_check(&compiler_common
, ind
);
2210 SLJIT_ASSERT(compiler_common
.dfa_transitions
[ind
].type
== type_newline
);
2211 CHECK(compile_newline_check(&compiler_common
, ind
));
2214 CHECK(trace_transitions((int)ind
, &compiler_common
));
2215 #ifdef REGEX_MATCH_VERBOSE
2216 if (compiler_common
.flags
& REGEX_MATCH_VERBOSE
)
2217 printf("(%3d): ", compiler_common
.search_states
[ind
].type
);
2219 CHECK(compile_cond_tran(&compiler_common
, compiler_common
.search_states
[ind
].type
));
2221 if (compiler_common
.dfa_transitions
[ind
].type
== type_char
) {
2223 sljit_set_label(jump
, label
);
2225 else if (compiler_common
.dfa_transitions
[ind
].type
== type_rng_end
) {
2227 range_set_label(compiler_common
.range_jump_list
, label
);
2230 SLJIT_ASSERT(compiler_common
.dfa_transitions
[ind
].type
== type_newline
);
2233 /* Branch to the next item in the list. */
2234 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(compiler_common
.search_states
[ind
].type
, 1));
2235 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(R_CURR_STATE
), TERM_OFFSET_OF(compiler_common
.search_states
[ind
].type
, 1), SLJIT_IMM
, -1);
2236 CHECK(sljit_emit_ijump(compiler_common
.compiler
, SLJIT_JUMP
, SLJIT_MEM2(R_CURR_STATE
, R_TEMP
), 0));
2240 if (ind
== (sljit_sw
)compiler_common
.dfa_size
- 1) {
2241 /* Generate an init stub function. */
2243 CHECK(sljit_emit_enter(compiler_common
.compiler
, 0, SLJIT_ARGS2(W
, P
, P
), 3, 3, 0, 0, 0));
2245 if (empty_match_id
== -1) {
2246 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_S1
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), SLJIT_IMM
, -1);
2247 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_S1
), SLJIT_OFFSETOF(struct regex_match
, best_id
), SLJIT_IMM
, 0);
2250 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_S1
), SLJIT_OFFSETOF(struct regex_match
, best_begin
), SLJIT_IMM
, 0);
2251 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_S1
), SLJIT_OFFSETOF(struct regex_match
, best_id
), SLJIT_IMM
, empty_match_id
);
2254 EMIT_OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_S1
), SLJIT_OFFSETOF(struct regex_match
, index
), SLJIT_IMM
, !(compiler_common
.flags
& REGEX_FAKE_MATCH_BEGIN
) ? 1 : 2);
2256 if (!(compiler_common
.flags
& REGEX_MATCH_NON_GREEDY
) || empty_match_id
== -1) {
2257 /* The else is a really rare event, so we still generate an empty function instead of a runtime pointer check. */
2258 SLJIT_ASSERT(R_CURR_STATE
== SLJIT_S0
);
2259 if (!(compiler_common
.flags
& REGEX_MATCH_BEGIN
)) {
2260 /* R_CURR_INDEX (put to R_TEMP) is zero. */
2261 EMIT_OP1(SLJIT_MOV
, R_TEMP
, 0, SLJIT_IMM
, 0);
2263 CHECK(compile_uncond_tran(&compiler_common
, R_CURR_STATE
));
2266 EMIT_OP1(SLJIT_MOV
, R_NEXT_HEAD
, 0, SLJIT_IMM
, 0);
2268 CHECK(sljit_emit_return(compiler_common
.compiler
, SLJIT_MOV
, R_NEXT_HEAD
, 0));
2270 compiler_common
.machine
->continue_match
= sljit_generate_code(compiler_common
.compiler
);
2271 #ifndef SLJIT_INDIRECT_CALL
2272 compiler_common
.machine
->u
.init_match
= (void*)(sljit_sw
)sljit_get_label_addr(label
);
2274 sljit_set_function_context(&compiler_common
.machine
->u
.init_match
, &compiler_common
.machine
->context
, sljit_get_label_addr(label
), regex_compile
);
2276 #ifdef REGEX_MATCH_VERBOSE
2277 if (compiler_common
.flags
& REGEX_MATCH_VERBOSE
)
2278 printf("Continue match: %p Init match: %p\n\n", compiler_common
.machine
->continue_match
, compiler_common
.machine
->u
.init_match
);
2280 if (compiler_common
.machine
->continue_match
) {
2281 for (ind
= 0; ind
< compiler_common
.terms_size
; ++ind
)
2282 compiler_common
.machine
->entry_addrs
[ind
] = sljit_get_label_addr((struct sljit_label
*)compiler_common
.machine
->entry_addrs
[ind
]);
2288 stack_destroy(&compiler_common
.stack
);
2289 stack_destroy(&compiler_common
.depth
);
2290 SLJIT_FREE(compiler_common
.dfa_transitions
, NULL
);
2291 SLJIT_FREE(compiler_common
.search_states
, NULL
);
2292 if (compiler_common
.range_jump_list
)
2293 SLJIT_FREE(compiler_common
.range_jump_list
, NULL
);
2294 if (compiler_common
.compiler
)
2295 sljit_free_compiler(compiler_common
.compiler
);
2297 return compiler_common
.machine
;
2299 if (compiler_common
.machine
) {
2300 SLJIT_FREE(compiler_common
.machine
, NULL
);
2303 *error
= REGEX_MEMORY_ERROR
;
2307 #undef TERM_OFFSET_OF
2317 void regex_free_machine(struct regex_machine
*machine
)
2319 sljit_free_code(machine
->continue_match
, NULL
);
2320 SLJIT_FREE(machine
, NULL
);
2323 const char* regex_get_platform_name(void)
2325 return sljit_get_platform_name();
2328 /* --------------------------------------------------------------------- */
2329 /* Mathching utilities */
2330 /* --------------------------------------------------------------------- */
2332 struct regex_match
* regex_begin_match(struct regex_machine
*machine
)
2337 sljit_sw
*entry_addrs
;
2339 struct regex_match
*match
= (struct regex_match
*)SLJIT_MALLOC(sizeof(struct regex_match
) + (sljit_uw
)(machine
->size
* 2 - 1) * sizeof(sljit_sw
), NULL
);
2343 ptr1
= match
->states
;
2344 ptr2
= match
->states
+ machine
->size
;
2346 entry_addrs
= (sljit_sw
*)machine
->entry_addrs
;
2348 match
->current
= ptr1
;
2351 match
->machine
= machine
;
2353 /* Init machine states. */
2354 switch (machine
->no_states
) {
2356 while (ptr1
< end
) {
2357 *ptr1
++ = *entry_addrs
;
2358 *ptr2
++ = *entry_addrs
++;
2365 while (ptr1
< end
) {
2366 *ptr1
++ = *entry_addrs
;
2367 *ptr2
++ = *entry_addrs
++;
2376 while (ptr1
< end
) {
2377 *ptr1
++ = *entry_addrs
;
2378 *ptr2
++ = *entry_addrs
++;
2389 SLJIT_UNREACHABLE();
2393 SLJIT_ASSERT(ptr1
== end
);
2395 match
->u
.continue_match
= machine
->continue_match
;
2397 regex_reset_match(match
);
2401 void regex_reset_match(struct regex_match
*match
)
2403 struct regex_machine
*machine
= match
->machine
;
2404 sljit_sw current
, ind
;
2405 sljit_sw
*current_ptr
;
2407 match
->best_end
= 0;
2408 match
->fast_quit
= 0;
2409 match
->fast_forward
= 0;
2411 if (match
->head
!= 0) {
2412 /* Clear the current state. */
2413 current
= match
->head
;
2414 current_ptr
= match
->current
;
2416 ind
= (current
/ (sljit_sw
)sizeof(sljit_sw
)) + 1;
2417 current
= current_ptr
[ind
];
2418 current_ptr
[ind
] = -1;
2419 } while (current
!= 0);
2421 match
->head
= machine
->u
.call_init(match
->current
, match
);
2424 void regex_free_match(struct regex_match
*match
)
2426 SLJIT_FREE(match
, NULL
);
2429 void regex_continue_match(struct regex_match
*match
, const regex_char_t
*input_string
, int length
)
2431 match
->u
.call_continue(match
, input_string
, length
);
2434 int regex_get_result(struct regex_match
*match
, int *end
, int *id
)
2436 int flags
= match
->machine
->flags
;
2439 *end
= (int)match
->best_end
;
2440 *id
= (int)match
->best_id
;
2441 if (!(flags
& (REGEX_MATCH_END
| REGEX_FAKE_MATCH_END
)))
2442 return (int)match
->best_begin
;
2444 if (flags
& REGEX_FAKE_MATCH_END
) {
2445 SLJIT_ASSERT(!(flags
& (REGEX_MATCH_BEGIN
| REGEX_MATCH_END
)));
2446 if (match
->best_begin
!= -1)
2447 return (int)match
->best_begin
;
2449 no_states
= match
->machine
->no_states
;
2450 if (match
->current
[no_states
+ 1] == -1)
2452 if (flags
& REGEX_ID_CHECK
)
2453 *id
= (int)match
->current
[no_states
+ 3];
2454 if (!(flags
& REGEX_FAKE_MATCH_BEGIN
))
2455 *end
= (int)match
->index
- 1;
2457 *end
= (int)match
->index
- 2;
2458 return (int)match
->current
[no_states
+ 2];
2461 /* Check the status of the last code. */
2462 if (!(flags
& REGEX_MATCH_BEGIN
)) {
2463 /* No shortcut in this case. */
2464 if (!(flags
& REGEX_ID_CHECK
)) {
2465 if (match
->current
[1] == -1)
2467 *end
= (int)match
->index
- 1;
2468 return (int)match
->current
[2];
2471 if (match
->current
[1] == -1)
2473 *end
= (int)match
->index
- 1;
2474 *id
= (int)match
->current
[3];
2475 return (int)match
->current
[2];
2478 /* Shortcut is possible in this case. */
2479 if (!(flags
& REGEX_ID_CHECK
)) {
2480 if (match
->current
[1] == -1 || match
->head
== -1)
2482 *end
= (int)match
->index
- 1;
2486 if (match
->current
[1] == -1 || match
->head
== -1)
2488 *end
= (int)match
->index
- 1;
2489 *id
= (int)match
->current
[2];
2494 int regex_is_match_finished(struct regex_match
*match
)
2496 return (int)match
->fast_quit
;
2499 #ifdef REGEX_MATCH_VERBOSE
2500 void regex_continue_match_debug(struct regex_match
*match
, const regex_char_t
*input_string
, int length
)
2505 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2508 sljit_sw no_states
= match
->machine
->no_states
;
2509 sljit_sw len
= match
->machine
->size
;
2511 while (length
> 0) {
2512 match
->u
.call_continue(match
, input_string
, 1);
2514 if (match
->fast_forward
) {
2515 if (match
->machine
->flags
& REGEX_MATCH_VERBOSE
)
2516 printf("fast forward\n");
2519 /* Verbose (first). */
2520 if (match
->machine
->flags
& REGEX_MATCH_VERBOSE
) {
2521 ptr
= match
->current
;
2524 printf("'%c' (%3ld->%3ld [%3ld]) ", *input_string
, (long)match
->best_begin
, (long)match
->best_end
, (long)match
->best_id
);
2526 printf("[%3ld:", (long)count
++);
2527 switch (no_states
) {
2537 printf("+,%3ld] ", (long)ptr
[2]);
2544 printf("+,%3ld,%3ld] ", (long)ptr
[2], (long)ptr
[3]);
2546 printf(" ,XXX,XXX] ");
2554 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2555 /* Sanity check (later). */
2559 SLJIT_ASSERT(ptr
[1] == -1);
2563 /* Check number of active elements. */
2564 ptr
= match
->current
+ no_states
;
2565 end
= ptr
+ len
- no_states
;
2573 /* Check chain list. */
2574 current
= match
->head
;
2575 ptr
= match
->current
;
2576 while (current
!= 0) {
2577 SLJIT_ASSERT(current
>= 0 && current
< len
* (sljit_sw
)sizeof(sljit_sw
));
2578 SLJIT_ASSERT((current
% (no_states
* (sljit_sw
)sizeof(sljit_sw
))) == 0);
2579 SLJIT_ASSERT(count
> 0);
2580 current
= ptr
[(current
/ (sljit_sw
)sizeof(sljit_sw
)) + 1];
2583 SLJIT_ASSERT(count
== 0);
2586 if (match
->fast_quit
) {
2587 /* the machine has stopped working. */
2588 if (match
->machine
->flags
& REGEX_MATCH_VERBOSE
)
2589 printf("Best match has found\n");