Add some more cases to the app-id unit tests
[glib.git] / glib / pcre / pcre_jit_compile.c
blobacb7ea22a13ac0b4e938c27a5c09c54c82592104
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
47 #include "pcre_internal.h"
49 #ifdef SUPPORT_JIT
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
62 #include "sljit/sljitLir.c"
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
/* Size of the memory area allocated on the stack: fast to obtain, but
limited in size. */
#define LOCAL_SPACE_SIZE 32768

#define STACK_GROWTH_RATE 8192

/* Only defined when SLJIT_DEBUG is enabled: allows checking that an
allocation could destroy the temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
79 Short summary about the backtracking mechanism employed by the JIT code generator:
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the try path (expected path), and the other is called
93 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the try path.
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
107 A(B|C)D
109 The generated code will be the following:
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C try path
122 jump to D try path
123 C backtrack path
124 A backtrack path
126 Notice that the order of the backtrack code paths is the opposite of the try
127 code paths. In this way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
135 Saved stack frames:
137 Atomic blocks and asserts require reloading the values of local variables
138 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
145 Thus we can restore the locals to a particular point in the stack.
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
/* Node of a singly linked list of sljit jumps. New nodes are pushed at the
head by add_jump, and set_jumps binds every jump in a list to one label. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
/* Kinds of out-of-line stubs. */
enum stub_types { stack_alloc };

/* Node of the list of pending stubs (created by add_stub and emitted by
flush_stubs). */
typedef struct stub_list {
  enum stub_types type;
  int data;
  /* Jump that enters the stub. */
  struct sljit_jump *start;
  /* Label the stub jumps back to once it is done. */
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the aguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to coninue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
271 typedef struct compiler_common {
272 struct sljit_compiler *compiler;
273 pcre_uchar *start;
275 /* Opcode local area direct map. */
276 int *localptrs;
277 int cbraptr;
278 /* OVector starting point. Must be divisible by 2. */
279 int ovector_start;
280 /* Last known position of the requested byte. */
281 int req_char_ptr;
282 /* Head of the last recursion. */
283 int recursive_head;
284 /* First inspected character for partial matching. */
285 int start_used_ptr;
286 /* Starting pointer for partial soft matches. */
287 int hit_start;
288 /* End pointer of the first line. */
289 int first_line_end;
290 /* Points to the marked string. */
291 int mark_ptr;
293 /* Other */
294 const pcre_uint8 *fcc;
295 sljit_w lcc;
296 int mode;
297 int nltype;
298 int newline;
299 int bsr_nltype;
300 int endonly;
301 BOOL has_set_som;
302 sljit_w ctypes;
303 sljit_uw name_table;
304 sljit_w name_count;
305 sljit_w name_entry_size;
307 /* Labels and jump lists. */
308 struct sljit_label *partialmatchlabel;
309 struct sljit_label *leavelabel;
310 struct sljit_label *acceptlabel;
311 stub_list *stubs;
312 recurse_entry *entries;
313 recurse_entry *currententry;
314 jump_list *partialmatch;
315 jump_list *leave;
316 jump_list *accept;
317 jump_list *calllimit;
318 jump_list *stackalloc;
319 jump_list *revertframes;
320 jump_list *wordboundary;
321 jump_list *anynewline;
322 jump_list *hspace;
323 jump_list *vspace;
324 jump_list *casefulcmp;
325 jump_list *caselesscmp;
326 BOOL jscript_compat;
327 #ifdef SUPPORT_UTF
328 BOOL utf;
329 #ifdef SUPPORT_UCP
330 BOOL use_ucp;
331 #endif
332 jump_list *utfreadchar;
333 #ifdef COMPILE_PCRE8
334 jump_list *utfreadtype8;
335 #endif
336 #endif /* SUPPORT_UTF */
337 #ifdef SUPPORT_UCP
338 jump_list *getunichartype;
339 jump_list *getunichartype_2;
340 jump_list *getunicharscript;
341 #endif
342 } compiler_common;
/* Context for byte_sequence_compare. The two unions (c and oc) have the
same layout and exist only when SLJIT_UNALIGNED is enabled; the width of
their asuchars arrays depends on the code unit size being compiled. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
/* Marker values stored by init_frame in a saved stack frame: frame_end
closes the frame, while the negative values tag the entry that init_frame
writes for the saved OVECTOR(0) value and for the saved mark pointer. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
/* Drop the sljit definition of CMP; this file defines its own CMP
shortcut. */
#undef CMP

/* Byte offset of the i-th stack element: element 0 is one machine word
below the base, element 1 two words below, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))

/* Short names for the sljit registers used by the code generator. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
/* Layout of the locals. All values are byte offsets in machine words. */
/* Two locals which the current opcode is free to use. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two locals reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector lives on the stack and holds character pointers. Its
data is split into two groups: the first holds the start / end character
pointers, the second holds the start pointers of capturing groups whose end
has not been reached yet. These macros expect a variable named 'common' to
be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
419 #ifdef COMPILE_PCRE8
420 #define MOV_UCHAR SLJIT_MOV_UB
421 #define MOVU_UCHAR SLJIT_MOVU_UB
422 #else
423 #ifdef COMPILE_PCRE16
424 #define MOV_UCHAR SLJIT_MOV_UH
425 #define MOVU_UCHAR SLJIT_MOVU_UH
426 #else
427 #error Unsupported compiling mode
428 #endif
429 #endif
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares a local
'compiler' taken from 'common'; every other macro passes that local as the
first argument of the wrapped sljit function. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
455 static pcre_uchar* bracketend(pcre_uchar* cc)
457 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
458 do cc += GET(cc, 1); while (*cc == OP_ALT);
459 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
460 cc += 1 + LINK_SIZE;
461 return cc;
464 /* Functions which might need modification for every newly supported opcode:
465 next_opcode
466 get_localspace
467 set_localptrs
468 get_framesize
469 init_frame
470 get_localsize
471 copy_locals
472 compile_trypath
473 compile_backtrackpath
476 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
478 SLJIT_UNUSED_ARG(common);
479 switch(*cc)
481 case OP_SOD:
482 case OP_SOM:
483 case OP_SET_SOM:
484 case OP_NOT_WORD_BOUNDARY:
485 case OP_WORD_BOUNDARY:
486 case OP_NOT_DIGIT:
487 case OP_DIGIT:
488 case OP_NOT_WHITESPACE:
489 case OP_WHITESPACE:
490 case OP_NOT_WORDCHAR:
491 case OP_WORDCHAR:
492 case OP_ANY:
493 case OP_ALLANY:
494 case OP_ANYNL:
495 case OP_NOT_HSPACE:
496 case OP_HSPACE:
497 case OP_NOT_VSPACE:
498 case OP_VSPACE:
499 case OP_EXTUNI:
500 case OP_EODN:
501 case OP_EOD:
502 case OP_CIRC:
503 case OP_CIRCM:
504 case OP_DOLL:
505 case OP_DOLLM:
506 case OP_TYPESTAR:
507 case OP_TYPEMINSTAR:
508 case OP_TYPEPLUS:
509 case OP_TYPEMINPLUS:
510 case OP_TYPEQUERY:
511 case OP_TYPEMINQUERY:
512 case OP_TYPEPOSSTAR:
513 case OP_TYPEPOSPLUS:
514 case OP_TYPEPOSQUERY:
515 case OP_CRSTAR:
516 case OP_CRMINSTAR:
517 case OP_CRPLUS:
518 case OP_CRMINPLUS:
519 case OP_CRQUERY:
520 case OP_CRMINQUERY:
521 case OP_DEF:
522 case OP_BRAZERO:
523 case OP_BRAMINZERO:
524 case OP_BRAPOSZERO:
525 case OP_COMMIT:
526 case OP_FAIL:
527 case OP_ACCEPT:
528 case OP_ASSERT_ACCEPT:
529 case OP_SKIPZERO:
530 return cc + 1;
532 case OP_ANYBYTE:
533 #ifdef SUPPORT_UTF
534 if (common->utf) return NULL;
535 #endif
536 return cc + 1;
538 case OP_CHAR:
539 case OP_CHARI:
540 case OP_NOT:
541 case OP_NOTI:
542 case OP_STAR:
543 case OP_MINSTAR:
544 case OP_PLUS:
545 case OP_MINPLUS:
546 case OP_QUERY:
547 case OP_MINQUERY:
548 case OP_POSSTAR:
549 case OP_POSPLUS:
550 case OP_POSQUERY:
551 case OP_STARI:
552 case OP_MINSTARI:
553 case OP_PLUSI:
554 case OP_MINPLUSI:
555 case OP_QUERYI:
556 case OP_MINQUERYI:
557 case OP_POSSTARI:
558 case OP_POSPLUSI:
559 case OP_POSQUERYI:
560 case OP_NOTSTAR:
561 case OP_NOTMINSTAR:
562 case OP_NOTPLUS:
563 case OP_NOTMINPLUS:
564 case OP_NOTQUERY:
565 case OP_NOTMINQUERY:
566 case OP_NOTPOSSTAR:
567 case OP_NOTPOSPLUS:
568 case OP_NOTPOSQUERY:
569 case OP_NOTSTARI:
570 case OP_NOTMINSTARI:
571 case OP_NOTPLUSI:
572 case OP_NOTMINPLUSI:
573 case OP_NOTQUERYI:
574 case OP_NOTMINQUERYI:
575 case OP_NOTPOSSTARI:
576 case OP_NOTPOSPLUSI:
577 case OP_NOTPOSQUERYI:
578 cc += 2;
579 #ifdef SUPPORT_UTF
580 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
581 #endif
582 return cc;
584 case OP_UPTO:
585 case OP_MINUPTO:
586 case OP_EXACT:
587 case OP_POSUPTO:
588 case OP_UPTOI:
589 case OP_MINUPTOI:
590 case OP_EXACTI:
591 case OP_POSUPTOI:
592 case OP_NOTUPTO:
593 case OP_NOTMINUPTO:
594 case OP_NOTEXACT:
595 case OP_NOTPOSUPTO:
596 case OP_NOTUPTOI:
597 case OP_NOTMINUPTOI:
598 case OP_NOTEXACTI:
599 case OP_NOTPOSUPTOI:
600 cc += 2 + IMM2_SIZE;
601 #ifdef SUPPORT_UTF
602 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
603 #endif
604 return cc;
606 case OP_NOTPROP:
607 case OP_PROP:
608 return cc + 1 + 2;
610 case OP_TYPEUPTO:
611 case OP_TYPEMINUPTO:
612 case OP_TYPEEXACT:
613 case OP_TYPEPOSUPTO:
614 case OP_REF:
615 case OP_REFI:
616 case OP_CREF:
617 case OP_NCREF:
618 case OP_RREF:
619 case OP_NRREF:
620 case OP_CLOSE:
621 cc += 1 + IMM2_SIZE;
622 return cc;
624 case OP_CRRANGE:
625 case OP_CRMINRANGE:
626 return cc + 1 + 2 * IMM2_SIZE;
628 case OP_CLASS:
629 case OP_NCLASS:
630 return cc + 1 + 32 / sizeof(pcre_uchar);
632 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
633 case OP_XCLASS:
634 return cc + GET(cc, 1);
635 #endif
637 case OP_RECURSE:
638 case OP_ASSERT:
639 case OP_ASSERT_NOT:
640 case OP_ASSERTBACK:
641 case OP_ASSERTBACK_NOT:
642 case OP_REVERSE:
643 case OP_ONCE:
644 case OP_ONCE_NC:
645 case OP_BRA:
646 case OP_BRAPOS:
647 case OP_COND:
648 case OP_SBRA:
649 case OP_SBRAPOS:
650 case OP_SCOND:
651 case OP_ALT:
652 case OP_KET:
653 case OP_KETRMAX:
654 case OP_KETRMIN:
655 case OP_KETRPOS:
656 return cc + 1 + LINK_SIZE;
658 case OP_CBRA:
659 case OP_CBRAPOS:
660 case OP_SCBRA:
661 case OP_SCBRAPOS:
662 return cc + 1 + LINK_SIZE + IMM2_SIZE;
664 case OP_MARK:
665 return cc + 1 + 2 + cc[1];
667 default:
668 return NULL;
672 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
674 int localspace = 0;
675 pcre_uchar *alternative;
676 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
677 while (cc < ccend)
679 switch(*cc)
681 case OP_SET_SOM:
682 common->has_set_som = TRUE;
683 cc += 1;
684 break;
686 case OP_ASSERT:
687 case OP_ASSERT_NOT:
688 case OP_ASSERTBACK:
689 case OP_ASSERTBACK_NOT:
690 case OP_ONCE:
691 case OP_ONCE_NC:
692 case OP_BRAPOS:
693 case OP_SBRA:
694 case OP_SBRAPOS:
695 case OP_SCOND:
696 localspace += sizeof(sljit_w);
697 cc += 1 + LINK_SIZE;
698 break;
700 case OP_CBRAPOS:
701 case OP_SCBRAPOS:
702 localspace += sizeof(sljit_w);
703 cc += 1 + LINK_SIZE + IMM2_SIZE;
704 break;
706 case OP_COND:
707 /* Might be a hidden SCOND. */
708 alternative = cc + GET(cc, 1);
709 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
710 localspace += sizeof(sljit_w);
711 cc += 1 + LINK_SIZE;
712 break;
714 case OP_RECURSE:
715 /* Set its value only once. */
716 if (common->recursive_head == 0)
718 common->recursive_head = common->ovector_start;
719 common->ovector_start += sizeof(sljit_w);
721 cc += 1 + LINK_SIZE;
722 break;
724 case OP_MARK:
725 if (common->mark_ptr == 0)
727 common->mark_ptr = common->ovector_start;
728 common->ovector_start += sizeof(sljit_w);
730 cc += 1 + 2 + cc[1];
731 break;
733 default:
734 cc = next_opcode(common, cc);
735 if (cc == NULL)
736 return -1;
737 break;
740 return localspace;
743 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
745 pcre_uchar *cc = common->start;
746 pcre_uchar *alternative;
747 while (cc < ccend)
749 switch(*cc)
751 case OP_ASSERT:
752 case OP_ASSERT_NOT:
753 case OP_ASSERTBACK:
754 case OP_ASSERTBACK_NOT:
755 case OP_ONCE:
756 case OP_ONCE_NC:
757 case OP_BRAPOS:
758 case OP_SBRA:
759 case OP_SBRAPOS:
760 case OP_SCOND:
761 common->localptrs[cc - common->start] = localptr;
762 localptr += sizeof(sljit_w);
763 cc += 1 + LINK_SIZE;
764 break;
766 case OP_CBRAPOS:
767 case OP_SCBRAPOS:
768 common->localptrs[cc - common->start] = localptr;
769 localptr += sizeof(sljit_w);
770 cc += 1 + LINK_SIZE + IMM2_SIZE;
771 break;
773 case OP_COND:
774 /* Might be a hidden SCOND. */
775 alternative = cc + GET(cc, 1);
776 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
778 common->localptrs[cc - common->start] = localptr;
779 localptr += sizeof(sljit_w);
781 cc += 1 + LINK_SIZE;
782 break;
784 default:
785 cc = next_opcode(common, cc);
786 SLJIT_ASSERT(cc != NULL);
787 break;
792 /* Returns with -1 if no need for frame. */
793 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
795 pcre_uchar *ccend = bracketend(cc);
796 int length = 0;
797 BOOL possessive = FALSE;
798 BOOL setsom_found = recursive;
799 BOOL setmark_found = recursive;
801 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
803 length = 3;
804 possessive = TRUE;
807 cc = next_opcode(common, cc);
808 SLJIT_ASSERT(cc != NULL);
809 while (cc < ccend)
810 switch(*cc)
812 case OP_SET_SOM:
813 SLJIT_ASSERT(common->has_set_som);
814 if (!setsom_found)
816 length += 2;
817 setsom_found = TRUE;
819 cc += 1;
820 break;
822 case OP_MARK:
823 SLJIT_ASSERT(common->mark_ptr != 0);
824 if (!setmark_found)
826 length += 2;
827 setmark_found = TRUE;
829 cc += 1 + 2 + cc[1];
830 break;
832 case OP_RECURSE:
833 if (common->has_set_som && !setsom_found)
835 length += 2;
836 setsom_found = TRUE;
838 if (common->mark_ptr != 0 && !setmark_found)
840 length += 2;
841 setmark_found = TRUE;
843 cc += 1 + LINK_SIZE;
844 break;
846 case OP_CBRA:
847 case OP_CBRAPOS:
848 case OP_SCBRA:
849 case OP_SCBRAPOS:
850 length += 3;
851 cc += 1 + LINK_SIZE + IMM2_SIZE;
852 break;
854 default:
855 cc = next_opcode(common, cc);
856 SLJIT_ASSERT(cc != NULL);
857 break;
860 /* Possessive quantifiers can use a special case. */
861 if (SLJIT_UNLIKELY(possessive) && length == 3)
862 return -1;
864 if (length > 0)
865 return length + 1;
866 return -1;
869 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
871 DEFINE_COMPILER;
872 pcre_uchar *ccend = bracketend(cc);
873 BOOL setsom_found = recursive;
874 BOOL setmark_found = recursive;
875 int offset;
877 /* >= 1 + shortest item size (2) */
878 SLJIT_UNUSED_ARG(stacktop);
879 SLJIT_ASSERT(stackpos >= stacktop + 2);
881 stackpos = STACK(stackpos);
882 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
883 cc = next_opcode(common, cc);
884 SLJIT_ASSERT(cc != NULL);
885 while (cc < ccend)
886 switch(*cc)
888 case OP_SET_SOM:
889 SLJIT_ASSERT(common->has_set_som);
890 if (!setsom_found)
892 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
894 stackpos += (int)sizeof(sljit_w);
895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
896 stackpos += (int)sizeof(sljit_w);
897 setsom_found = TRUE;
899 cc += 1;
900 break;
902 case OP_MARK:
903 SLJIT_ASSERT(common->mark_ptr != 0);
904 if (!setmark_found)
906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
908 stackpos += (int)sizeof(sljit_w);
909 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
910 stackpos += (int)sizeof(sljit_w);
911 setmark_found = TRUE;
913 cc += 1 + 2 + cc[1];
914 break;
916 case OP_RECURSE:
917 if (common->has_set_som && !setsom_found)
919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
921 stackpos += (int)sizeof(sljit_w);
922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
923 stackpos += (int)sizeof(sljit_w);
924 setsom_found = TRUE;
926 if (common->mark_ptr != 0 && !setmark_found)
928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
930 stackpos += (int)sizeof(sljit_w);
931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
932 stackpos += (int)sizeof(sljit_w);
933 setmark_found = TRUE;
935 cc += 1 + LINK_SIZE;
936 break;
938 case OP_CBRA:
939 case OP_CBRAPOS:
940 case OP_SCBRA:
941 case OP_SCBRAPOS:
942 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
944 stackpos += (int)sizeof(sljit_w);
945 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
946 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
948 stackpos += (int)sizeof(sljit_w);
949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
950 stackpos += (int)sizeof(sljit_w);
952 cc += 1 + LINK_SIZE + IMM2_SIZE;
953 break;
955 default:
956 cc = next_opcode(common, cc);
957 SLJIT_ASSERT(cc != NULL);
958 break;
961 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
962 SLJIT_ASSERT(stackpos == STACK(stacktop));
965 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
967 int localsize = 2;
968 pcre_uchar *alternative;
969 /* Calculate the sum of the local variables. */
970 while (cc < ccend)
972 switch(*cc)
974 case OP_ASSERT:
975 case OP_ASSERT_NOT:
976 case OP_ASSERTBACK:
977 case OP_ASSERTBACK_NOT:
978 case OP_ONCE:
979 case OP_ONCE_NC:
980 case OP_BRAPOS:
981 case OP_SBRA:
982 case OP_SBRAPOS:
983 case OP_SCOND:
984 localsize++;
985 cc += 1 + LINK_SIZE;
986 break;
988 case OP_CBRA:
989 case OP_SCBRA:
990 localsize++;
991 cc += 1 + LINK_SIZE + IMM2_SIZE;
992 break;
994 case OP_CBRAPOS:
995 case OP_SCBRAPOS:
996 localsize += 2;
997 cc += 1 + LINK_SIZE + IMM2_SIZE;
998 break;
1000 case OP_COND:
1001 /* Might be a hidden SCOND. */
1002 alternative = cc + GET(cc, 1);
1003 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1004 localsize++;
1005 cc += 1 + LINK_SIZE;
1006 break;
1008 default:
1009 cc = next_opcode(common, cc);
1010 SLJIT_ASSERT(cc != NULL);
1011 break;
1014 SLJIT_ASSERT(cc == ccend);
1015 return localsize;
1018 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1019 BOOL save, int stackptr, int stacktop)
1021 DEFINE_COMPILER;
1022 int srcw[2];
1023 int count;
1024 BOOL tmp1next = TRUE;
1025 BOOL tmp1empty = TRUE;
1026 BOOL tmp2empty = TRUE;
1027 pcre_uchar *alternative;
1028 enum {
1029 start,
1030 loop,
1032 } status;
1034 status = save ? start : loop;
1035 stackptr = STACK(stackptr - 2);
1036 stacktop = STACK(stacktop - 1);
1038 if (!save)
1040 stackptr += sizeof(sljit_w);
1041 if (stackptr < stacktop)
1043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1044 stackptr += sizeof(sljit_w);
1045 tmp1empty = FALSE;
1047 if (stackptr < stacktop)
1049 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1050 stackptr += sizeof(sljit_w);
1051 tmp2empty = FALSE;
1053 /* The tmp1next must be TRUE in either way. */
1056 while (status != end)
1058 count = 0;
1059 switch(status)
1061 case start:
1062 SLJIT_ASSERT(save && common->recursive_head != 0);
1063 count = 1;
1064 srcw[0] = common->recursive_head;
1065 status = loop;
1066 break;
1068 case loop:
1069 if (cc >= ccend)
1071 status = end;
1072 break;
1075 switch(*cc)
1077 case OP_ASSERT:
1078 case OP_ASSERT_NOT:
1079 case OP_ASSERTBACK:
1080 case OP_ASSERTBACK_NOT:
1081 case OP_ONCE:
1082 case OP_ONCE_NC:
1083 case OP_BRAPOS:
1084 case OP_SBRA:
1085 case OP_SBRAPOS:
1086 case OP_SCOND:
1087 count = 1;
1088 srcw[0] = PRIV_DATA(cc);
1089 SLJIT_ASSERT(srcw[0] != 0);
1090 cc += 1 + LINK_SIZE;
1091 break;
1093 case OP_CBRA:
1094 case OP_SCBRA:
1095 count = 1;
1096 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1097 cc += 1 + LINK_SIZE + IMM2_SIZE;
1098 break;
1100 case OP_CBRAPOS:
1101 case OP_SCBRAPOS:
1102 count = 2;
1103 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1104 srcw[0] = PRIV_DATA(cc);
1105 SLJIT_ASSERT(srcw[0] != 0);
1106 cc += 1 + LINK_SIZE + IMM2_SIZE;
1107 break;
1109 case OP_COND:
1110 /* Might be a hidden SCOND. */
1111 alternative = cc + GET(cc, 1);
1112 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1114 count = 1;
1115 srcw[0] = PRIV_DATA(cc);
1116 SLJIT_ASSERT(srcw[0] != 0);
1118 cc += 1 + LINK_SIZE;
1119 break;
1121 default:
1122 cc = next_opcode(common, cc);
1123 SLJIT_ASSERT(cc != NULL);
1124 break;
1126 break;
1128 case end:
1129 SLJIT_ASSERT_STOP();
1130 break;
1133 while (count > 0)
1135 count--;
1136 if (save)
1138 if (tmp1next)
1140 if (!tmp1empty)
1142 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1143 stackptr += sizeof(sljit_w);
1145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1146 tmp1empty = FALSE;
1147 tmp1next = FALSE;
1149 else
1151 if (!tmp2empty)
1153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1154 stackptr += sizeof(sljit_w);
1156 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1157 tmp2empty = FALSE;
1158 tmp1next = TRUE;
1161 else
1163 if (tmp1next)
1165 SLJIT_ASSERT(!tmp1empty);
1166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1167 tmp1empty = stackptr >= stacktop;
1168 if (!tmp1empty)
1170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1171 stackptr += sizeof(sljit_w);
1173 tmp1next = FALSE;
1175 else
1177 SLJIT_ASSERT(!tmp2empty);
1178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1179 tmp2empty = stackptr >= stacktop;
1180 if (!tmp2empty)
1182 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1183 stackptr += sizeof(sljit_w);
1185 tmp1next = TRUE;
1191 if (save)
1193 if (tmp1next)
1195 if (!tmp1empty)
1197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1198 stackptr += sizeof(sljit_w);
1200 if (!tmp2empty)
1202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1203 stackptr += sizeof(sljit_w);
1206 else
1208 if (!tmp2empty)
1210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1211 stackptr += sizeof(sljit_w);
1213 if (!tmp1empty)
1215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1216 stackptr += sizeof(sljit_w);
1220 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1223 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1225 return (value & (value - 1)) == 0;
1228 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1230 while (list)
1232 /* sljit_set_label is clever enough to do nothing
1233 if either the jump or the label is NULL */
1234 sljit_set_label(list->jump, label);
1235 list = list->next;
1239 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1241 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1242 if (list_item)
1244 list_item->next = *list;
1245 list_item->jump = jump;
1246 *list = list_item;
1250 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1252 DEFINE_COMPILER;
1253 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1255 if (list_item)
1257 list_item->type = type;
1258 list_item->data = data;
1259 list_item->start = start;
1260 list_item->leave = LABEL();
1261 list_item->next = common->stubs;
1262 common->stubs = list_item;
1266 static void flush_stubs(compiler_common *common)
1268 DEFINE_COMPILER;
1269 stub_list* list_item = common->stubs;
1271 while (list_item)
1273 JUMPHERE(list_item->start);
1274 switch(list_item->type)
1276 case stack_alloc:
1277 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1278 break;
1280 JUMPTO(SLJIT_JUMP, list_item->leave);
1281 list_item = list_item->next;
1283 common->stubs = NULL;
1286 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1288 DEFINE_COMPILER;
1290 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1291 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1294 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1296 /* May destroy all locals and registers except TMP2. */
1297 DEFINE_COMPILER;
1299 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1300 #ifdef DESTROY_REGISTERS
1301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1302 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1303 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1306 #endif
1307 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1310 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1312 DEFINE_COMPILER;
1313 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1316 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1318 DEFINE_COMPILER;
1319 struct sljit_label *loop;
1320 int i;
1321 /* At this point we can freely use all temporary registers. */
1322 /* TMP1 returns with begin - 1. */
1323 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1324 if (length < 8)
1326 for (i = 0; i < length; i++)
1327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1329 else
1331 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1332 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1333 loop = LABEL();
1334 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1335 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1336 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1340 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1342 DEFINE_COMPILER;
1343 struct sljit_label *loop;
1344 struct sljit_jump *earlyexit;
1346 /* At this point we can freely use all registers. */
1347 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1350 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1351 if (common->mark_ptr != 0)
1352 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1353 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1354 if (common->mark_ptr != 0)
1355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1356 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1357 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1358 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1359 /* Unlikely, but possible */
1360 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1361 loop = LABEL();
1362 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1363 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1364 /* Copy the integer value to the output buffer */
1365 #ifdef COMPILE_PCRE16
1366 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1367 #endif
1368 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1370 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1371 JUMPHERE(earlyexit);
1373 /* Calculate the return value, which is the maximum ovector value. */
1374 if (topbracket > 1)
1376 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1377 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1379 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1380 loop = LABEL();
1381 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1382 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1383 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1384 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1386 else
1387 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1390 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1392 DEFINE_COMPILER;
1394 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1395 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1397 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1398 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1399 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1400 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1402 /* Store match begin and end. */
1403 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1404 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1405 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1406 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1407 #ifdef COMPILE_PCRE16
1408 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1409 #endif
1410 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1412 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1413 #ifdef COMPILE_PCRE16
1414 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1415 #endif
1416 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1418 JUMPTO(SLJIT_JUMP, leave);
1421 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1423 /* May destroy TMP1. */
1424 DEFINE_COMPILER;
1425 struct sljit_jump *jump;
1427 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1429 /* The value of -1 must be kept for start_used_ptr! */
1430 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1431 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1432 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1433 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1435 JUMPHERE(jump);
1437 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1439 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1441 JUMPHERE(jump);
1445 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1447 /* Detects if the character has an othercase. */
1448 unsigned int c;
1450 #ifdef SUPPORT_UTF
1451 if (common->utf)
1453 GETCHAR(c, cc);
1454 if (c > 127)
1456 #ifdef SUPPORT_UCP
1457 return c != UCD_OTHERCASE(c);
1458 #else
1459 return FALSE;
1460 #endif
1462 #ifndef COMPILE_PCRE8
1463 return common->fcc[c] != c;
1464 #endif
1466 else
1467 #endif
1468 c = *cc;
1469 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1472 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1474 /* Returns with the othercase. */
1475 #ifdef SUPPORT_UTF
1476 if (common->utf && c > 127)
1478 #ifdef SUPPORT_UCP
1479 return UCD_OTHERCASE(c);
1480 #else
1481 return c;
1482 #endif
1484 #endif
1485 return TABLE_GET(c, common->fcc, c);
1488 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1490 /* Detects if the character and its othercase has only 1 bit difference. */
1491 unsigned int c, oc, bit;
1492 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1493 int n;
1494 #endif
1496 #ifdef SUPPORT_UTF
1497 if (common->utf)
1499 GETCHAR(c, cc);
1500 if (c <= 127)
1501 oc = common->fcc[c];
1502 else
1504 #ifdef SUPPORT_UCP
1505 oc = UCD_OTHERCASE(c);
1506 #else
1507 oc = c;
1508 #endif
1511 else
1513 c = *cc;
1514 oc = TABLE_GET(c, common->fcc, c);
1516 #else
1517 c = *cc;
1518 oc = TABLE_GET(c, common->fcc, c);
1519 #endif
1521 SLJIT_ASSERT(c != oc);
1523 bit = c ^ oc;
1524 /* Optimized for English alphabet. */
1525 if (c <= 127 && bit == 0x20)
1526 return (0 << 8) | 0x20;
1528 /* Since c != oc, they must have at least 1 bit difference. */
1529 if (!ispowerof2(bit))
1530 return 0;
1532 #ifdef COMPILE_PCRE8
1534 #ifdef SUPPORT_UTF
1535 if (common->utf && c > 127)
1537 n = GET_EXTRALEN(*cc);
1538 while ((bit & 0x3f) == 0)
1540 n--;
1541 bit >>= 6;
1543 return (n << 8) | bit;
1545 #endif /* SUPPORT_UTF */
1546 return (0 << 8) | bit;
1548 #else /* COMPILE_PCRE8 */
1550 #ifdef COMPILE_PCRE16
1551 #ifdef SUPPORT_UTF
1552 if (common->utf && c > 65535)
1554 if (bit >= (1 << 10))
1555 bit >>= 10;
1556 else
1557 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1559 #endif /* SUPPORT_UTF */
1560 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1561 #endif /* COMPILE_PCRE16 */
1563 #endif /* COMPILE_PCRE8 */
1566 static void check_partial(compiler_common *common, BOOL force)
1568 /* Checks whether a partial matching is occured. Does not modify registers. */
1569 DEFINE_COMPILER;
1570 struct sljit_jump *jump = NULL;
1572 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1574 if (common->mode == JIT_COMPILE)
1575 return;
1577 if (!force)
1578 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1579 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1580 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1582 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1583 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1584 else
1586 if (common->partialmatchlabel != NULL)
1587 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1588 else
1589 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1592 if (jump != NULL)
1593 JUMPHERE(jump);
1596 static struct sljit_jump *check_str_end(compiler_common *common)
1598 /* Does not affect registers. Usually used in a tight spot. */
1599 DEFINE_COMPILER;
1600 struct sljit_jump *jump;
1601 struct sljit_jump *nohit;
1602 struct sljit_jump *return_value;
1604 if (common->mode == JIT_COMPILE)
1605 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1607 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1608 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1610 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1612 JUMPHERE(nohit);
1613 return_value = JUMP(SLJIT_JUMP);
1615 else
1617 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1618 if (common->partialmatchlabel != NULL)
1619 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1620 else
1621 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1623 JUMPHERE(jump);
1624 return return_value;
1627 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
1629 DEFINE_COMPILER;
1630 struct sljit_jump *jump;
1632 if (common->mode == JIT_COMPILE)
1634 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1635 return;
1638 /* Partial matching mode. */
1639 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1640 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1641 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1644 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
1646 else
1648 if (common->partialmatchlabel != NULL)
1649 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1650 else
1651 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1653 JUMPHERE(jump);
1656 static void read_char(compiler_common *common)
1658 /* Reads the character into TMP1, updates STR_PTR.
1659 Does not check STR_END. TMP2 Destroyed. */
1660 DEFINE_COMPILER;
1661 #ifdef SUPPORT_UTF
1662 struct sljit_jump *jump;
1663 #endif
1665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1666 #ifdef SUPPORT_UTF
1667 if (common->utf)
1669 #ifdef COMPILE_PCRE8
1670 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1671 #else
1672 #ifdef COMPILE_PCRE16
1673 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1674 #endif
1675 #endif /* COMPILE_PCRE8 */
1676 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1677 JUMPHERE(jump);
1679 #endif
1680 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1683 static void peek_char(compiler_common *common)
1685 /* Reads the character into TMP1, keeps STR_PTR.
1686 Does not check STR_END. TMP2 Destroyed. */
1687 DEFINE_COMPILER;
1688 #ifdef SUPPORT_UTF
1689 struct sljit_jump *jump;
1690 #endif
1692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1693 #ifdef SUPPORT_UTF
1694 if (common->utf)
1696 #ifdef COMPILE_PCRE8
1697 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1698 #else
1699 #ifdef COMPILE_PCRE16
1700 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1701 #endif
1702 #endif /* COMPILE_PCRE8 */
1703 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1704 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1705 JUMPHERE(jump);
1707 #endif
1710 static void read_char8_type(compiler_common *common)
1712 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1713 DEFINE_COMPILER;
1714 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1715 struct sljit_jump *jump;
1716 #endif
1718 #ifdef SUPPORT_UTF
1719 if (common->utf)
1721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1723 #ifdef COMPILE_PCRE8
1724 /* This can be an extra read in some situations, but hopefully
1725 it is needed in most cases. */
1726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1727 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1728 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1729 JUMPHERE(jump);
1730 #else
1731 #ifdef COMPILE_PCRE16
1732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1733 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1734 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1735 JUMPHERE(jump);
1736 /* Skip low surrogate if necessary. */
1737 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1739 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1740 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1742 #endif
1743 #endif /* COMPILE_PCRE8 */
1744 return;
1746 #endif
1747 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1748 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1749 #ifdef COMPILE_PCRE16
1750 /* The ctypes array contains only 256 values. */
1751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1752 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1753 #endif
1754 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1755 #ifdef COMPILE_PCRE16
1756 JUMPHERE(jump);
1757 #endif
1760 static void skip_char_back(compiler_common *common)
1762 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1763 DEFINE_COMPILER;
1764 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1765 struct sljit_label *label;
1767 if (common->utf)
1769 label = LABEL();
1770 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1771 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1773 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1774 return;
1776 #endif
1777 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1778 if (common->utf)
1780 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1781 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1782 /* Skip low surrogate if necessary. */
1783 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1784 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1785 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1786 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1787 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1788 return;
1790 #endif
1791 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1794 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
1796 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1797 DEFINE_COMPILER;
1799 if (nltype == NLTYPE_ANY)
1801 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1802 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1804 else if (nltype == NLTYPE_ANYCRLF)
1806 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1807 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1808 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1809 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1810 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1812 else
1814 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1815 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1819 #ifdef SUPPORT_UTF
1821 #ifdef COMPILE_PCRE8
1822 static void do_utfreadchar(compiler_common *common)
1824 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1825 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1826 DEFINE_COMPILER;
1827 struct sljit_jump *jump;
1829 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1830 /* Searching for the first zero. */
1831 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1832 jump = JUMP(SLJIT_C_NOT_ZERO);
1833 /* Two byte sequence. */
1834 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1835 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1836 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1838 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1839 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1841 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1842 JUMPHERE(jump);
1844 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1845 jump = JUMP(SLJIT_C_NOT_ZERO);
1846 /* Three byte sequence. */
1847 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1848 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1849 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1850 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1851 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1852 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1853 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1854 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1855 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1856 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1857 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1858 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1859 JUMPHERE(jump);
1861 /* Four byte sequence. */
1862 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1863 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1864 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1865 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1866 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1867 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1868 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1869 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1870 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1871 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1872 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1874 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1875 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1876 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1877 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1880 static void do_utfreadtype8(compiler_common *common)
1882 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1883 of the character (>= 0xc0). Return value in TMP1. */
1884 DEFINE_COMPILER;
1885 struct sljit_jump *jump;
1886 struct sljit_jump *compare;
1888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1890 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1891 jump = JUMP(SLJIT_C_NOT_ZERO);
1892 /* Two byte sequence. */
1893 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1894 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1895 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1896 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1897 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1898 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1899 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1900 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1901 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1903 JUMPHERE(compare);
1904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1905 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1906 JUMPHERE(jump);
1908 /* We only have types for characters less than 256. */
1909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1911 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1912 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1915 #else /* COMPILE_PCRE8 */
1917 #ifdef COMPILE_PCRE16
1918 static void do_utfreadchar(compiler_common *common)
1920 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1921 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1922 DEFINE_COMPILER;
1923 struct sljit_jump *jump;
1925 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1926 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1927 /* Do nothing, only return. */
1928 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1930 JUMPHERE(jump);
1931 /* Combine two 16 bit characters. */
1932 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1933 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1934 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1935 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1936 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1937 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1938 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1939 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1940 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1942 #endif /* COMPILE_PCRE16 */
1944 #endif /* COMPILE_PCRE8 */
1946 #endif /* SUPPORT_UTF */
1948 #ifdef SUPPORT_UCP
1950 static sljit_w SLJIT_CALL getunichartype(sljit_w c)
1952 return (sljit_w)(unsigned int)UCD_CHARTYPE((unsigned int)c);
1955 static sljit_w SLJIT_CALL getunicharscript(sljit_w c)
1957 return (sljit_w)(unsigned int)UCD_SCRIPT((unsigned int)c);
1960 static void do_getunichartype(compiler_common *common)
1962 /* Character comes in TMP1. Returns chartype in TMP1 */
1963 DEFINE_COMPILER;
1965 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1966 /* Save registers */
1967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
1968 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype));
1969 /* Restore registers */
1970 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1971 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1974 static void do_getunichartype_2(compiler_common *common)
1976 /* Character comes in TMP1. Returns chartype in TMP1 */
1977 DEFINE_COMPILER;
1979 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1980 /* Save registers */
1981 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STACK_TOP, 0);
1982 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype));
1983 /* Restore registers */
1984 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1985 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1988 static void do_getunicharscript(compiler_common *common)
1990 /* Character comes in TMP1. Returns chartype in TMP1 */
1991 DEFINE_COMPILER;
1993 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1994 /* Save registers */
1995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
1996 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunicharscript));
1997 /* Restore registers */
1998 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2001 #endif
2003 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2005 DEFINE_COMPILER;
2006 struct sljit_label *mainloop;
2007 struct sljit_label *newlinelabel = NULL;
2008 struct sljit_jump *start;
2009 struct sljit_jump *end = NULL;
2010 struct sljit_jump *nl = NULL;
2011 #ifdef SUPPORT_UTF
2012 struct sljit_jump *singlechar;
2013 #endif
2014 jump_list *newline = NULL;
2015 BOOL newlinecheck = FALSE;
2016 BOOL readuchar = FALSE;
2018 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2019 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2020 newlinecheck = TRUE;
2022 if (firstline)
2024 /* Search for the end of the first line. */
2025 SLJIT_ASSERT(common->first_line_end != 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2029 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2031 mainloop = LABEL();
2032 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2033 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2034 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2035 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2036 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2037 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2038 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2040 else
2042 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2043 mainloop = LABEL();
2044 /* Continual stores does not cause data dependency. */
2045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2046 read_char(common);
2047 check_newlinechar(common, common->nltype, &newline, TRUE);
2048 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2050 set_jumps(newline, LABEL());
2053 JUMPHERE(end);
2054 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2057 start = JUMP(SLJIT_JUMP);
2059 if (newlinecheck)
2061 newlinelabel = LABEL();
2062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2063 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2065 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2066 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2067 #ifdef COMPILE_PCRE16
2068 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2069 #endif
2070 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2071 nl = JUMP(SLJIT_JUMP);
2074 mainloop = LABEL();
2076 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2077 #ifdef SUPPORT_UTF
2078 if (common->utf) readuchar = TRUE;
2079 #endif
2080 if (newlinecheck) readuchar = TRUE;
2082 if (readuchar)
2083 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2085 if (newlinecheck)
2086 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2088 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2089 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2090 if (common->utf)
2092 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2093 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2095 JUMPHERE(singlechar);
2097 #endif
2098 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2099 if (common->utf)
2101 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2102 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2104 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2105 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2106 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2107 JUMPHERE(singlechar);
2109 #endif
2110 JUMPHERE(start);
2112 if (newlinecheck)
2114 JUMPHERE(end);
2115 JUMPHERE(nl);
2118 return mainloop;
/* Emits a scan loop that moves STR_PTR forward until the code unit at STR_PTR
   equals first_char (or, when caseless is set, its other-case form), or until
   STR_PTR reaches STR_END.

   common    - compiler state (supplies the sljit compiler, fcc table, utf flag)
   first_char - the code unit every match must start with
   caseless  - also accept the other case of first_char
   firstline - when set, STR_END is saved in the POSSESSIVE0 local, replaced by
               the value stored at common->first_line_end while scanning, and
               restored before the emitted code falls through. */
2121 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2123 DEFINE_COMPILER;
2124 struct sljit_label *start;
2125 struct sljit_jump *leave;
2126 struct sljit_jump *found;
2127 pcre_uchar oc, bit;
2129 if (firstline)
/* Save the real STR_END and limit the scan to the first line. */
2131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2132 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
/* Loop head: leave when STR_PTR >= STR_END, otherwise load the current code unit into TMP1. */
2135 start = LABEL();
2136 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2137 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* oc is the other-case form of first_char; it stays equal to first_char when
   the search is caseful. */
2139 oc = first_char;
2140 if (caseless)
2142 oc = TABLE_GET(first_char, common->fcc, first_char);
2143 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2144 if (first_char > 127 && common->utf)
2145 oc = UCD_OTHERCASE(first_char);
2146 #endif
2148 if (first_char == oc)
2149 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2150 else
2152 bit = first_char ^ oc;
2153 if (ispowerof2(bit))
/* The two cases differ in a single bit: force that bit on in a copy of the
   loaded unit (TMP2) and do one comparison that accepts both cases. */
2155 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2156 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2158 else
/* General case: OR together the results of two equality tests in TMP2. */
2160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2161 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2163 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2164 found = JUMP(SLJIT_C_NOT_ZERO);
/* No match at this position: step over one code unit. TMP1 still holds the
   unit that was just tested. */
2168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2169 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2170 if (common->utf)
/* UTF-8: units below 0xc0 need no extra skip; otherwise STR_PTR is advanced
   further by the byte loaded from utf8_table4[unit - 0xc0] (presumably the
   number of trailing bytes - confirm against the table definition). */
2172 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2173 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2176 #endif
2177 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2178 if (common->utf)
/* UTF-16: units below 0xd800 need no extra skip; if (unit & 0xfc00) == 0xd800
   the equality result is shifted left by one and added to STR_PTR, which
   skips one more 16-bit unit. */
2180 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2181 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2183 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2184 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2187 #endif
2188 JUMPTO(SLJIT_JUMP, start);
/* Both exits (match found, end of subject) land here. */
2189 JUMPHERE(found);
2190 JUMPHERE(leave);
2192 if (firstline)
/* Restore the real STR_END saved above. */
2193 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
/* Emits code that moves STR_PTR forward to the position following the next
   newline, unless STR_PTR is already at (or before) the position stored in
   jit_arguments.str. Two variants are generated:
     - a fixed two-unit newline (nltype == NLTYPE_FIXED && newline > 255),
       compared unit by unit against the high and low bytes of common->newline;
     - every other newline type, handled through read_char() and
       check_newlinechar().
   With firstline set, STR_END is saved in the POSSESSIVE0 local, replaced by
   the value stored at common->first_line_end, and restored at the end. */
2196 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2198 DEFINE_COMPILER;
2199 struct sljit_label *loop;
2200 struct sljit_jump *lastchar;
2201 struct sljit_jump *firstchar;
2202 struct sljit_jump *leave;
2203 struct sljit_jump *foundcr = NULL;
2204 struct sljit_jump *notfoundnl;
2205 jump_list *newline = NULL;
2207 if (firstline)
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2210 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
/* Variant 1: fixed two-unit newline sequence. */
2213 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Nothing to do at the end of the subject or when STR_PTR <= arguments->str. */
2215 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2216 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2219 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Step STR_PTR back by one code unit, but only when it is at least two units
   past arguments->begin (TMP2 receives the comparison result; it is scaled to
   bytes in the 16-bit build). */
2221 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2222 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2223 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2224 #ifdef COMPILE_PCRE16
2225 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2226 #endif
2227 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Advance one unit at a time until the two units just before STR_PTR are the
   newline pair, or STR_PTR reaches STR_END. */
2229 loop = LABEL();
2230 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2231 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2232 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2233 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2234 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2235 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2237 JUMPHERE(leave);
2238 JUMPHERE(firstchar);
2239 JUMPHERE(lastchar);
2241 if (firstline)
2242 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2243 return;
/* Variant 2: all other newline conventions. Skip the search entirely when
   STR_PTR <= arguments->str; otherwise start one character earlier. */
2246 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2247 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2248 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2249 skip_char_back(common);
/* Read characters until check_newlinechar() stops adding a jump back to the
   loop (its jumps are collected in `newline` and all bound to `loop`), or
   until STR_PTR reaches STR_END. For ANY/ANYCRLF a CR is handled separately. */
2251 loop = LABEL();
2252 read_char(common);
2253 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2254 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2255 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2256 check_newlinechar(common, common->nltype, &newline, FALSE);
2257 set_jumps(newline, loop);
2259 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
/* After a CR: if the next unit is NL (and we are not at STR_END), step over
   it as well so that CR NL counts as one newline. */
2261 leave = JUMP(SLJIT_JUMP);
2262 JUMPHERE(foundcr);
2263 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2264 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2265 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2266 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2267 #ifdef COMPILE_PCRE16
2268 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2269 #endif
2270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2271 JUMPHERE(notfoundnl);
2272 JUMPHERE(leave);
2274 JUMPHERE(lastchar);
2275 JUMPHERE(firstchar);
2277 if (firstline)
2278 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
/* Emits a scan loop that moves STR_PTR forward until the code unit at STR_PTR
   has its bit set in the bitmap located at address start_bits, or until
   STR_PTR reaches STR_END. The bitmap is indexed as byte (unit >> 3),
   bit (unit & 7). With firstline set, STR_END is saved in the POSSESSIVE0
   local, replaced by the value stored at common->first_line_end, and restored
   at the end. */
2281 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2283 DEFINE_COMPILER;
2284 struct sljit_label *start;
2285 struct sljit_jump *leave;
2286 struct sljit_jump *found;
2287 #ifndef COMPILE_PCRE8
2288 struct sljit_jump *jump;
2289 #endif
2291 if (firstline)
2293 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2294 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2297 start = LABEL();
2298 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2299 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2300 #ifdef SUPPORT_UTF
/* Keep the untouched unit in TMP3: TMP1 is overwritten by the bitmap test but
   the original value is needed below to compute the character length. */
2301 if (common->utf)
2302 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2303 #endif
2304 #ifndef COMPILE_PCRE8
/* Wide units: any value that is not below 255 is tested as 255. */
2305 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2307 JUMPHERE(jump);
2308 #endif
/* TMP2 = 1 << (unit & 7); TMP1 = bitmap byte at start_bits[unit >> 3]. */
2309 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2310 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2311 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2312 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2314 found = JUMP(SLJIT_C_NOT_ZERO);
2316 #ifdef SUPPORT_UTF
2317 if (common->utf)
2318 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2319 #endif
/* Bit not set: step over one code unit, plus any extra units in UTF mode. */
2320 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2321 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2322 if (common->utf)
/* UTF-8: for units >= 0xc0 add the byte loaded from utf8_table4[unit - 0xc0]. */
2324 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2325 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2328 #endif
2329 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2330 if (common->utf)
/* UTF-16: when (unit & 0xfc00) == 0xd800 skip one additional 16-bit unit. */
2332 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2333 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2334 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2335 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2336 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2339 #endif
2340 JUMPTO(SLJIT_JUMP, start);
2341 JUMPHERE(found);
2342 JUMPHERE(leave);
2344 if (firstline)
2345 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
/* Emits a forward search for req_char (or, when caseless, its other case)
   starting at STR_PTR, or one code unit after it when has_firstchar is set.
   The position where the character was found is stored in the local slot
   common->req_char_ptr. STR_PTR itself is not modified; TMP1 and TMP2 are
   used as scratch.

   The search is not performed when STR_PTR + REQ_BYTE_MAX < STR_END, nor
   when STR_PTR is still below the position already stored in req_char_ptr.

   Returns the jump that is taken when the search reaches STR_END without
   finding the character; the caller is responsible for binding it. */
2348 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2350 DEFINE_COMPILER;
2351 struct sljit_label *loop;
2352 struct sljit_jump *toolong;
2353 struct sljit_jump *alreadyfound;
2354 struct sljit_jump *found;
2355 struct sljit_jump *foundoc = NULL;
2356 struct sljit_jump *notfound;
2357 pcre_uchar oc, bit;
2359 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = previously recorded position; TMP1 = STR_PTR + REQ_BYTE_MAX. */
2360 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2361 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2362 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2363 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
/* TMP1 is the search cursor. */
2365 if (has_firstchar)
2366 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2367 else
2368 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2370 loop = LABEL();
2371 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2373 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2374 oc = req_char;
2375 if (caseless)
2377 oc = TABLE_GET(req_char, common->fcc, req_char);
2378 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2379 if (req_char > 127 && common->utf)
2380 oc = UCD_OTHERCASE(req_char);
2381 #endif
2383 if (req_char == oc)
2384 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2385 else
2387 bit = req_char ^ oc;
2388 if (ispowerof2(bit))
/* Cases differ in one bit: set it in the loaded unit and compare once. */
2390 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2391 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2393 else
/* Otherwise test both forms with two separate conditional jumps. */
2395 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2396 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2399 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2400 JUMPTO(SLJIT_JUMP, loop);
/* Found: remember the position so later calls can skip the search. */
2402 JUMPHERE(found);
2403 if (foundoc)
2404 JUMPHERE(foundoc);
2405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2406 JUMPHERE(alreadyfound);
2407 JUMPHERE(toolong);
2408 return notfound;
/* Emits the fast-call subroutine that walks the frame records starting at
   STACK_TOP and undoes them. The return address is kept in RETURN_ADDR.
   TMP1 is the read cursor, TMP2 holds the current record's first word and
   TMP3 the base address of the locals area.

   Record formats handled, keyed on the first word:
     > frame_end (signed)  : the word is an offset into the locals; the next
                             two words are copied to locals[offset] and
                             locals[offset + one word]; record is 3 words.
     == frame_end          : stop and return.
     == frame_setstrbegin  : next word is stored into OVECTOR(0); 2 words.
     == frame_setmark      : (only when common->mark_ptr != 0) next word is
                             stored into the mark_ptr local; 2 words.
     anything else         : skipped as a 2-word record. */
2411 static void do_revertframes(compiler_common *common)
2413 DEFINE_COMPILER;
2414 struct sljit_jump *jump;
2415 struct sljit_label *mainloop;
2417 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2418 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2419 GET_LOCAL_BASE(TMP3, 0, 0);
2421 /* Drop frames until we reach STACK_TOP. */
2422 mainloop = LABEL();
2423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2424 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Ordinary record: restore two saved words into the locals area. */
2425 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2426 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2427 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2428 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2429 JUMPTO(SLJIT_JUMP, mainloop);
2431 JUMPHERE(jump);
2432 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2433 /* End of dropping frames. */
2434 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2436 JUMPHERE(jump);
2437 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2438 /* Set string begin. */
2439 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2440 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2442 JUMPTO(SLJIT_JUMP, mainloop);
2444 JUMPHERE(jump);
2445 if (common->mark_ptr != 0)
/* Restore the saved mark pointer. */
2447 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2448 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2449 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2451 JUMPTO(SLJIT_JUMP, mainloop);
2453 JUMPHERE(jump);
2456 /* Unknown command. */
2457 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2458 JUMPTO(SLJIT_JUMP, mainloop);
/* Emits the fast-call subroutine that tests for a word boundary at STR_PTR.
   The return address is kept in the LOCALS0 slot. The subroutine computes a
   0/1 "is word character" value for the character before STR_PTR (stored in
   LOCALS1; 0 when STR_PTR <= arguments->begin) and for the character at
   STR_PTR (in TMP2; 0 when check_str_end() reports the end), then XORs them
   with the equal flag set, so the result is non-zero exactly when the two
   values differ.

   With common->use_ucp a character counts as a word character when it is an
   underscore or when the type returned by getunichartype_2 lies in
   [ucp_Ll, ucp_Lu] or [ucp_Nd, ucp_No]; otherwise bit 4 (ctype_word) of the
   common->ctypes entry is used, and characters above 255 count as non-word. */
2461 static void check_wordboundary(compiler_common *common)
2463 DEFINE_COMPILER;
2464 struct sljit_jump *skipread;
2465 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2466 struct sljit_jump *jump;
2467 #endif
/* The code below shifts the ctype byte right by 4, so ctype_word must be 0x10. */
2469 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2471 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2472 /* Get type of the previous char, and put it to LOCALS1. */
2473 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2475 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2476 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves the previous
   character in TMP1. */
2477 skip_char_back(common);
2478 check_start_used_ptr(common);
2479 read_char(common);
2481 /* Testing char type. */
2482 #ifdef SUPPORT_UCP
2483 if (common->use_ucp)
/* TMP2 defaults to 1 so that the underscore shortcut yields "word". The two
   unsigned range checks on the type are done by subtracting the lower bound
   and comparing against the range width. */
2485 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2486 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2487 add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL));
2488 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2489 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2490 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2491 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2492 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2493 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2494 JUMPHERE(jump);
2495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2497 else
2498 #endif
2500 #ifndef COMPILE_PCRE8
2501 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2502 #elif defined SUPPORT_UTF
2503 /* Here LOCALS1 has already been zeroed. */
2504 jump = NULL;
2505 if (common->utf)
2506 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2507 #endif /* COMPILE_PCRE8 */
/* Table lookup: LOCALS1 = (ctypes[char] >> 4) & 1. */
2508 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2509 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2510 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2512 #ifndef COMPILE_PCRE8
2513 JUMPHERE(jump);
2514 #elif defined SUPPORT_UTF
2515 if (jump != NULL)
2516 JUMPHERE(jump);
2517 #endif /* COMPILE_PCRE8 */
2519 JUMPHERE(skipread);
/* Second half: classify the character at STR_PTR into TMP2 (0 at the end of
   the subject). */
2521 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2522 skipread = check_str_end(common);
2523 peek_char(common);
2525 /* Testing char type. This is a code duplication. */
2526 #ifdef SUPPORT_UCP
2527 if (common->use_ucp)
2529 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2530 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2531 add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL));
2532 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2533 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2534 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2535 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2536 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2537 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2538 JUMPHERE(jump);
2540 else
2541 #endif
2543 #ifndef COMPILE_PCRE8
2544 /* TMP2 may be destroyed by peek_char. */
2545 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2546 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2547 #elif defined SUPPORT_UTF
2548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2549 jump = NULL;
2550 if (common->utf)
2551 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2552 #endif
2553 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2554 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2555 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2556 #ifndef COMPILE_PCRE8
2557 JUMPHERE(jump);
2558 #elif defined SUPPORT_UTF
2559 if (jump != NULL)
2560 JUMPHERE(jump);
2561 #endif /* COMPILE_PCRE8 */
2563 JUMPHERE(skipread);
/* Flags reflect (current word-ness XOR previous word-ness); the value itself
   is discarded. */
2565 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2566 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Emits the fast-call subroutine behind the "any newline" test. The result is
   accumulated in TMP2 and the final OR also sets the equal/zero flag, so the
   caller can branch on it. TMP1 is modified (0x0a is subtracted from it).
   Characters accepted: 0x0a..0x0d, 0x85, and - in 16-bit builds, or in 8-bit
   builds when common->utf is set - 0x2028 and 0x2029. */
2569 static void check_anynewline(compiler_common *common)
2571 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2572 DEFINE_COMPILER;
2574 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* After rebasing by 0x0a, one unsigned compare covers 0x0a..0x0d. */
2576 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2577 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2578 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2579 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2580 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2581 #ifdef COMPILE_PCRE8
2582 if (common->utf)
2584 #endif
/* Record the 0x85 result, then set the low bit of the rebased value so that a
   single comparison against (0x2029 - 0x0a) matches both 0x2028 and 0x2029. */
2585 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2586 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2587 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2588 #ifdef COMPILE_PCRE8
2590 #endif
2591 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last pending comparison and expose the final result in the flags. */
2592 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2593 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the fast-call subroutine behind the horizontal-space test. The result
   is accumulated in TMP2 and the final OR also sets the equal/zero flag.
   Characters accepted: 0x09, 0x20, 0xa0, and - in 16-bit builds, or in 8-bit
   builds when common->utf is set - 0x1680, 0x180e, 0x2000..0x200A, 0x202f,
   0x205f and 0x3000. On the wide path TMP1 is modified (0x2000 is subtracted
   from it). */
2596 static void check_hspace(compiler_common *common)
2598 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
2599 DEFINE_COMPILER;
2601 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Each test sets the flags; the following COND_VALUE folds the result of the
   preceding test into TMP2. */
2603 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2604 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2605 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2606 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2607 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2608 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2609 #ifdef COMPILE_PCRE8
2610 if (common->utf)
2612 #endif
2613 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2615 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2617 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase by 0x2000 so that one unsigned compare covers 0x2000..0x200A; the
   remaining constants are expressed relative to 0x2000 as well. */
2618 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2619 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2620 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2621 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2622 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2623 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2624 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2625 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2626 #ifdef COMPILE_PCRE8
2628 #endif
2629 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last pending comparison and expose the final result in the flags. */
2630 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2632 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the fast-call subroutine behind the vertical-space test. The result
   is accumulated in TMP2 and the final OR also sets the equal/zero flag.
   TMP1 is modified (0x0a is subtracted from it). Characters accepted:
   0x0a..0x0d, 0x85, and - in 16-bit builds, or in 8-bit builds when
   common->utf is set - 0x2028 and 0x2029. */
2635 static void check_vspace(compiler_common *common)
2637 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
2638 DEFINE_COMPILER;
2640 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* After rebasing by 0x0a, one unsigned compare covers 0x0a..0x0d. */
2642 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2643 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2644 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2646 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2647 #ifdef COMPILE_PCRE8
2648 if (common->utf)
2650 #endif
/* Record the 0x85 result, then set the low bit of the rebased value so that a
   single comparison against (0x2029 - 0x0a) matches both 0x2028 and 0x2029. */
2651 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2652 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2653 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2654 #ifdef COMPILE_PCRE8
2656 #endif
2657 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last pending comparison and expose the final result in the flags. */
2658 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2660 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Scratch-register aliases for the comparison subroutines below: the
   STR_END and STACK_TOP registers are borrowed to hold the two characters
   being compared, and are saved/restored by each subroutine. */
2663 #define CHAR1 STR_END
2664 #define CHAR2 STACK_TOP
/* Emits the fast-call subroutine that compares two character sequences
   exactly. On entry TMP1 addresses the first sequence, STR_PTR has already
   been advanced past the second one, and TMP2 holds the length, which is
   counted down in steps of IN_UCHARS(1). The loop stops at the first
   differing pair or when TMP2 reaches zero. CHAR1 (STR_END) is preserved
   via TMP3 and CHAR2 (STACK_TOP) via the LOCALS0 slot.
   NOTE(review): how the caller reads the outcome (flags or TMP2) is not
   visible in this function - confirm at the call site. */
2666 static void do_casefulcmp(compiler_common *common)
2668 DEFINE_COMPILER;
2669 struct sljit_jump *jump;
2670 struct sljit_label *label;
2672 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rewind STR_PTR to the start of its sequence and save the borrowed registers. */
2673 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2674 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back one unit because the MOVU_UCHAR loads below
   address [ptr + IN_UCHARS(1)] (presumably updating the pointer as they
   load - confirm against the MOVU_UCHAR definition). */
2676 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2677 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2679 label = LABEL();
2680 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2681 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2682 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2683 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2684 JUMPTO(SLJIT_C_NOT_ZERO, label);
/* Common exit: undo the one-unit bias on STR_PTR and restore the registers. */
2686 JUMPHERE(jump);
2687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2688 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2689 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2690 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The STACK_LIMIT register is borrowed to hold the address of the
   lower-casing table while do_caselesscmp runs. */
2693 #define LCC_TABLE STACK_LIMIT
/* Emits the fast-call subroutine that compares two character sequences
   after mapping each character through the common->lcc table. Register
   usage mirrors do_casefulcmp: TMP1 addresses the first sequence, STR_PTR
   the second (rewound by TMP2 on entry), TMP2 is the remaining length.
   LCC_TABLE is preserved via TMP3, CHAR1 via LOCALS0 and CHAR2 via LOCALS1.
   In non-8-bit builds characters above 255 bypass the table and are compared
   as they are.
   NOTE(review): how the caller reads the outcome is not visible here -
   confirm at the call site. */
2695 static void do_caselesscmp(compiler_common *common)
2697 DEFINE_COMPILER;
2698 struct sljit_jump *jump;
2699 struct sljit_label *label;
2701 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2702 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the three borrowed registers, then load the table address. */
2704 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2707 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2708 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2709 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2711 label = LABEL();
2712 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2713 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2714 #ifndef COMPILE_PCRE8
2715 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2716 #endif
/* CHAR1 = lcc[CHAR1] */
2717 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2718 #ifndef COMPILE_PCRE8
2719 JUMPHERE(jump);
2720 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2721 #endif
/* CHAR2 = lcc[CHAR2] */
2722 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2723 #ifndef COMPILE_PCRE8
2724 JUMPHERE(jump);
2725 #endif
2726 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2727 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2728 JUMPTO(SLJIT_C_NOT_ZERO, label);
/* Common exit: undo the one-unit bias on STR_PTR and restore the registers. */
2730 JUMPHERE(jump);
2731 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2732 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2733 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2734 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2735 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The register aliases are only meaningful for the two subroutines above. */
2738 #undef LCC_TABLE
2739 #undef CHAR1
2740 #undef CHAR2
2742 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Plain C helper (called from generated code) that compares the characters
   in [src1, end1) against the characters starting at args->uchar_ptr,
   ignoring case by means of UCD_OTHERCASE.

   src1, end1 - bounds of the first sequence
   args       - supplies the second sequence: uchar_ptr (start) and end (limit)

   Returns:
     NULL               on the first pair where c1 differs from both c2 and
                        UCD_OTHERCASE(c2);
     (pcre_uchar *)1    as a sentinel when the second sequence reaches
                        args->end before the first one is exhausted;
     otherwise          the position in the second sequence just past the
                        last compared character. */
2744 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2746 /* This function would be ineffective to do in JIT level. */
2747 int c1, c2;
2748 const pcre_uchar *src2 = args->uchar_ptr;
2749 const pcre_uchar *end2 = args->end;
2751 while (src1 < end1)
2753 if (src2 >= end2)
2754 return (pcre_uchar*)1;
/* GETCHARINC decodes one character and advances the pointer past it. */
2755 GETCHARINC(c1, src1);
2756 GETCHARINC(c2, src2);
2757 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2759 return src2;
2762 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Emits the comparison of one pattern character (all of its code units in
   UTF mode) at cc against subject data located at negative offsets from
   STR_PTR. It is intended to be called repeatedly with the same context so
   that several characters can be checked with as few loads as possible.

   common     - compiler state
   caseless   - compare ignoring case when the character has an other case
   cc         - pattern position of the character to compare
   context    - running state: length (bytes still to compare, counted back
                from STR_PTR), sourcereg (register holding the data to test,
                -1 before the first load), and - when unaligned reads are
                available - the pending characters (c), their case masks (oc)
                and how many are pending (ucharptr)
   backtracks - list receiving the jumps taken on mismatch

   Returns cc advanced past the code units that were processed. */
2764 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2765 compare_context* context, jump_list **backtracks)
2767 DEFINE_COMPILER;
2768 unsigned int othercasebit = 0;
2769 pcre_uchar *othercasechar = NULL;
2770 #ifdef SUPPORT_UTF
2771 int utflength;
2772 #endif
2774 if (caseless && char_has_othercase(common, cc))
2776 othercasebit = char_get_othercase_bit(common, cc);
2777 SLJIT_ASSERT(othercasebit);
2778 /* Extracting bit difference info. */
/* The packed value is split into the code unit that carries the differing
   bit (othercasechar) and the bit mask itself (othercasebit). In the 16-bit
   build bit 0x100 selects whether the mask applies to the high byte. */
2779 #ifdef COMPILE_PCRE8
2780 othercasechar = cc + (othercasebit >> 8);
2781 othercasebit &= 0xff;
2782 #else
2783 #ifdef COMPILE_PCRE16
2784 othercasechar = cc + (othercasebit >> 9);
2785 if ((othercasebit & 0x100) != 0)
2786 othercasebit = (othercasebit & 0xff) << 8;
2787 else
2788 othercasebit &= 0xff;
2789 #endif
2790 #endif
/* First call for this context: load the first chunk into TMP1 (using the
   widest load the remaining length allows when unaligned reads are
   available) and make TMP2 the register for the next load. */
2793 if (context->sourcereg == -1)
2795 #ifdef COMPILE_PCRE8
2796 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2797 if (context->length >= 4)
2798 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2799 else if (context->length >= 2)
2800 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2801 else
2802 #endif
2803 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2804 #else
2805 #ifdef COMPILE_PCRE16
2806 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2807 if (context->length >= 4)
2808 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2809 else
2810 #endif
2811 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2812 #endif
2813 #endif /* COMPILE_PCRE8 */
2814 context->sourcereg = TMP2;
/* In UTF mode the code below is repeated once per code unit of the character
   (see the `while (utflength > 0)` near the end). */
2817 #ifdef SUPPORT_UTF
2818 utflength = 1;
2819 if (common->utf && HAS_EXTRALEN(*cc))
2820 utflength += GET_EXTRALEN(*cc);
2824 #endif
2826 context->length -= IN_UCHARS(1);
2827 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2829 /* Unaligned read is supported. */
/* Queue this code unit: c holds the expected value (with the case bit forced
   on when applicable) and oc the mask to OR into the subject data. */
2830 if (othercasebit != 0 && othercasechar == cc)
2832 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2833 context->oc.asuchars[context->ucharptr] = othercasebit;
2835 else
2837 context->c.asuchars[context->ucharptr] = *cc;
2838 context->oc.asuchars[context->ucharptr] = 0;
2840 context->ucharptr++;
/* Flush the queue when a full chunk is pending, when nothing is left to
   compare, or (8-bit only) when two units are pending and one remains. */
2842 #ifdef COMPILE_PCRE8
2843 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2844 #else
2845 if (context->ucharptr >= 2 || context->length == 0)
2846 #endif
/* Start loading the next chunk into the current sourcereg, then switch
   sourcereg to the other temporary, which holds the chunk loaded earlier. */
2848 if (context->length >= 4)
2849 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2850 #ifdef COMPILE_PCRE8
2851 else if (context->length >= 2)
2852 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2853 else if (context->length >= 1)
2854 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2855 #else
2856 else if (context->length >= 2)
2857 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2858 #endif
2859 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
/* Compare the pending units as one integer of the matching width: OR in the
   case mask when it is non-zero, then backtrack on inequality. */
2861 switch(context->ucharptr)
2863 case 4 / sizeof(pcre_uchar):
2864 if (context->oc.asint != 0)
2865 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2866 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2867 break;
2869 case 2 / sizeof(pcre_uchar):
2870 if (context->oc.asushort != 0)
2871 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2872 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2873 break;
2875 #ifdef COMPILE_PCRE8
2876 case 1:
2877 if (context->oc.asbyte != 0)
2878 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2879 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2880 break;
2881 #endif
2883 default:
2884 SLJIT_ASSERT_STOP();
2885 break;
2887 context->ucharptr = 0;
2890 #else
2892 /* Unaligned read is unsupported. */
/* One code unit per comparison: load the next unit (if any remains) into the
   current sourcereg, switch to the other temporary and test it. */
2893 #ifdef COMPILE_PCRE8
2894 if (context->length > 0)
2895 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2896 #else
2897 if (context->length > 0)
2898 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2899 #endif
2900 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2902 if (othercasebit != 0 && othercasechar == cc)
2904 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2905 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2907 else
2908 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2910 #endif
2912 cc++;
2913 #ifdef SUPPORT_UTF
2914 utflength--;
2916 while (utflength > 0);
2917 #endif
2919 return cc;
2922 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Rebases the register named by the local `typereg` so that it holds
   (type - value): when value differs from the local `typeoffset`, a SUB or
   ADD of the difference is emitted, and value is recorded as the new
   typeoffset. Expects `typereg`, `typeoffset` and the compiler to be in
   scope at the point of use; `value` is evaluated more than once. */
2924 #define SET_TYPE_OFFSET(value) \
2925 if ((value) != typeoffset) \
2927 if ((value) > typeoffset) \
2928 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2929 else \
2930 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2932 typeoffset = (value);
/* Rebases TMP1 so that it holds (character - value): when value differs from
   the local `charoffset`, a SUB or ADD of the difference is emitted, and
   value is recorded as the new charoffset. Expects `charoffset` and the
   compiler to be in scope at the point of use; `value` is evaluated more
   than once. */
2934 #define SET_CHAR_OFFSET(value) \
2935 if ((value) != charoffset) \
2937 if ((value) > charoffset) \
2938 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2939 else \
2940 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2942 charoffset = (value);
2944 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
2946 DEFINE_COMPILER;
2947 jump_list *found = NULL;
2948 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
2949 unsigned int c;
2950 int compares;
2951 struct sljit_jump *jump = NULL;
2952 pcre_uchar *ccbegin;
2953 #ifdef SUPPORT_UCP
2954 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2955 BOOL charsaved = FALSE;
2956 int typereg = TMP1, scriptreg = TMP1;
2957 unsigned int typeoffset;
2958 #endif
2959 int invertcmp, numberofcmps;
2960 unsigned int charoffset;
2962 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2963 detect_partial_match(common, backtracks);
2964 read_char(common);
2966 if ((*cc++ & XCL_MAP) != 0)
2968 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2969 #ifndef COMPILE_PCRE8
2970 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2971 #elif defined SUPPORT_UTF
2972 if (common->utf)
2973 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2974 #endif
2976 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2977 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2978 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2979 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2980 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2981 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2983 #ifndef COMPILE_PCRE8
2984 JUMPHERE(jump);
2985 #elif defined SUPPORT_UTF
2986 if (common->utf)
2987 JUMPHERE(jump);
2988 #endif
2989 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2990 #ifdef SUPPORT_UCP
2991 charsaved = TRUE;
2992 #endif
2993 cc += 32 / sizeof(pcre_uchar);
2996 /* Scanning the necessary info. */
2997 ccbegin = cc;
2998 compares = 0;
2999 while (*cc != XCL_END)
3001 compares++;
3002 if (*cc == XCL_SINGLE)
3004 cc += 2;
3005 #ifdef SUPPORT_UTF
3006 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3007 #endif
3008 #ifdef SUPPORT_UCP
3009 needschar = TRUE;
3010 #endif
3012 else if (*cc == XCL_RANGE)
3014 cc += 2;
3015 #ifdef SUPPORT_UTF
3016 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3017 #endif
3018 cc++;
3019 #ifdef SUPPORT_UTF
3020 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3021 #endif
3022 #ifdef SUPPORT_UCP
3023 needschar = TRUE;
3024 #endif
3026 #ifdef SUPPORT_UCP
3027 else
3029 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3030 cc++;
3031 switch(*cc)
3033 case PT_ANY:
3034 break;
3036 case PT_LAMP:
3037 case PT_GC:
3038 case PT_PC:
3039 case PT_ALNUM:
3040 needstype = TRUE;
3041 break;
3043 case PT_SC:
3044 needsscript = TRUE;
3045 break;
3047 case PT_SPACE:
3048 case PT_PXSPACE:
3049 case PT_WORD:
3050 needstype = TRUE;
3051 needschar = TRUE;
3052 break;
3054 default:
3055 SLJIT_ASSERT_STOP();
3056 break;
3058 cc += 2;
3060 #endif
3063 #ifdef SUPPORT_UCP
3064 /* Simple register allocation. TMP1 is preferred if possible. */
3065 if (needstype || needsscript)
3067 if ((needschar || needsscript) && !charsaved)
3068 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3070 /* Needed to save important temporary registers. */
3071 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 &&
3072 STACK_TOP == SLJIT_TEMPORARY_REG2 &&
3073 TMP2 == SLJIT_TEMPORARY_REG3);
3075 if (needschar)
3077 if (needstype)
3078 typereg = RETURN_ADDR;
3079 if (needsscript)
3080 scriptreg = TMP3;
3082 else if (needstype && needsscript)
3083 scriptreg = TMP3;
3084 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3086 if (needstype)
3088 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3089 if (typereg != TMP1)
3090 OP1(SLJIT_MOV, typereg, 0, TMP1, 0);
3093 if (needsscript)
3095 /* Get the char again */
3096 if (needstype)
3097 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3099 add_jump(compiler, &common->getunicharscript, JUMP(SLJIT_FAST_CALL));
3100 if (scriptreg != TMP1)
3101 OP1(SLJIT_MOV, scriptreg, 0, TMP1, 0);
3104 if (needschar)
3105 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3107 #endif
3109 /* Generating code. */
3110 cc = ccbegin;
3111 charoffset = 0;
3112 numberofcmps = 0;
3113 #ifdef SUPPORT_UCP
3114 typeoffset = 0;
3115 #endif
3117 while (*cc != XCL_END)
3119 compares--;
3120 invertcmp = (compares == 0 && list != backtracks);
3121 jump = NULL;
3123 if (*cc == XCL_SINGLE)
3125 cc ++;
3126 #ifdef SUPPORT_UTF
3127 if (common->utf)
3129 GETCHARINC(c, cc);
3131 else
3132 #endif
3133 c = *cc++;
3135 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3137 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3138 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3139 numberofcmps++;
3141 else if (numberofcmps > 0)
3143 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3144 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3145 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3146 numberofcmps = 0;
3148 else
3150 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3151 numberofcmps = 0;
3154 else if (*cc == XCL_RANGE)
3156 cc ++;
3157 #ifdef SUPPORT_UTF
3158 if (common->utf)
3160 GETCHARINC(c, cc);
3162 else
3163 #endif
3164 c = *cc++;
3165 SET_CHAR_OFFSET(c);
3166 #ifdef SUPPORT_UTF
3167 if (common->utf)
3169 GETCHARINC(c, cc);
3171 else
3172 #endif
3173 c = *cc++;
3174 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3176 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3177 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3178 numberofcmps++;
3180 else if (numberofcmps > 0)
3182 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3183 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3184 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3185 numberofcmps = 0;
3187 else
3189 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3190 numberofcmps = 0;
3193 #ifdef SUPPORT_UCP
3194 else
3196 if (*cc == XCL_NOTPROP)
3197 invertcmp ^= 0x1;
3198 cc++;
3199 switch(*cc)
3201 case PT_ANY:
3202 if (list != backtracks)
3204 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3205 continue;
3207 else if (cc[-1] == XCL_NOTPROP)
3208 continue;
3209 jump = JUMP(SLJIT_JUMP);
3210 break;
3212 case PT_LAMP:
3213 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3214 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3215 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3216 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3217 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3218 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3219 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3220 break;
3222 case PT_GC:
3223 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3224 SET_TYPE_OFFSET(c);
3225 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3226 break;
3228 case PT_PC:
3229 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3230 break;
3232 case PT_SC:
3233 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3234 break;
3236 case PT_SPACE:
3237 case PT_PXSPACE:
3238 if (*cc == PT_SPACE)
3240 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3241 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3243 SET_CHAR_OFFSET(9);
3244 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3245 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3246 if (*cc == PT_SPACE)
3247 JUMPHERE(jump);
3249 SET_TYPE_OFFSET(ucp_Zl);
3250 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3251 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3252 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3253 break;
3255 case PT_WORD:
3256 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3257 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3258 /* ... fall through */
3260 case PT_ALNUM:
3261 SET_TYPE_OFFSET(ucp_Ll);
3262 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3263 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3264 SET_TYPE_OFFSET(ucp_Nd);
3265 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3266 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3267 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3268 break;
3270 cc += 2;
3272 #endif
3274 if (jump != NULL)
3275 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3278 if (found != NULL)
3279 set_jumps(found, LABEL());
3282 #undef SET_TYPE_OFFSET
3283 #undef SET_CHAR_OFFSET
3285 #endif
3287 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3289 DEFINE_COMPILER;
3290 int length;
3291 unsigned int c, oc, bit;
3292 compare_context context;
3293 struct sljit_jump *jump[4];
3294 #ifdef SUPPORT_UTF
3295 struct sljit_label *label;
3296 #ifdef SUPPORT_UCP
3297 pcre_uchar propdata[5];
3298 #endif
3299 #endif
3301 switch(type)
3303 case OP_SOD:
3304 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3305 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3306 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3307 return cc;
3309 case OP_SOM:
3310 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3311 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3312 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3313 return cc;
3315 case OP_NOT_WORD_BOUNDARY:
3316 case OP_WORD_BOUNDARY:
3317 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3318 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3319 return cc;
3321 case OP_NOT_DIGIT:
3322 case OP_DIGIT:
3323 detect_partial_match(common, backtracks);
3324 read_char8_type(common);
3325 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3326 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3327 return cc;
3329 case OP_NOT_WHITESPACE:
3330 case OP_WHITESPACE:
3331 detect_partial_match(common, backtracks);
3332 read_char8_type(common);
3333 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3334 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3335 return cc;
3337 case OP_NOT_WORDCHAR:
3338 case OP_WORDCHAR:
3339 detect_partial_match(common, backtracks);
3340 read_char8_type(common);
3341 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3342 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3343 return cc;
3345 case OP_ANY:
3346 detect_partial_match(common, backtracks);
3347 read_char(common);
3348 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3350 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3351 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3352 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3353 else
3354 jump[1] = check_str_end(common);
3356 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3357 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3358 if (jump[1] != NULL)
3359 JUMPHERE(jump[1]);
3360 JUMPHERE(jump[0]);
3362 else
3363 check_newlinechar(common, common->nltype, backtracks, TRUE);
3364 return cc;
3366 case OP_ALLANY:
3367 detect_partial_match(common, backtracks);
3368 #ifdef SUPPORT_UTF
3369 if (common->utf)
3371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3372 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3373 #ifdef COMPILE_PCRE8
3374 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3375 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3376 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3377 #else /* COMPILE_PCRE8 */
3378 #ifdef COMPILE_PCRE16
3379 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3380 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3382 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3383 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3385 #endif /* COMPILE_PCRE16 */
3386 #endif /* COMPILE_PCRE8 */
3387 JUMPHERE(jump[0]);
3388 return cc;
3390 #endif
3391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3392 return cc;
3394 case OP_ANYBYTE:
3395 detect_partial_match(common, backtracks);
3396 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3397 return cc;
3399 #ifdef SUPPORT_UTF
3400 #ifdef SUPPORT_UCP
3401 case OP_NOTPROP:
3402 case OP_PROP:
3403 propdata[0] = 0;
3404 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3405 propdata[2] = cc[0];
3406 propdata[3] = cc[1];
3407 propdata[4] = XCL_END;
3408 compile_xclass_trypath(common, propdata, backtracks);
3409 return cc + 2;
3410 #endif
3411 #endif
3413 case OP_ANYNL:
3414 detect_partial_match(common, backtracks);
3415 read_char(common);
3416 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3417 /* We don't need to handle soft partial matching case. */
3418 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3419 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3420 else
3421 jump[1] = check_str_end(common);
3422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3423 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3424 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3425 jump[3] = JUMP(SLJIT_JUMP);
3426 JUMPHERE(jump[0]);
3427 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
3428 JUMPHERE(jump[1]);
3429 JUMPHERE(jump[2]);
3430 JUMPHERE(jump[3]);
3431 return cc;
3433 case OP_NOT_HSPACE:
3434 case OP_HSPACE:
3435 detect_partial_match(common, backtracks);
3436 read_char(common);
3437 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3438 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3439 return cc;
3441 case OP_NOT_VSPACE:
3442 case OP_VSPACE:
3443 detect_partial_match(common, backtracks);
3444 read_char(common);
3445 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3446 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3447 return cc;
3449 #ifdef SUPPORT_UCP
3450 case OP_EXTUNI:
3451 detect_partial_match(common, backtracks);
3452 read_char(common);
3453 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3454 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3455 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3457 label = LABEL();
3458 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3459 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3460 read_char(common);
3461 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3462 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3463 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3465 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3466 JUMPHERE(jump[0]);
3467 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3469 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3470 /* Since we successfully read a char above, partial matching must occure. */
3471 check_partial(common, TRUE);
3472 JUMPHERE(jump[0]);
3474 return cc;
3475 #endif
3477 case OP_EODN:
3478 /* Requires rather complex checks. */
3479 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3480 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3482 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3483 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3484 if (common->mode == JIT_COMPILE)
3485 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3486 else
3488 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3489 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3490 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3491 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3492 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3493 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
3494 check_partial(common, TRUE);
3495 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3496 JUMPHERE(jump[1]);
3498 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3500 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3502 else if (common->nltype == NLTYPE_FIXED)
3504 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3505 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3506 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3507 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3509 else
3511 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3512 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3513 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3514 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3515 jump[2] = JUMP(SLJIT_C_GREATER);
3516 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
3517 /* Equal. */
3518 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3519 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3520 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3522 JUMPHERE(jump[1]);
3523 if (common->nltype == NLTYPE_ANYCRLF)
3525 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3526 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3527 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3529 else
3531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3532 read_char(common);
3533 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3534 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3535 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3536 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3538 JUMPHERE(jump[2]);
3539 JUMPHERE(jump[3]);
3541 JUMPHERE(jump[0]);
3542 check_partial(common, FALSE);
3543 return cc;
3545 case OP_EOD:
3546 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3547 check_partial(common, FALSE);
3548 return cc;
3550 case OP_CIRC:
3551 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3552 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3553 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3554 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3555 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3556 return cc;
3558 case OP_CIRCM:
3559 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3560 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3561 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3562 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3563 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3564 jump[0] = JUMP(SLJIT_JUMP);
3565 JUMPHERE(jump[1]);
3567 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3568 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3570 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3571 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3572 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3573 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3574 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3575 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3577 else
3579 skip_char_back(common);
3580 read_char(common);
3581 check_newlinechar(common, common->nltype, backtracks, FALSE);
3583 JUMPHERE(jump[0]);
3584 return cc;
3586 case OP_DOLL:
3587 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3588 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3589 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3591 if (!common->endonly)
3592 compile_char1_trypath(common, OP_EODN, cc, backtracks);
3593 else
3595 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3596 check_partial(common, FALSE);
3598 return cc;
3600 case OP_DOLLM:
3601 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3602 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3603 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3604 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3605 check_partial(common, FALSE);
3606 jump[0] = JUMP(SLJIT_JUMP);
3607 JUMPHERE(jump[1]);
3609 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3611 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3612 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3613 if (common->mode == JIT_COMPILE)
3614 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3615 else
3617 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3618 /* STR_PTR = STR_END - IN_UCHARS(1) */
3619 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3620 check_partial(common, TRUE);
3621 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3622 JUMPHERE(jump[1]);
3625 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3626 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3627 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3629 else
3631 peek_char(common);
3632 check_newlinechar(common, common->nltype, backtracks, FALSE);
3634 JUMPHERE(jump[0]);
3635 return cc;
3637 case OP_CHAR:
3638 case OP_CHARI:
3639 length = 1;
3640 #ifdef SUPPORT_UTF
3641 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3642 #endif
3643 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3646 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3648 context.length = IN_UCHARS(length);
3649 context.sourcereg = -1;
3650 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3651 context.ucharptr = 0;
3652 #endif
3653 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
3655 detect_partial_match(common, backtracks);
3656 read_char(common);
3657 #ifdef SUPPORT_UTF
3658 if (common->utf)
3660 GETCHAR(c, cc);
3662 else
3663 #endif
3664 c = *cc;
3665 if (type == OP_CHAR || !char_has_othercase(common, cc))
3667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3668 return cc + length;
3670 oc = char_othercase(common, c);
3671 bit = c ^ oc;
3672 if (ispowerof2(bit))
3674 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3675 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3676 return cc + length;
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3679 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3681 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3682 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3683 return cc + length;
3685 case OP_NOT:
3686 case OP_NOTI:
3687 detect_partial_match(common, backtracks);
3688 length = 1;
3689 #ifdef SUPPORT_UTF
3690 if (common->utf)
3692 #ifdef COMPILE_PCRE8
3693 c = *cc;
3694 if (c < 128)
3696 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3697 if (type == OP_NOT || !char_has_othercase(common, cc))
3698 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3699 else
3701 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3702 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3703 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3705 /* Skip the variable-length character. */
3706 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3707 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3708 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3710 JUMPHERE(jump[0]);
3711 return cc + 1;
3713 else
3714 #endif /* COMPILE_PCRE8 */
3716 GETCHARLEN(c, cc, length);
3717 read_char(common);
3720 else
3721 #endif /* SUPPORT_UTF */
3723 read_char(common);
3724 c = *cc;
3727 if (type == OP_NOT || !char_has_othercase(common, cc))
3728 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3729 else
3731 oc = char_othercase(common, c);
3732 bit = c ^ oc;
3733 if (ispowerof2(bit))
3735 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3736 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3738 else
3740 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3741 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3744 return cc + length;
3746 case OP_CLASS:
3747 case OP_NCLASS:
3748 detect_partial_match(common, backtracks);
3749 read_char(common);
3750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3751 jump[0] = NULL;
3752 #ifdef COMPILE_PCRE8
3753 /* This check only affects 8 bit mode. In other modes, we
3754 always need to compare the value with 255. */
3755 if (common->utf)
3756 #endif /* COMPILE_PCRE8 */
3758 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3759 if (type == OP_CLASS)
3761 add_jump(compiler, backtracks, jump[0]);
3762 jump[0] = NULL;
3765 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3766 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3767 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3768 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3769 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3770 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3771 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3772 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3773 if (jump[0] != NULL)
3774 JUMPHERE(jump[0]);
3775 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3776 return cc + 32 / sizeof(pcre_uchar);
3778 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3779 case OP_XCLASS:
3780 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
3781 return cc + GET(cc, 0) - 1;
3782 #endif
3784 case OP_REVERSE:
3785 length = GET(cc, 0);
3786 if (length == 0)
3787 return cc + LINK_SIZE;
3788 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3789 #ifdef SUPPORT_UTF
3790 if (common->utf)
3792 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3793 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3794 label = LABEL();
3795 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3796 skip_char_back(common);
3797 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3798 JUMPTO(SLJIT_C_NOT_ZERO, label);
3800 else
3801 #endif
3803 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3804 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3805 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3807 check_start_used_ptr(common);
3808 return cc + LINK_SIZE;
3810 SLJIT_ASSERT_STOP();
3811 return cc;
3814 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
3816 /* This function consumes at least one input character. */
3817 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3818 DEFINE_COMPILER;
3819 pcre_uchar *ccbegin = cc;
3820 compare_context context;
3821 int size;
3823 context.length = 0;
3826 if (cc >= ccend)
3827 break;
3829 if (*cc == OP_CHAR)
3831 size = 1;
3832 #ifdef SUPPORT_UTF
3833 if (common->utf && HAS_EXTRALEN(cc[1]))
3834 size += GET_EXTRALEN(cc[1]);
3835 #endif
3837 else if (*cc == OP_CHARI)
3839 size = 1;
3840 #ifdef SUPPORT_UTF
3841 if (common->utf)
3843 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3844 size = 0;
3845 else if (HAS_EXTRALEN(cc[1]))
3846 size += GET_EXTRALEN(cc[1]);
3848 else
3849 #endif
3850 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3851 size = 0;
3853 else
3854 size = 0;
3856 cc += 1 + size;
3857 context.length += IN_UCHARS(size);
3859 while (size > 0 && context.length <= 128);
3861 cc = ccbegin;
3862 if (context.length > 0)
3864 /* We have a fixed-length byte sequence. */
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3866 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3868 context.sourcereg = -1;
3869 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3870 context.ucharptr = 0;
3871 #endif
3872 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
3873 return cc;
3876 /* A non-fixed length character will be checked if length == 0. */
3877 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
3880 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3882 DEFINE_COMPILER;
3883 int offset = GET2(cc, 1) << 1;
3885 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3886 if (!common->jscript_compat)
3888 if (backtracks == NULL)
3890 /* OVECTOR(1) contains the "string begin - 1" constant. */
3891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3892 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3894 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3895 return JUMP(SLJIT_C_NOT_ZERO);
3897 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3899 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3902 /* Forward definitions. */
3903 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
3904 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
/* Allocates a zero-filled backtrack record of "size" bytes, stores "ccstart"
in its cc field and links it in as the new parent->top (the previous top
becomes its prev). If the compiler reports an error after the allocation, the
enclosing function returns "error". The macro uses the variables "compiler",
"parent" and "backtrack" of the function in which it is expanded. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
/* Same as PUSH_BACKTRACK, for functions returning void: if the compiler
reports an error after the allocation, the enclosing function simply
returns. The macro uses the variables "compiler", "parent" and "backtrack"
of the function in which it is expanded. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
/* Views the local "backtrack" pointer as a pointer to the given type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
3934 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
3936 DEFINE_COMPILER;
3937 int offset = GET2(cc, 1) << 1;
3938 struct sljit_jump *jump = NULL;
3939 struct sljit_jump *partial;
3940 struct sljit_jump *nopartial;
3942 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3943 /* OVECTOR(1) contains the "string begin - 1" constant. */
3944 if (withchecks && !common->jscript_compat)
3945 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3947 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3948 if (common->utf && *cc == OP_REFI)
3950 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3952 if (withchecks)
3953 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3955 /* Needed to save important temporary registers. */
3956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3957 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3959 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3960 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3961 if (common->mode == JIT_COMPILE)
3962 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3963 else
3965 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3966 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3967 check_partial(common, FALSE);
3968 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3969 JUMPHERE(nopartial);
3971 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3973 else
3974 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3976 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3977 if (withchecks)
3978 jump = JUMP(SLJIT_C_ZERO);
3980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3981 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3982 if (common->mode == JIT_COMPILE)
3983 add_jump(compiler, backtracks, partial);
3985 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3986 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3988 if (common->mode != JIT_COMPILE)
3990 nopartial = JUMP(SLJIT_JUMP);
3991 JUMPHERE(partial);
3992 /* TMP2 -= STR_END - STR_PTR */
3993 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3994 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3995 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3996 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3997 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3998 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3999 JUMPHERE(partial);
4000 check_partial(common, FALSE);
4001 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4002 JUMPHERE(nopartial);
4006 if (jump != NULL)
4008 if (emptyfail)
4009 add_jump(compiler, backtracks, jump);
4010 else
4011 JUMPHERE(jump);
4013 return cc + 1 + IMM2_SIZE;
/* Emits the try path for a back reference that is followed by a repeat
   quantifier (OP_CRSTAR .. OP_CRMINRANGE).

   common - shared JIT compiler state.
   cc     - points at the reference opcode; the repeat opcode is read from
            cc[1 + IMM2_SIZE].
   parent - not used directly here; PUSH_BACKTRACK refers to it (the macro
            body is outside this chunk - presumably it links the new
            iterator_backtrack below parent; confirm).

   Returns the position just past the quantifier (including the two IMM2
   operands of the range forms). NULL is the error value handed to
   PUSH_BACKTRACK. */
4016 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4018 DEFINE_COMPILER;
4019 backtrack_common *backtrack;
4020 pcre_uchar type;
4021 struct sljit_label *label;
4022 struct sljit_jump *zerolength;
4023 struct sljit_jump *jump = NULL;
4024 pcre_uchar *ccbegin = cc;
/* max == 0 is used below as "no upper limit" (star and plus forms). */
4025 int min = 0, max = 0;
4026 BOOL minimize;
4028 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4030 type = cc[1 + IMM2_SIZE];
/* Only the low bit of the repeat opcode selects the lazy code path
   (presumably the OP_CRMIN* opcodes are the odd-numbered ones - confirm). */
4031 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and advance cc past the quantifier. */
4032 switch(type)
4034 case OP_CRSTAR:
4035 case OP_CRMINSTAR:
4036 min = 0;
4037 max = 0;
4038 cc += 1 + IMM2_SIZE + 1;
4039 break;
4040 case OP_CRPLUS:
4041 case OP_CRMINPLUS:
4042 min = 1;
4043 max = 0;
4044 cc += 1 + IMM2_SIZE + 1;
4045 break;
4046 case OP_CRQUERY:
4047 case OP_CRMINQUERY:
4048 min = 0;
4049 max = 1;
4050 cc += 1 + IMM2_SIZE + 1;
4051 break;
4052 case OP_CRRANGE:
4053 case OP_CRMINRANGE:
4054 min = GET2(cc, 1 + IMM2_SIZE + 1);
4055 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4056 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4057 break;
4058 default:
4059 SLJIT_ASSERT_STOP();
4060 break;
/* Greedy repetition. */
4063 if (!minimize)
4065 if (min == 0)
/* Zero repetitions are acceptable: push the current STR_PTR and a 0, and
   pass no backtrack list to compile_ref_checks. */
4067 allocate_stack(common, 2);
4068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4070 /* Temporary release of STR_PTR. */
4071 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4072 zerolength = compile_ref_checks(common, ccbegin, NULL);
4073 /* Restore if not zero length. */
4074 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4076 else
/* At least one repetition is required: push a single 0 and let the checks
   add their failures to the backtrack list. */
4078 allocate_stack(common, 1);
4079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4080 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
/* A repeat counter is only needed when one of the limits is greater than
   one; it is kept in the POSSESSIVE0 local. */
4083 if (min > 1 || max > 1)
4084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
/* Start of the matching loop. */
4086 label = LABEL();
4087 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4089 if (min > 1 || max > 1)
/* Increment the counter after a successful match of the reference. */
4091 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Loop straight back (nothing pushed) until the minimum is reached. */
4094 if (min > 1)
4095 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4096 if (max > 1)
/* Bounded: leave the loop once the counter reaches max; otherwise push the
   current STR_PTR and try another repetition. */
4098 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4099 allocate_stack(common, 1);
4100 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4101 JUMPTO(SLJIT_JUMP, label);
4102 JUMPHERE(jump);
/* Unbounded: push STR_PTR and loop unconditionally; the loop is left only
   through the jumps emitted by compile_ref_trypath. */
4106 if (max == 0)
4108 /* Includes min > 1 case as well. */
4109 allocate_stack(common, 1);
4110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4111 JUMPTO(SLJIT_JUMP, label);
/* The jump returned by compile_ref_checks lands here. */
4114 JUMPHERE(zerolength);
4115 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4117 decrease_call_count(common);
4118 return cc;
/* Lazy repetition. Two stack slots are used: STACK(0) starts as 0 and later
   holds STR_PTR, STACK(1) is the repeat counter. For OP_CRMINSTAR (min == 0,
   max == 0) the counter is never read below, so it is left uninitialised. */
4121 allocate_stack(common, 2);
4122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4123 if (type != OP_CRMINSTAR)
4124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4126 if (min == 0)
/* Zero repetitions are tried first: record STR_PTR and skip the match code. */
4128 zerolength = compile_ref_checks(common, ccbegin, NULL);
4129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4130 jump = JUMP(SLJIT_JUMP);
4132 else
4133 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
/* Entry point recorded in the backtrack record; the code from here matches
   one more repetition. */
4135 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
/* Give up when the counter has already reached the upper limit. */
4136 if (max > 0)
4137 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4139 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4140 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4142 if (min > 1)
/* Count the repetition and keep matching until the minimum is reached. */
4144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4145 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4147 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
/* Only an upper limit: just count the repetition. */
4149 else if (max > 0)
4150 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
/* Both the "skip the first match" jump (min == 0) and the jump returned by
   compile_ref_checks continue here. */
4152 if (jump != NULL)
4153 JUMPHERE(jump);
4154 JUMPHERE(zerolength);
4156 decrease_call_count(common);
4157 return cc;
4160 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4162 DEFINE_COMPILER;
4163 backtrack_common *backtrack;
4164 recurse_entry *entry = common->entries;
4165 recurse_entry *prev = NULL;
4166 int start = GET(cc, 1);
4168 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4169 while (entry != NULL)
4171 if (entry->start == start)
4172 break;
4173 prev = entry;
4174 entry = entry->next;
4177 if (entry == NULL)
4179 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4180 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4181 return NULL;
4182 entry->next = NULL;
4183 entry->entry = NULL;
4184 entry->calls = NULL;
4185 entry->start = start;
4187 if (prev != NULL)
4188 prev->next = entry;
4189 else
4190 common->entries = entry;
4193 if (common->has_set_som && common->mark_ptr != 0)
4195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4196 allocate_stack(common, 2);
4197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4201 else if (common->has_set_som || common->mark_ptr != 0)
4203 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4204 allocate_stack(common, 1);
4205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4208 if (entry->entry == NULL)
4209 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4210 else
4211 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4212 /* Leave if the match is failed. */
4213 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4214 return cc + 1 + LINK_SIZE;
/* Emits the try path of an assertion (opcode in OP_ASSERT .. OP_ASSERTBACK_NOT),
   optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

   common      - shared JIT compiler state.
   cc          - start of the assertion, or of the BRAZERO/BRAMINZERO prefix.
   backtrack   - assert_backtrack record; framesize and localptr are stored in
                 it here, trypath is set for the BRAZERO case and read for the
                 BRAMINZERO case (so it has to be set elsewhere for that case).
   conditional - TRUE when the assertion is the condition of a conditional
                 group (see the caller passing TRUE for OP_COND/OP_SCOND);
                 failures then go to backtrack->condfailed instead of
                 backtrack->common.topbacktracks. Must be FALSE when a
                 BRAZERO/BRAMINZERO prefix is present.

   Returns the position after the item that ends the last alternative
   (cc + 1 + LINK_SIZE), or NULL when the compiler reports an error while an
   alternative is compiled. The leave/accept labels and lists of common are
   saved on entry and restored on every return path. */
4217 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4219 DEFINE_COMPILER;
4220 int framesize;
4221 int localptr;
4222 backtrack_common altbacktrack;
4223 pcre_uchar *ccbegin;
4224 pcre_uchar opcode;
4225 pcre_uchar bra = OP_BRA;
4226 jump_list *tmp = NULL;
4227 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4228 jump_list **found;
4229 /* Saving previous accept variables. */
4230 struct sljit_label *save_leavelabel = common->leavelabel;
4231 struct sljit_label *save_acceptlabel = common->acceptlabel;
4232 jump_list *save_leave = common->leave;
4233 jump_list *save_accept = common->accept;
4234 struct sljit_jump *jump;
4235 struct sljit_jump *brajump = NULL;
/* An optional BRAZERO/BRAMINZERO prefix is remembered in bra and skipped. */
4237 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4239 SLJIT_ASSERT(!conditional);
4240 bra = *cc;
4241 cc++;
4243 localptr = PRIV_DATA(cc);
4244 SLJIT_ASSERT(localptr != 0);
4245 framesize = get_framesize(common, cc, FALSE);
4246 backtrack->framesize = framesize;
4247 backtrack->localptr = localptr;
4248 opcode = *cc;
4249 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Where a matching alternative jumps to: for positive assertions the local
   list tmp (bound at "Assert is successful" below); for negative assertions
   directly to the failure target. */
4250 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin stays at the start of the current alternative, cc moves to its end. */
4251 ccbegin = cc;
4252 cc += GET(cc, 1);
4254 if (bra == OP_BRAMINZERO)
4256 /* This is a braminzero backtrack path. */
4257 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4258 free_stack(common, 1);
4259 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* Stack set-up. Without a frame (framesize < 0) the current STACK_TOP is
   remembered in the local and STR_PTR is pushed. With a frame, STR_PTR and
   the previous value of the local are pushed and init_frame is called for
   the remaining slots. */
4262 if (framesize < 0)
4264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4265 allocate_stack(common, 1);
4266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4268 else
4270 allocate_stack(common, framesize + 2);
4271 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4272 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4275 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4276 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
/* Every alternative is compiled against a local backtrack_common, with the
   leave/accept state of common cleared. */
4279 memset(&altbacktrack, 0, sizeof(backtrack_common));
4280 common->leavelabel = NULL;
4281 common->leave = NULL;
/* One iteration per alternative. */
4282 while (1)
4284 common->acceptlabel = NULL;
4285 common->accept = NULL;
4286 altbacktrack.top = NULL;
4287 altbacktrack.topbacktracks = NULL;
/* From the second alternative on, STR_PTR is reloaded from the stack. */
4289 if (*ccbegin == OP_ALT)
4290 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4292 altbacktrack.cc = ccbegin;
4293 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
/* On a compiler error restore the saved state before returning. */
4294 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4296 common->leavelabel = save_leavelabel;
4297 common->acceptlabel = save_acceptlabel;
4298 common->leave = save_leave;
4299 common->accept = save_accept;
4300 return NULL;
/* Jumps collected in common->accept while compiling this alternative are
   bound to the code that follows the alternative. */
4302 common->acceptlabel = LABEL();
4303 if (common->accept != NULL)
4304 set_jumps(common->accept, common->acceptlabel);
4306 /* Reset stack. */
4307 if (framesize < 0)
4308 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4309 else {
4310 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4312 /* We don't need to keep the STR_PTR, only the previous localptr. */
4313 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4315 else
4317 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4318 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* Negative assertion whose alternative matched: prepare STR_PTR, the stack
   and the local before jumping to the failure target (found == target). */
4322 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4324 /* We know that STR_PTR was stored on the top of the stack. */
4325 if (conditional)
4326 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4327 else if (bra == OP_BRAZERO)
4329 if (framesize < 0)
4330 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4331 else
4333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4334 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4335 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4337 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4340 else if (framesize >= 0)
4342 /* For OP_BRA and OP_BRAMINZERO. */
4343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
/* This alternative matched. */
4346 add_jump(compiler, found, JUMP(SLJIT_JUMP));
/* Backtrack code of the alternative; its failure jumps are bound to the code
   emitted next (the following alternative, or the code after the loop). */
4348 compile_backtrackpath(common, altbacktrack.top);
4349 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4351 common->leavelabel = save_leavelabel;
4352 common->acceptlabel = save_acceptlabel;
4353 common->leave = save_leave;
4354 common->accept = save_accept;
4355 return NULL;
4357 set_jumps(altbacktrack.topbacktracks, LABEL());
/* Stop after the last alternative, otherwise step to the next one. */
4359 if (*cc != OP_ALT)
4360 break;
4362 ccbegin = cc;
4363 cc += GET(cc, 1);
4365 /* None of them matched. */
4366 if (common->leave != NULL)
4367 set_jumps(common->leave, LABEL());
4369 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4371 /* Assert is failed. */
4372 if (conditional || bra == OP_BRAZERO)
4373 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4375 if (framesize < 0)
4377 /* The topmost item should be 0. */
4378 if (bra == OP_BRAZERO)
4379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4380 else
4381 free_stack(common, 1);
4383 else
4385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4386 /* The topmost item should be 0. */
4387 if (bra == OP_BRAZERO)
4389 free_stack(common, framesize + 1);
4390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4392 else
4393 free_stack(common, framesize + 2);
4394 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
/* Jump over the success code. For BRAZERO the jump is bound to the trypath
   label further below; otherwise it is added to the failure target. */
4396 jump = JUMP(SLJIT_JUMP);
4397 if (bra != OP_BRAZERO)
4398 add_jump(compiler, target, jump);
4400 /* Assert is successful. */
4401 set_jumps(tmp, LABEL());
4402 if (framesize < 0)
4404 /* We know that STR_PTR was stored on the top of the stack. */
4405 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4406 /* Keep the STR_PTR on the top of the stack. */
4407 if (bra == OP_BRAZERO)
4408 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4409 else if (bra == OP_BRAMINZERO)
4411 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4415 else
4417 if (bra == OP_BRA)
4419 /* We don't need to keep the STR_PTR, only the previous localptr. */
4420 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4421 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4423 else
4425 /* We don't need to keep the STR_PTR, only the previous localptr. */
4426 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4427 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot receives STR_PTR for BRAZERO and 0 for BRAMINZERO. */
4428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4432 if (bra == OP_BRAZERO)
/* The "assert failed" jump emitted above continues at the trypath label. */
4434 backtrack->trypath = LABEL();
4435 sljit_set_label(jump, backtrack->trypath);
4437 else if (bra == OP_BRAMINZERO)
/* backtrack->trypath is read here, so it must already hold a label. */
4439 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4440 JUMPHERE(brajump);
4441 if (framesize >= 0)
4443 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4444 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4447 set_jumps(backtrack->common.topbacktracks, LABEL());
4450 else
4452 /* AssertNot is successful. */
4453 if (framesize < 0)
4455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4456 if (bra != OP_BRA)
4457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4458 else
4459 free_stack(common, 1);
4461 else
4463 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4465 /* The topmost item should be 0. */
4466 if (bra != OP_BRA)
4468 free_stack(common, framesize + 1);
4469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4471 else
4472 free_stack(common, framesize + 2);
4473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4476 if (bra == OP_BRAZERO)
4477 backtrack->trypath = LABEL();
4478 else if (bra == OP_BRAMINZERO)
4480 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4481 JUMPHERE(brajump);
/* With a BRAZERO/BRAMINZERO prefix the jumps taken when an alternative of
   the negative assertion matched are bound right here and the list is
   emptied. */
4484 if (bra != OP_BRA)
4486 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
4487 set_jumps(backtrack->common.topbacktracks, LABEL());
4488 backtrack->common.topbacktracks = NULL;
/* Restore the accept/leave state saved on entry. */
4492 common->leavelabel = save_leavelabel;
4493 common->acceptlabel = save_acceptlabel;
4494 common->leave = save_leave;
4495 common->accept = save_accept;
4496 return cc + 1 + LINK_SIZE;
4499 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4501 int condition = FALSE;
4502 pcre_uchar *slotA = name_table;
4503 pcre_uchar *slotB;
4504 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4505 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4506 sljit_w no_capture;
4507 int i;
4509 locals += refno & 0xff;
4510 refno >>= 8;
4511 no_capture = locals[1];
4513 for (i = 0; i < name_count; i++)
4515 if (GET2(slotA, 0) == refno) break;
4516 slotA += name_entry_size;
4519 if (i < name_count)
4521 /* Found a name for the number - there can be only one; duplicate names
4522 for different numbers are allowed, but not vice versa. First scan down
4523 for duplicates. */
4525 slotB = slotA;
4526 while (slotB > name_table)
4528 slotB -= name_entry_size;
4529 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4531 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4532 if (condition) break;
4534 else break;
4537 /* Scan up for duplicates */
4538 if (!condition)
4540 slotB = slotA;
4541 for (i++; i < name_count; i++)
4543 slotB += name_entry_size;
4544 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4546 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4547 if (condition) break;
4549 else break;
4553 return condition;
4556 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4558 int condition = FALSE;
4559 pcre_uchar *slotA = name_table;
4560 pcre_uchar *slotB;
4561 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4562 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4563 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4564 int i;
4566 for (i = 0; i < name_count; i++)
4568 if (GET2(slotA, 0) == recno) break;
4569 slotA += name_entry_size;
4572 if (i < name_count)
4574 /* Found a name for the number - there can be only one; duplicate
4575 names for different numbers are allowed, but not vice versa. First
4576 scan down for duplicates. */
4578 slotB = slotA;
4579 while (slotB > name_table)
4581 slotB -= name_entry_size;
4582 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4584 condition = GET2(slotB, 0) == group_num;
4585 if (condition) break;
4587 else break;
4590 /* Scan up for duplicates */
4591 if (!condition)
4593 slotB = slotA;
4594 for (i++; i < name_count; i++)
4596 slotB += name_entry_size;
4597 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4599 condition = GET2(slotB, 0) == group_num;
4600 if (condition) break;
4602 else break;
4606 return condition;
4610 Handling bracketed expressions is probably the most complex part.
4612 Stack layout naming characters:
4613 S - Push the current STR_PTR
4614 0 - Push a 0 (NULL)
4615 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4616 before the next alternative. Not pushed if there are no alternatives.
4617 M - Any values pushed by the current alternative. Can be empty, or anything.
4618 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4619 L - Push the previous local (pointed by localptr) to the stack
4620 () - optional values stored on the stack
4621 ()* - optional, can be stored multiple times
4623 The following list shows the regular expression templates, their PCRE byte codes
4624 and stack layout supported by pcre-sljit.
4626 (?:) OP_BRA | OP_KET A M
4627 () OP_CBRA | OP_KET C M
4628 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4629 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4630 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4631 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4632 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4633 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4634 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4635 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4636 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4637 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4638 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4639 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4640 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4641 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4642 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4643 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4644 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4645 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4646 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4647 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4650 Stack layout naming characters:
4651 A - Push the alternative index (starting from 0) on the stack.
4652 Not pushed if there are no alternatives.
4653 M - Any values pushed by the current alternative. Can be empty, or anything.
4655 The next list shows the possible content of a bracket:
4656 (|) OP_*BRA | OP_ALT ... M A
4657 (?()|) OP_*COND | OP_ALT M A
4658 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4659 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4660 Or nothing, if trace is unnecessary
4663 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4665 DEFINE_COMPILER;
4666 backtrack_common *backtrack;
4667 pcre_uchar opcode;
4668 int localptr = 0;
4669 int offset = 0;
4670 int stacksize;
4671 pcre_uchar *ccbegin;
4672 pcre_uchar *trypath;
4673 pcre_uchar bra = OP_BRA;
4674 pcre_uchar ket;
4675 assert_backtrack *assert;
4676 BOOL has_alternatives;
4677 struct sljit_jump *jump;
4678 struct sljit_jump *skip;
4679 struct sljit_label *rmaxlabel = NULL;
4680 struct sljit_jump *braminzerojump = NULL;
4682 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
4684 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4686 bra = *cc;
4687 cc++;
4688 opcode = *cc;
4691 opcode = *cc;
4692 ccbegin = cc;
4693 trypath = ccbegin + 1 + LINK_SIZE;
4695 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4697 /* Drop this bracket_backtrack. */
4698 parent->top = backtrack->prev;
4699 return bracketend(cc);
4702 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4703 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4704 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4705 cc += GET(cc, 1);
4707 has_alternatives = *cc == OP_ALT;
4708 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4710 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
4711 if (*trypath == OP_NRREF)
4713 stacksize = GET2(trypath, 1);
4714 if (common->currententry == NULL || stacksize == RREF_ANY)
4715 has_alternatives = FALSE;
4716 else if (common->currententry->start == 0)
4717 has_alternatives = stacksize != 0;
4718 else
4719 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4723 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4724 opcode = OP_SCOND;
4725 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4726 opcode = OP_ONCE;
4728 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4730 /* Capturing brackets has a pre-allocated space. */
4731 offset = GET2(ccbegin, 1 + LINK_SIZE);
4732 localptr = OVECTOR_PRIV(offset);
4733 offset <<= 1;
4734 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4735 trypath += IMM2_SIZE;
4737 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4739 /* Other brackets simply allocate the next entry. */
4740 localptr = PRIV_DATA(ccbegin);
4741 SLJIT_ASSERT(localptr != 0);
4742 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4743 if (opcode == OP_ONCE)
4744 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
4747 /* Instructions before the first alternative. */
4748 stacksize = 0;
4749 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4750 stacksize++;
4751 if (bra == OP_BRAZERO)
4752 stacksize++;
4754 if (stacksize > 0)
4755 allocate_stack(common, stacksize);
4757 stacksize = 0;
4758 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4761 stacksize++;
4764 if (bra == OP_BRAZERO)
4765 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4767 if (bra == OP_BRAMINZERO)
4769 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
4770 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4771 if (ket != OP_KETRMIN)
4773 free_stack(common, 1);
4774 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4776 else
4778 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4780 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4781 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4782 /* Nothing stored during the first run. */
4783 skip = JUMP(SLJIT_JUMP);
4784 JUMPHERE(jump);
4785 /* Checking zero-length iteration. */
4786 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4788 /* When we come from outside, localptr contains the previous STR_PTR. */
4789 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4791 else
4793 /* Except when the whole stack frame must be saved. */
4794 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4795 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
4797 JUMPHERE(skip);
4799 else
4801 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4802 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4803 JUMPHERE(jump);
4808 if (ket == OP_KETRMIN)
4809 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
4811 if (ket == OP_KETRMAX)
4813 rmaxlabel = LABEL();
4814 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4815 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
4818 /* Handling capturing brackets and alternatives. */
4819 if (opcode == OP_ONCE)
4821 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4823 /* Neither capturing brackets nor recursions are not found in the block. */
4824 if (ket == OP_KETRMIN)
4826 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4827 allocate_stack(common, 2);
4828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4830 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4832 else if (ket == OP_KETRMAX || has_alternatives)
4834 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4835 allocate_stack(common, 1);
4836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4838 else
4839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4841 else
4843 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4845 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
4846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4847 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
4848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4850 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4851 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
4853 else
4855 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
4856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4857 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
4858 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4860 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
4864 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4866 /* Saving the previous values. */
4867 allocate_stack(common, 3);
4868 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4870 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4874 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4876 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4878 /* Saving the previous value. */
4879 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4880 allocate_stack(common, 1);
4881 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4884 else if (has_alternatives)
4886 /* Pushing the starting string pointer. */
4887 allocate_stack(common, 1);
4888 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4891 /* Generating code for the first alternative. */
4892 if (opcode == OP_COND || opcode == OP_SCOND)
4894 if (*trypath == OP_CREF)
4896 SLJIT_ASSERT(has_alternatives);
4897 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
4898 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4899 trypath += 1 + IMM2_SIZE;
4901 else if (*trypath == OP_NCREF)
4903 SLJIT_ASSERT(has_alternatives);
4904 stacksize = GET2(trypath, 1);
4905 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4910 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
4911 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4912 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4913 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4914 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4915 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4917 JUMPHERE(jump);
4918 trypath += 1 + IMM2_SIZE;
4920 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
4922 /* Never has other case. */
4923 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
4925 stacksize = GET2(trypath, 1);
4926 if (common->currententry == NULL)
4927 stacksize = 0;
4928 else if (stacksize == RREF_ANY)
4929 stacksize = 1;
4930 else if (common->currententry->start == 0)
4931 stacksize = stacksize == 0;
4932 else
4933 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4935 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
4937 SLJIT_ASSERT(!has_alternatives);
4938 if (stacksize != 0)
4939 trypath += 1 + IMM2_SIZE;
4940 else
4942 if (*cc == OP_ALT)
4944 trypath = cc + 1 + LINK_SIZE;
4945 cc += GET(cc, 1);
4947 else
4948 trypath = cc;
4951 else
4953 SLJIT_ASSERT(has_alternatives);
4955 stacksize = GET2(trypath, 1);
4956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4960 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4961 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4962 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4963 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4964 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4965 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4966 trypath += 1 + IMM2_SIZE;
4969 else
4971 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
4972 /* Similar code as PUSH_BACKTRACK macro. */
4973 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
4974 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4975 return NULL;
4976 memset(assert, 0, sizeof(assert_backtrack));
4977 assert->common.cc = trypath;
4978 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
4979 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
4983 compile_trypath(common, trypath, cc, backtrack);
4984 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4985 return NULL;
4987 if (opcode == OP_ONCE)
4989 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4991 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4992 /* TMP2 which is set here used by OP_KETRMAX below. */
4993 if (ket == OP_KETRMAX)
4994 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4995 else if (ket == OP_KETRMIN)
4997 /* Move the STR_PTR to the localptr. */
4998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5001 else
5003 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5004 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5005 if (ket == OP_KETRMAX)
5007 /* TMP2 which is set here used by OP_KETRMAX below. */
5008 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5013 stacksize = 0;
5014 if (ket != OP_KET || bra != OP_BRA)
5015 stacksize++;
5016 if (has_alternatives && opcode != OP_ONCE)
5017 stacksize++;
5019 if (stacksize > 0)
5020 allocate_stack(common, stacksize);
5022 stacksize = 0;
5023 if (ket != OP_KET)
5025 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5026 stacksize++;
5028 else if (bra != OP_BRA)
5030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5031 stacksize++;
5034 if (has_alternatives)
5036 if (opcode != OP_ONCE)
5037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5038 if (ket != OP_KETRMAX)
5039 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5042 /* Must be after the trypath label. */
5043 if (offset != 0)
5045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5050 if (ket == OP_KETRMAX)
5052 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5054 if (has_alternatives)
5055 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5056 /* Checking zero-length iteration. */
5057 if (opcode != OP_ONCE)
5059 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5060 /* Drop STR_PTR for greedy plus quantifier. */
5061 if (bra != OP_BRAZERO)
5062 free_stack(common, 1);
5064 else
5065 /* TMP2 must contain the starting STR_PTR. */
5066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5068 else
5069 JUMPTO(SLJIT_JUMP, rmaxlabel);
5070 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5073 if (bra == OP_BRAZERO)
5074 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5076 if (bra == OP_BRAMINZERO)
5078 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5079 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5080 if (braminzerojump != NULL)
5082 JUMPHERE(braminzerojump);
5083 /* We need to release the end pointer to perform the
5084 backtrack for the zero-length iteration. When
5085 framesize is < 0, OP_ONCE will do the release itself. */
5086 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5088 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5089 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5091 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5092 free_stack(common, 1);
5094 /* Continue to the normal backtrack. */
5097 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5098 decrease_call_count(common);
5100 /* Skip the other alternatives. */
5101 while (*cc == OP_ALT)
5102 cc += GET(cc, 1);
5103 cc += 1 + LINK_SIZE;
5104 return cc;
5107 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5109 DEFINE_COMPILER;
5110 backtrack_common *backtrack;
5111 pcre_uchar opcode;
5112 int localptr;
5113 int cbraprivptr = 0;
5114 int framesize;
5115 int stacksize;
5116 int offset = 0;
5117 BOOL zero = FALSE;
5118 pcre_uchar *ccbegin = NULL;
5119 int stack;
5120 struct sljit_label *loop = NULL;
5121 struct jump_list *emptymatch = NULL;
5123 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5124 if (*cc == OP_BRAPOSZERO)
5126 zero = TRUE;
5127 cc++;
5130 opcode = *cc;
5131 localptr = PRIV_DATA(cc);
5132 SLJIT_ASSERT(localptr != 0);
5133 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5134 switch(opcode)
5136 case OP_BRAPOS:
5137 case OP_SBRAPOS:
5138 ccbegin = cc + 1 + LINK_SIZE;
5139 break;
5141 case OP_CBRAPOS:
5142 case OP_SCBRAPOS:
5143 offset = GET2(cc, 1 + LINK_SIZE);
5144 cbraprivptr = OVECTOR_PRIV(offset);
5145 offset <<= 1;
5146 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5147 break;
5149 default:
5150 SLJIT_ASSERT_STOP();
5151 break;
5154 framesize = get_framesize(common, cc, FALSE);
5155 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5156 if (framesize < 0)
5158 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5159 if (!zero)
5160 stacksize++;
5161 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5162 allocate_stack(common, stacksize);
5163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5165 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5172 else
5173 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5175 if (!zero)
5176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5178 else
5180 stacksize = framesize + 1;
5181 if (!zero)
5182 stacksize++;
5183 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5184 stacksize++;
5185 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5186 allocate_stack(common, stacksize);
5188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5189 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5191 stack = 0;
5192 if (!zero)
5194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5195 stack++;
5197 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5200 stack++;
5202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5203 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5206 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5209 loop = LABEL();
5210 while (*cc != OP_KETRPOS)
5212 backtrack->top = NULL;
5213 backtrack->topbacktracks = NULL;
5214 cc += GET(cc, 1);
5216 compile_trypath(common, ccbegin, cc, backtrack);
5217 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5218 return NULL;
5220 if (framesize < 0)
5222 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5224 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5231 else
5233 if (opcode == OP_SBRAPOS)
5234 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5235 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5238 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5239 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5241 if (!zero)
5242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5244 else
5246 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5248 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5249 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5251 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5254 else
5256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5257 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5258 if (opcode == OP_SBRAPOS)
5259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5260 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5263 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5264 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5266 if (!zero)
5268 if (framesize < 0)
5269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5270 else
5271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5274 JUMPTO(SLJIT_JUMP, loop);
5275 flush_stubs(common);
5277 compile_backtrackpath(common, backtrack->top);
5278 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5279 return NULL;
5280 set_jumps(backtrack->topbacktracks, LABEL());
5282 if (framesize < 0)
5284 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5285 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5286 else
5287 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5289 else
5291 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5293 /* Last alternative. */
5294 if (*cc == OP_KETRPOS)
5295 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5296 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5298 else
5300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5301 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5305 if (*cc == OP_KETRPOS)
5306 break;
5307 ccbegin = cc + 1 + LINK_SIZE;
5310 backtrack->topbacktracks = NULL;
5311 if (!zero)
5313 if (framesize < 0)
5314 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5315 else /* TMP2 is set to [localptr] above. */
5316 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5319 /* None of them matched. */
5320 set_jumps(emptymatch, LABEL());
5321 decrease_call_count(common);
5322 return cc + 1 + LINK_SIZE;
5325 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5327 int class_len;
5329 *opcode = *cc;
5330 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5332 cc++;
5333 *type = OP_CHAR;
5335 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5337 cc++;
5338 *type = OP_CHARI;
5339 *opcode -= OP_STARI - OP_STAR;
5341 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5343 cc++;
5344 *type = OP_NOT;
5345 *opcode -= OP_NOTSTAR - OP_STAR;
5347 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5349 cc++;
5350 *type = OP_NOTI;
5351 *opcode -= OP_NOTSTARI - OP_STAR;
5353 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5355 cc++;
5356 *opcode -= OP_TYPESTAR - OP_STAR;
5357 *type = 0;
5359 else
5361 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5362 *type = *opcode;
5363 cc++;
5364 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5365 *opcode = cc[class_len - 1];
5366 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5368 *opcode -= OP_CRSTAR - OP_STAR;
5369 if (end != NULL)
5370 *end = cc + class_len;
5372 else
5374 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5375 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5376 *arg2 = GET2(cc, class_len);
5378 if (*arg2 == 0)
5380 SLJIT_ASSERT(*arg1 != 0);
5381 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5383 if (*arg1 == *arg2)
5384 *opcode = OP_EXACT;
5386 if (end != NULL)
5387 *end = cc + class_len + 2 * IMM2_SIZE;
5389 return cc;
5392 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5394 *arg1 = GET2(cc, 0);
5395 cc += IMM2_SIZE;
5398 if (*type == 0)
5400 *type = *cc;
5401 if (end != NULL)
5402 *end = next_opcode(common, cc);
5403 cc++;
5404 return cc;
5407 if (end != NULL)
5409 *end = cc + 1;
5410 #ifdef SUPPORT_UTF
5411 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5412 #endif
5414 return cc;
5417 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5419 DEFINE_COMPILER;
5420 backtrack_common *backtrack;
5421 pcre_uchar opcode;
5422 pcre_uchar type;
5423 int arg1 = -1, arg2 = -1;
5424 pcre_uchar* end;
5425 jump_list *nomatch = NULL;
5426 struct sljit_jump *jump = NULL;
5427 struct sljit_label *label;
5429 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5431 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5433 switch(opcode)
5435 case OP_STAR:
5436 case OP_PLUS:
5437 case OP_UPTO:
5438 case OP_CRRANGE:
5439 if (type == OP_ANYNL || type == OP_EXTUNI)
5441 if (opcode == OP_STAR || opcode == OP_UPTO)
5443 allocate_stack(common, 2);
5444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5447 else
5449 allocate_stack(common, 1);
5450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5452 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5455 label = LABEL();
5456 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5457 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5460 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5461 if (opcode == OP_CRRANGE && arg2 > 0)
5462 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
5463 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5464 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5468 allocate_stack(common, 1);
5469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5470 JUMPTO(SLJIT_JUMP, label);
5471 if (jump != NULL)
5472 JUMPHERE(jump);
5474 else
5476 if (opcode == OP_PLUS)
5477 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5478 allocate_stack(common, 2);
5479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5481 label = LABEL();
5482 compile_char1_trypath(common, type, cc, &nomatch);
5483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5484 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5486 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5487 JUMPTO(SLJIT_JUMP, label);
5489 else
5491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5492 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5494 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5496 set_jumps(nomatch, LABEL());
5497 if (opcode == OP_CRRANGE)
5498 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1));
5499 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5501 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5502 break;
5504 case OP_MINSTAR:
5505 case OP_MINPLUS:
5506 if (opcode == OP_MINPLUS)
5507 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5508 allocate_stack(common, 1);
5509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5510 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5511 break;
5513 case OP_MINUPTO:
5514 case OP_CRMINRANGE:
5515 allocate_stack(common, 2);
5516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5518 if (opcode == OP_CRMINRANGE)
5519 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5520 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5521 break;
5523 case OP_QUERY:
5524 case OP_MINQUERY:
5525 allocate_stack(common, 1);
5526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5527 if (opcode == OP_QUERY)
5528 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5529 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5530 break;
5532 case OP_EXACT:
5533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5534 label = LABEL();
5535 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5537 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5539 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5540 break;
5542 case OP_POSSTAR:
5543 case OP_POSPLUS:
5544 case OP_POSUPTO:
5545 if (opcode != OP_POSSTAR)
5546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5548 label = LABEL();
5549 compile_char1_trypath(common, type, cc, &nomatch);
5550 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5551 if (opcode != OP_POSUPTO)
5553 if (opcode == OP_POSPLUS)
5554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5555 JUMPTO(SLJIT_JUMP, label);
5557 else
5559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5560 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5562 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5564 set_jumps(nomatch, LABEL());
5565 if (opcode == OP_POSPLUS)
5566 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5567 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5568 break;
5570 case OP_POSQUERY:
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5572 compile_char1_trypath(common, type, cc, &nomatch);
5573 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5574 set_jumps(nomatch, LABEL());
5575 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5576 break;
5578 default:
5579 SLJIT_ASSERT_STOP();
5580 break;
5583 decrease_call_count(common);
5584 return end;
5587 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5589 DEFINE_COMPILER;
5590 backtrack_common *backtrack;
5592 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5594 if (*cc == OP_FAIL)
5596 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5597 return cc + 1;
5600 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5602 /* No need to check notempty conditions. */
5603 if (common->acceptlabel == NULL)
5604 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5605 else
5606 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5607 return cc + 1;
5610 if (common->acceptlabel == NULL)
5611 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5612 else
5613 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5614 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5615 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5616 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5617 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5618 if (common->acceptlabel == NULL)
5619 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5620 else
5621 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5622 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5623 if (common->acceptlabel == NULL)
5624 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5625 else
5626 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5627 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5628 return cc + 1;
5631 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
5633 DEFINE_COMPILER;
5634 int offset = GET2(cc, 1);
5636 /* Data will be discarded anyway... */
5637 if (common->currententry != NULL)
5638 return cc + 1 + IMM2_SIZE;
5640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5641 offset <<= 1;
5642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5644 return cc + 1 + IMM2_SIZE;
5647 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
5649 DEFINE_COMPILER;
5650 backtrack_common *backtrack;
5652 while (cc < ccend)
5654 switch(*cc)
5656 case OP_SOD:
5657 case OP_SOM:
5658 case OP_NOT_WORD_BOUNDARY:
5659 case OP_WORD_BOUNDARY:
5660 case OP_NOT_DIGIT:
5661 case OP_DIGIT:
5662 case OP_NOT_WHITESPACE:
5663 case OP_WHITESPACE:
5664 case OP_NOT_WORDCHAR:
5665 case OP_WORDCHAR:
5666 case OP_ANY:
5667 case OP_ALLANY:
5668 case OP_ANYBYTE:
5669 case OP_NOTPROP:
5670 case OP_PROP:
5671 case OP_ANYNL:
5672 case OP_NOT_HSPACE:
5673 case OP_HSPACE:
5674 case OP_NOT_VSPACE:
5675 case OP_VSPACE:
5676 case OP_EXTUNI:
5677 case OP_EODN:
5678 case OP_EOD:
5679 case OP_CIRC:
5680 case OP_CIRCM:
5681 case OP_DOLL:
5682 case OP_DOLLM:
5683 case OP_NOT:
5684 case OP_NOTI:
5685 case OP_REVERSE:
5686 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5687 break;
5689 case OP_SET_SOM:
5690 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5691 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5692 allocate_stack(common, 1);
5693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5695 cc++;
5696 break;
5698 case OP_CHAR:
5699 case OP_CHARI:
5700 if (common->mode == JIT_COMPILE)
5701 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5702 else
5703 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5704 break;
5706 case OP_STAR:
5707 case OP_MINSTAR:
5708 case OP_PLUS:
5709 case OP_MINPLUS:
5710 case OP_QUERY:
5711 case OP_MINQUERY:
5712 case OP_UPTO:
5713 case OP_MINUPTO:
5714 case OP_EXACT:
5715 case OP_POSSTAR:
5716 case OP_POSPLUS:
5717 case OP_POSQUERY:
5718 case OP_POSUPTO:
5719 case OP_STARI:
5720 case OP_MINSTARI:
5721 case OP_PLUSI:
5722 case OP_MINPLUSI:
5723 case OP_QUERYI:
5724 case OP_MINQUERYI:
5725 case OP_UPTOI:
5726 case OP_MINUPTOI:
5727 case OP_EXACTI:
5728 case OP_POSSTARI:
5729 case OP_POSPLUSI:
5730 case OP_POSQUERYI:
5731 case OP_POSUPTOI:
5732 case OP_NOTSTAR:
5733 case OP_NOTMINSTAR:
5734 case OP_NOTPLUS:
5735 case OP_NOTMINPLUS:
5736 case OP_NOTQUERY:
5737 case OP_NOTMINQUERY:
5738 case OP_NOTUPTO:
5739 case OP_NOTMINUPTO:
5740 case OP_NOTEXACT:
5741 case OP_NOTPOSSTAR:
5742 case OP_NOTPOSPLUS:
5743 case OP_NOTPOSQUERY:
5744 case OP_NOTPOSUPTO:
5745 case OP_NOTSTARI:
5746 case OP_NOTMINSTARI:
5747 case OP_NOTPLUSI:
5748 case OP_NOTMINPLUSI:
5749 case OP_NOTQUERYI:
5750 case OP_NOTMINQUERYI:
5751 case OP_NOTUPTOI:
5752 case OP_NOTMINUPTOI:
5753 case OP_NOTEXACTI:
5754 case OP_NOTPOSSTARI:
5755 case OP_NOTPOSPLUSI:
5756 case OP_NOTPOSQUERYI:
5757 case OP_NOTPOSUPTOI:
5758 case OP_TYPESTAR:
5759 case OP_TYPEMINSTAR:
5760 case OP_TYPEPLUS:
5761 case OP_TYPEMINPLUS:
5762 case OP_TYPEQUERY:
5763 case OP_TYPEMINQUERY:
5764 case OP_TYPEUPTO:
5765 case OP_TYPEMINUPTO:
5766 case OP_TYPEEXACT:
5767 case OP_TYPEPOSSTAR:
5768 case OP_TYPEPOSPLUS:
5769 case OP_TYPEPOSQUERY:
5770 case OP_TYPEPOSUPTO:
5771 cc = compile_iterator_trypath(common, cc, parent);
5772 break;
5774 case OP_CLASS:
5775 case OP_NCLASS:
5776 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5777 cc = compile_iterator_trypath(common, cc, parent);
5778 else
5779 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5780 break;
5782 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5783 case OP_XCLASS:
5784 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5785 cc = compile_iterator_trypath(common, cc, parent);
5786 else
5787 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5788 break;
5789 #endif
5791 case OP_REF:
5792 case OP_REFI:
5793 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5794 cc = compile_ref_iterator_trypath(common, cc, parent);
5795 else
5796 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
5797 break;
5799 case OP_RECURSE:
5800 cc = compile_recurse_trypath(common, cc, parent);
5801 break;
5803 case OP_ASSERT:
5804 case OP_ASSERT_NOT:
5805 case OP_ASSERTBACK:
5806 case OP_ASSERTBACK_NOT:
5807 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5808 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5809 break;
5811 case OP_BRAMINZERO:
5812 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
5813 cc = bracketend(cc + 1);
5814 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5816 allocate_stack(common, 1);
5817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5819 else
5821 allocate_stack(common, 2);
5822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5825 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
5826 if (cc[1] > OP_ASSERTBACK_NOT)
5827 decrease_call_count(common);
5828 break;
5830 case OP_ONCE:
5831 case OP_ONCE_NC:
5832 case OP_BRA:
5833 case OP_CBRA:
5834 case OP_COND:
5835 case OP_SBRA:
5836 case OP_SCBRA:
5837 case OP_SCOND:
5838 cc = compile_bracket_trypath(common, cc, parent);
5839 break;
5841 case OP_BRAZERO:
5842 if (cc[1] > OP_ASSERTBACK_NOT)
5843 cc = compile_bracket_trypath(common, cc, parent);
5844 else
5846 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5847 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5849 break;
5851 case OP_BRAPOS:
5852 case OP_CBRAPOS:
5853 case OP_SBRAPOS:
5854 case OP_SCBRAPOS:
5855 case OP_BRAPOSZERO:
5856 cc = compile_bracketpos_trypath(common, cc, parent);
5857 break;
5859 case OP_MARK:
5860 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5861 SLJIT_ASSERT(common->mark_ptr != 0);
5862 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5863 allocate_stack(common, 1);
5864 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5865 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5866 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
5867 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
5868 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
5869 cc += 1 + 2 + cc[1];
5870 break;
5872 case OP_COMMIT:
5873 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5874 cc += 1;
5875 break;
5877 case OP_FAIL:
5878 case OP_ACCEPT:
5879 case OP_ASSERT_ACCEPT:
5880 cc = compile_fail_accept_trypath(common, cc, parent);
5881 break;
5883 case OP_CLOSE:
5884 cc = compile_close_trypath(common, cc);
5885 break;
5887 case OP_SKIPZERO:
5888 cc = bracketend(cc + 1);
5889 break;
5891 default:
5892 SLJIT_ASSERT_STOP();
5893 return;
5895 if (cc == NULL)
5896 return;
5898 SLJIT_ASSERT(cc == ccend);
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtrack path of (current) and returns from the enclosing
void function when sljit reports a compiler error. Expects `common` and
`compiler` to be in scope at the point of use. */
#define COMPILE_BACKTRACKPATH(current) \
  do \
    { \
    compile_backtrackpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the local variable `current` as a pointer to the given backtrack
record type. */
#define CURRENT_AS(type) ((type *)current)
5916 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
5918 DEFINE_COMPILER;
5919 pcre_uchar *cc = current->cc;
5920 pcre_uchar opcode;
5921 pcre_uchar type;
5922 int arg1 = -1, arg2 = -1;
5923 struct sljit_label *label = NULL;
5924 struct sljit_jump *jump = NULL;
5925 jump_list *jumplist = NULL;
5927 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5929 switch(opcode)
5931 case OP_STAR:
5932 case OP_PLUS:
5933 case OP_UPTO:
5934 case OP_CRRANGE:
5935 if (type == OP_ANYNL || type == OP_EXTUNI)
5937 set_jumps(current->topbacktracks, LABEL());
5938 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5939 free_stack(common, 1);
5940 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
5942 else
5944 if (opcode <= OP_PLUS || opcode == OP_UPTO)
5945 arg2 = 0;
5946 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5947 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
5948 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, SLJIT_IMM, 1);
5949 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5950 skip_char_back(common);
5951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5952 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5953 if (opcode == OP_CRRANGE)
5954 set_jumps(current->topbacktracks, LABEL());
5955 JUMPHERE(jump);
5956 free_stack(common, 2);
5957 if (opcode == OP_PLUS)
5958 set_jumps(current->topbacktracks, LABEL());
5960 break;
5962 case OP_MINSTAR:
5963 case OP_MINPLUS:
5964 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5965 compile_char1_trypath(common, type, cc, &jumplist);
5966 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5967 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5968 set_jumps(jumplist, LABEL());
5969 free_stack(common, 1);
5970 if (opcode == OP_MINPLUS)
5971 set_jumps(current->topbacktracks, LABEL());
5972 break;
5974 case OP_MINUPTO:
5975 case OP_CRMINRANGE:
5976 if (opcode == OP_CRMINRANGE)
5978 label = LABEL();
5979 set_jumps(current->topbacktracks, label);
5981 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5982 compile_char1_trypath(common, type, cc, &jumplist);
5984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5986 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5987 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5989 if (opcode == OP_CRMINRANGE)
5990 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5992 if (opcode == OP_CRMINRANGE && arg1 == 0)
5993 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5994 else
5995 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
5997 set_jumps(jumplist, LABEL());
5998 free_stack(common, 2);
5999 break;
6001 case OP_QUERY:
6002 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6003 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6004 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6005 jump = JUMP(SLJIT_JUMP);
6006 set_jumps(current->topbacktracks, LABEL());
6007 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6009 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6010 JUMPHERE(jump);
6011 free_stack(common, 1);
6012 break;
6014 case OP_MINQUERY:
6015 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6016 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6017 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6018 compile_char1_trypath(common, type, cc, &jumplist);
6019 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6020 set_jumps(jumplist, LABEL());
6021 JUMPHERE(jump);
6022 free_stack(common, 1);
6023 break;
6025 case OP_EXACT:
6026 case OP_POSPLUS:
6027 set_jumps(current->topbacktracks, LABEL());
6028 break;
6030 case OP_POSSTAR:
6031 case OP_POSQUERY:
6032 case OP_POSUPTO:
6033 break;
6035 default:
6036 SLJIT_ASSERT_STOP();
6037 break;
6041 static void compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6043 DEFINE_COMPILER;
6044 pcre_uchar *cc = current->cc;
6045 pcre_uchar type;
6047 type = cc[1 + IMM2_SIZE];
6048 if ((type & 0x1) == 0)
6050 set_jumps(current->topbacktracks, LABEL());
6051 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6052 free_stack(common, 1);
6053 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6054 return;
6057 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6058 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6059 set_jumps(current->topbacktracks, LABEL());
6060 free_stack(common, 2);
6063 static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current)
6065 DEFINE_COMPILER;
6067 set_jumps(current->topbacktracks, LABEL());
6069 if (common->has_set_som && common->mark_ptr != 0)
6071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6073 free_stack(common, 2);
6074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6077 else if (common->has_set_som || common->mark_ptr != 0)
6079 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6080 free_stack(common, 1);
6081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6085 static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current)
6087 DEFINE_COMPILER;
6088 pcre_uchar *cc = current->cc;
6089 pcre_uchar bra = OP_BRA;
6090 struct sljit_jump *brajump = NULL;
6092 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
6093 if (*cc == OP_BRAZERO)
6095 bra = *cc;
6096 cc++;
6099 if (bra == OP_BRAZERO)
6101 SLJIT_ASSERT(current->topbacktracks == NULL);
6102 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6105 if (CURRENT_AS(assert_backtrack)->framesize < 0)
6107 set_jumps(current->topbacktracks, LABEL());
6109 if (bra == OP_BRAZERO)
6111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6112 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6113 free_stack(common, 1);
6115 return;
6118 if (bra == OP_BRAZERO)
6120 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6123 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6124 free_stack(common, 1);
6125 return;
6127 free_stack(common, 1);
6128 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6131 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6133 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr);
6134 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w));
6137 set_jumps(current->topbacktracks, LABEL());
6139 else
6140 set_jumps(current->topbacktracks, LABEL());
6142 if (bra == OP_BRAZERO)
6144 /* We know there is enough place on the stack. */
6145 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6147 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath);
6148 JUMPHERE(brajump);
6152 static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current)
6154 DEFINE_COMPILER;
6155 int opcode;
6156 int offset = 0;
6157 int localptr = CURRENT_AS(bracket_backtrack)->localptr;
6158 int stacksize;
6159 int count;
6160 pcre_uchar *cc = current->cc;
6161 pcre_uchar *ccbegin;
6162 pcre_uchar *ccprev;
6163 jump_list *jumplist = NULL;
6164 jump_list *jumplistitem = NULL;
6165 pcre_uchar bra = OP_BRA;
6166 pcre_uchar ket;
6167 assert_backtrack *assert;
6168 BOOL has_alternatives;
6169 struct sljit_jump *brazero = NULL;
6170 struct sljit_jump *once = NULL;
6171 struct sljit_jump *cond = NULL;
6172 struct sljit_label *rminlabel = NULL;
6174 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6176 bra = *cc;
6177 cc++;
6180 opcode = *cc;
6181 ccbegin = cc;
6182 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
6183 cc += GET(cc, 1);
6184 has_alternatives = *cc == OP_ALT;
6185 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6186 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
6187 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6188 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
6189 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6190 opcode = OP_SCOND;
6191 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6192 opcode = OP_ONCE;
6194 if (ket == OP_KETRMAX)
6196 if (bra == OP_BRAZERO)
6198 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6199 free_stack(common, 1);
6200 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6203 else if (ket == OP_KETRMIN)
6205 if (bra != OP_BRAMINZERO)
6207 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6208 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6210 /* Checking zero-length iteration. */
6211 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6212 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6213 else
6215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6216 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_backtrack)->recursivetrypath);
6218 if (opcode != OP_ONCE)
6219 free_stack(common, 1);
6221 else
6222 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6224 rminlabel = LABEL();
6226 else if (bra == OP_BRAZERO)
6228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6229 free_stack(common, 1);
6230 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6233 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6235 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6237 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6238 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6240 once = JUMP(SLJIT_JUMP);
6242 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6244 if (has_alternatives)
6246 /* Always exactly one alternative. */
6247 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6248 free_stack(common, 1);
6250 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6251 if (SLJIT_UNLIKELY(!jumplistitem))
6252 return;
6253 jumplist = jumplistitem;
6254 jumplistitem->next = NULL;
6255 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6258 else if (*cc == OP_ALT)
6260 /* Build a jump list. Get the last successfully matched branch index. */
6261 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6262 free_stack(common, 1);
6263 count = 1;
6266 /* Append as the last item. */
6267 if (jumplist != NULL)
6269 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6270 jumplistitem = jumplistitem->next;
6272 else
6274 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6275 jumplist = jumplistitem;
6278 if (SLJIT_UNLIKELY(!jumplistitem))
6279 return;
6281 jumplistitem->next = NULL;
6282 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6283 cc += GET(cc, 1);
6285 while (*cc == OP_ALT);
6287 cc = ccbegin + GET(ccbegin, 1);
6290 COMPILE_BACKTRACKPATH(current->top);
6291 if (current->topbacktracks)
6292 set_jumps(current->topbacktracks, LABEL());
6294 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6296 /* Conditional block always has at most one alternative. */
6297 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6299 SLJIT_ASSERT(has_alternatives);
6300 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6301 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6303 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6304 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6307 cond = JUMP(SLJIT_JUMP);
6308 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
6310 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
6312 SLJIT_ASSERT(has_alternatives);
6313 cond = JUMP(SLJIT_JUMP);
6314 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
6316 else
6317 SLJIT_ASSERT(!has_alternatives);
6320 if (has_alternatives)
6322 count = 1;
6325 current->top = NULL;
6326 current->topbacktracks = NULL;
6327 current->nextbacktracks = NULL;
6328 if (*cc == OP_ALT)
6330 ccprev = cc + 1 + LINK_SIZE;
6331 cc += GET(cc, 1);
6332 if (opcode != OP_COND && opcode != OP_SCOND)
6334 if (localptr != 0 && opcode != OP_ONCE)
6335 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6336 else
6337 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6339 compile_trypath(common, ccprev, cc, current);
6340 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6341 return;
6344 /* Instructions after the current alternative is succesfully matched. */
6345 /* There is a similar code in compile_bracket_trypath. */
6346 if (opcode == OP_ONCE)
6348 if (CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6350 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6351 /* TMP2 which is set here used by OP_KETRMAX below. */
6352 if (ket == OP_KETRMAX)
6353 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6354 else if (ket == OP_KETRMIN)
6356 /* Move the STR_PTR to the localptr. */
6357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6360 else
6362 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_w));
6363 if (ket == OP_KETRMAX)
6365 /* TMP2 which is set here used by OP_KETRMAX below. */
6366 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6371 stacksize = 0;
6372 if (opcode != OP_ONCE)
6373 stacksize++;
6374 if (ket != OP_KET || bra != OP_BRA)
6375 stacksize++;
6377 if (stacksize > 0) {
6378 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6379 allocate_stack(common, stacksize);
6380 else
6382 /* We know we have place at least for one item on the top of the stack. */
6383 SLJIT_ASSERT(stacksize == 1);
6384 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6388 stacksize = 0;
6389 if (ket != OP_KET || bra != OP_BRA)
6391 if (ket != OP_KET)
6392 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6393 else
6394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6395 stacksize++;
6398 if (opcode != OP_ONCE)
6399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6401 if (offset != 0)
6403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6408 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alttrypath);
6410 if (opcode != OP_ONCE)
6412 SLJIT_ASSERT(jumplist);
6413 JUMPHERE(jumplist->jump);
6414 jumplist = jumplist->next;
6417 COMPILE_BACKTRACKPATH(current->top);
6418 if (current->topbacktracks)
6419 set_jumps(current->topbacktracks, LABEL());
6420 SLJIT_ASSERT(!current->nextbacktracks);
6422 while (*cc == OP_ALT);
6423 SLJIT_ASSERT(!jumplist);
6425 if (cond != NULL)
6427 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
6428 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6429 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6432 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6433 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6436 JUMPHERE(cond);
6439 /* Free the STR_PTR. */
6440 if (localptr == 0)
6441 free_stack(common, 1);
6444 if (offset != 0)
6446 /* Using both tmp register is better for instruction scheduling. */
6447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6448 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6452 free_stack(common, 3);
6454 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6457 free_stack(common, 1);
6459 else if (opcode == OP_ONCE)
6461 cc = ccbegin + GET(ccbegin, 1);
6462 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6464 /* Reset head and drop saved frame. */
6465 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6466 free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize);
6468 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6470 /* The STR_PTR must be released. */
6471 free_stack(common, 1);
6474 JUMPHERE(once);
6475 /* Restore previous localptr */
6476 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6477 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_w));
6478 else if (ket == OP_KETRMIN)
6480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6481 /* See the comment below. */
6482 free_stack(common, 2);
6483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6487 if (ket == OP_KETRMAX)
6489 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6490 if (bra != OP_BRAZERO)
6491 free_stack(common, 1);
6492 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6493 if (bra == OP_BRAZERO)
6495 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6496 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6497 JUMPHERE(brazero);
6498 free_stack(common, 1);
6501 else if (ket == OP_KETRMIN)
6503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6505 /* OP_ONCE removes everything in case of a backtrack, so we don't
6506 need to explicitly release the STR_PTR. The extra release would
6507 affect badly the free_stack(2) above. */
6508 if (opcode != OP_ONCE)
6509 free_stack(common, 1);
6510 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6511 if (opcode == OP_ONCE)
6512 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6513 else if (bra == OP_BRAMINZERO)
6514 free_stack(common, 1);
6516 else if (bra == OP_BRAZERO)
6518 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6519 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6520 JUMPHERE(brazero);
6524 static void compile_bracketpos_backtrackpath(compiler_common *common, struct backtrack_common *current)
6526 DEFINE_COMPILER;
6527 int offset;
6528 struct sljit_jump *jump;
6530 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
6532 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6534 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6535 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6536 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6540 set_jumps(current->topbacktracks, LABEL());
6541 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6542 return;
6545 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr);
6546 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6548 if (current->topbacktracks)
6550 jump = JUMP(SLJIT_JUMP);
6551 set_jumps(current->topbacktracks, LABEL());
6552 /* Drop the stack frame. */
6553 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6554 JUMPHERE(jump);
6556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_w));
6559 static void compile_braminzero_backtrackpath(compiler_common *common, struct backtrack_common *current)
6561 assert_backtrack backtrack;
6563 current->top = NULL;
6564 current->topbacktracks = NULL;
6565 current->nextbacktracks = NULL;
6566 if (current->cc[1] > OP_ASSERTBACK_NOT)
6568 /* Manual call of compile_bracket_trypath and compile_bracket_backtrackpath. */
6569 compile_bracket_trypath(common, current->cc, current);
6570 compile_bracket_backtrackpath(common, current->top);
6572 else
6574 memset(&backtrack, 0, sizeof(backtrack));
6575 backtrack.common.cc = current->cc;
6576 backtrack.trypath = CURRENT_AS(braminzero_backtrack)->trypath;
6577 /* Manual call of compile_assert_trypath. */
6578 compile_assert_trypath(common, current->cc, &backtrack, FALSE);
6580 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
6583 static void compile_backtrackpath(compiler_common *common, struct backtrack_common *current)
6585 DEFINE_COMPILER;
6587 while (current)
6589 if (current->nextbacktracks != NULL)
6590 set_jumps(current->nextbacktracks, LABEL());
6591 switch(*current->cc)
6593 case OP_SET_SOM:
6594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6595 free_stack(common, 1);
6596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6597 break;
6599 case OP_STAR:
6600 case OP_MINSTAR:
6601 case OP_PLUS:
6602 case OP_MINPLUS:
6603 case OP_QUERY:
6604 case OP_MINQUERY:
6605 case OP_UPTO:
6606 case OP_MINUPTO:
6607 case OP_EXACT:
6608 case OP_POSSTAR:
6609 case OP_POSPLUS:
6610 case OP_POSQUERY:
6611 case OP_POSUPTO:
6612 case OP_STARI:
6613 case OP_MINSTARI:
6614 case OP_PLUSI:
6615 case OP_MINPLUSI:
6616 case OP_QUERYI:
6617 case OP_MINQUERYI:
6618 case OP_UPTOI:
6619 case OP_MINUPTOI:
6620 case OP_EXACTI:
6621 case OP_POSSTARI:
6622 case OP_POSPLUSI:
6623 case OP_POSQUERYI:
6624 case OP_POSUPTOI:
6625 case OP_NOTSTAR:
6626 case OP_NOTMINSTAR:
6627 case OP_NOTPLUS:
6628 case OP_NOTMINPLUS:
6629 case OP_NOTQUERY:
6630 case OP_NOTMINQUERY:
6631 case OP_NOTUPTO:
6632 case OP_NOTMINUPTO:
6633 case OP_NOTEXACT:
6634 case OP_NOTPOSSTAR:
6635 case OP_NOTPOSPLUS:
6636 case OP_NOTPOSQUERY:
6637 case OP_NOTPOSUPTO:
6638 case OP_NOTSTARI:
6639 case OP_NOTMINSTARI:
6640 case OP_NOTPLUSI:
6641 case OP_NOTMINPLUSI:
6642 case OP_NOTQUERYI:
6643 case OP_NOTMINQUERYI:
6644 case OP_NOTUPTOI:
6645 case OP_NOTMINUPTOI:
6646 case OP_NOTEXACTI:
6647 case OP_NOTPOSSTARI:
6648 case OP_NOTPOSPLUSI:
6649 case OP_NOTPOSQUERYI:
6650 case OP_NOTPOSUPTOI:
6651 case OP_TYPESTAR:
6652 case OP_TYPEMINSTAR:
6653 case OP_TYPEPLUS:
6654 case OP_TYPEMINPLUS:
6655 case OP_TYPEQUERY:
6656 case OP_TYPEMINQUERY:
6657 case OP_TYPEUPTO:
6658 case OP_TYPEMINUPTO:
6659 case OP_TYPEEXACT:
6660 case OP_TYPEPOSSTAR:
6661 case OP_TYPEPOSPLUS:
6662 case OP_TYPEPOSQUERY:
6663 case OP_TYPEPOSUPTO:
6664 case OP_CLASS:
6665 case OP_NCLASS:
6666 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6667 case OP_XCLASS:
6668 #endif
6669 compile_iterator_backtrackpath(common, current);
6670 break;
6672 case OP_REF:
6673 case OP_REFI:
6674 compile_ref_iterator_backtrackpath(common, current);
6675 break;
6677 case OP_RECURSE:
6678 compile_recurse_backtrackpath(common, current);
6679 break;
6681 case OP_ASSERT:
6682 case OP_ASSERT_NOT:
6683 case OP_ASSERTBACK:
6684 case OP_ASSERTBACK_NOT:
6685 compile_assert_backtrackpath(common, current);
6686 break;
6688 case OP_ONCE:
6689 case OP_ONCE_NC:
6690 case OP_BRA:
6691 case OP_CBRA:
6692 case OP_COND:
6693 case OP_SBRA:
6694 case OP_SCBRA:
6695 case OP_SCOND:
6696 compile_bracket_backtrackpath(common, current);
6697 break;
6699 case OP_BRAZERO:
6700 if (current->cc[1] > OP_ASSERTBACK_NOT)
6701 compile_bracket_backtrackpath(common, current);
6702 else
6703 compile_assert_backtrackpath(common, current);
6704 break;
6706 case OP_BRAPOS:
6707 case OP_CBRAPOS:
6708 case OP_SBRAPOS:
6709 case OP_SCBRAPOS:
6710 case OP_BRAPOSZERO:
6711 compile_bracketpos_backtrackpath(common, current);
6712 break;
6714 case OP_BRAMINZERO:
6715 compile_braminzero_backtrackpath(common, current);
6716 break;
6718 case OP_MARK:
6719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6720 free_stack(common, 1);
6721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6722 break;
6724 case OP_COMMIT:
6725 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6726 if (common->leavelabel == NULL)
6727 add_jump(compiler, &common->leave, JUMP(SLJIT_JUMP));
6728 else
6729 JUMPTO(SLJIT_JUMP, common->leavelabel);
6730 break;
6732 case OP_FAIL:
6733 case OP_ACCEPT:
6734 case OP_ASSERT_ACCEPT:
6735 set_jumps(current->topbacktracks, LABEL());
6736 break;
6738 default:
6739 SLJIT_ASSERT_STOP();
6740 break;
6742 current = current->prev;
6746 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6748 DEFINE_COMPILER;
6749 pcre_uchar *cc = common->start + common->currententry->start;
6750 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6751 pcre_uchar *ccend = bracketend(cc);
6752 int localsize = get_localsize(common, ccbegin, ccend);
6753 int framesize = get_framesize(common, cc, TRUE);
6754 int alternativesize;
6755 BOOL needsframe;
6756 backtrack_common altbacktrack;
6757 struct sljit_label *save_leavelabel = common->leavelabel;
6758 jump_list *save_leave = common->leave;
6759 struct sljit_jump *jump;
6761 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6762 needsframe = framesize >= 0;
6763 if (!needsframe)
6764 framesize = 0;
6765 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6767 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
6768 common->currententry->entry = LABEL();
6769 set_jumps(common->currententry->calls, common->currententry->entry);
6771 sljit_emit_fast_enter(compiler, TMP2, 0);
6772 allocate_stack(common, localsize + framesize + alternativesize);
6773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6774 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
6776 if (needsframe)
6777 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE);
6779 if (alternativesize > 0)
6780 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6782 memset(&altbacktrack, 0, sizeof(backtrack_common));
6783 common->leavelabel = NULL;
6784 common->acceptlabel = NULL;
6785 common->leave = NULL;
6786 common->accept = NULL;
6787 altbacktrack.cc = ccbegin;
6788 cc += GET(cc, 1);
6789 while (1)
6791 altbacktrack.top = NULL;
6792 altbacktrack.topbacktracks = NULL;
6794 if (altbacktrack.cc != ccbegin)
6795 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6797 compile_trypath(common, altbacktrack.cc, cc, &altbacktrack);
6798 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6800 common->leavelabel = save_leavelabel;
6801 common->leave = save_leave;
6802 return;
6805 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6807 compile_backtrackpath(common, altbacktrack.top);
6808 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6810 common->leavelabel = save_leavelabel;
6811 common->leave = save_leave;
6812 return;
6814 set_jumps(altbacktrack.topbacktracks, LABEL());
6816 if (*cc != OP_ALT)
6817 break;
6819 altbacktrack.cc = cc + 1 + LINK_SIZE;
6820 cc += GET(cc, 1);
6822 /* None of them matched. */
6823 if (common->leave != NULL)
6824 set_jumps(common->leave, LABEL());
6826 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6827 jump = JUMP(SLJIT_JUMP);
6829 set_jumps(common->accept, LABEL());
6830 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head);
6831 if (needsframe)
6833 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6834 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6835 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6837 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6839 JUMPHERE(jump);
6840 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6841 free_stack(common, localsize + framesize + alternativesize);
6842 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6843 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0);
6845 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6847 common->leavelabel = save_leavelabel;
6848 common->leave = save_leave;
6851 #undef COMPILE_BACKTRACKPATH
6852 #undef CURRENT_AS
6854 void
6855 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
6857 struct sljit_compiler *compiler;
6858 backtrack_common rootbacktrack;
6859 compiler_common common_data;
6860 compiler_common *common = &common_data;
6861 const pcre_uint8 *tables = re->tables;
6862 pcre_study_data *study;
6863 int localsize;
6864 pcre_uchar *ccend;
6865 executable_functions *functions;
6866 void *executable_func;
6867 sljit_uw executable_size;
6868 struct sljit_label *mainloop = NULL;
6869 struct sljit_label *empty_match_found;
6870 struct sljit_label *empty_match_backtrack;
6871 struct sljit_jump *jump;
6872 struct sljit_jump *reqbyte_notfound = NULL;
6873 struct sljit_jump *empty_match;
6875 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6876 study = extra->study_data;
6878 if (!tables)
6879 tables = PRIV(default_tables);
6881 memset(&rootbacktrack, 0, sizeof(backtrack_common));
6882 memset(common, 0, sizeof(compiler_common));
6883 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6885 common->start = rootbacktrack.cc;
6886 common->fcc = tables + fcc_offset;
6887 common->lcc = (sljit_w)(tables + lcc_offset);
6888 common->mode = mode;
6889 common->nltype = NLTYPE_FIXED;
6890 switch(re->options & PCRE_NEWLINE_BITS)
6892 case 0:
6893 /* Compile-time default */
6894 switch (NEWLINE)
6896 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6897 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6898 default: common->newline = NEWLINE; break;
6900 break;
6901 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6902 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6903 case PCRE_NEWLINE_CR+
6904 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6905 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6906 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6907 default: return;
6909 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6910 common->bsr_nltype = NLTYPE_ANYCRLF;
6911 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6912 common->bsr_nltype = NLTYPE_ANY;
6913 else
6915 #ifdef BSR_ANYCRLF
6916 common->bsr_nltype = NLTYPE_ANYCRLF;
6917 #else
6918 common->bsr_nltype = NLTYPE_ANY;
6919 #endif
6921 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6922 common->ctypes = (sljit_w)(tables + ctypes_offset);
6923 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6924 common->name_count = re->name_count;
6925 common->name_entry_size = re->name_entry_size;
6926 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6927 #ifdef SUPPORT_UTF
6928 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6929 common->utf = (re->options & PCRE_UTF8) != 0;
6930 #ifdef SUPPORT_UCP
6931 common->use_ucp = (re->options & PCRE_UCP) != 0;
6932 #endif
6933 #endif /* SUPPORT_UTF */
6934 ccend = bracketend(rootbacktrack.cc);
6936 /* Calculate the local space size on the stack. */
6937 common->ovector_start = CALL_LIMIT + sizeof(sljit_w);
6939 SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6940 localsize = get_localspace(common, rootbacktrack.cc, ccend);
6941 if (localsize < 0)
6942 return;
6944 /* Checking flags and updating ovector_start. */
6945 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
6947 common->req_char_ptr = common->ovector_start;
6948 common->ovector_start += sizeof(sljit_w);
6950 if (mode != JIT_COMPILE)
6952 common->start_used_ptr = common->ovector_start;
6953 common->ovector_start += sizeof(sljit_w);
6954 if (mode == JIT_PARTIAL_SOFT_COMPILE)
6956 common->hit_start = common->ovector_start;
6957 common->ovector_start += sizeof(sljit_w);
6960 if ((re->options & PCRE_FIRSTLINE) != 0)
6962 common->first_line_end = common->ovector_start;
6963 common->ovector_start += sizeof(sljit_w);
6966 /* Aligning ovector to even number of sljit words. */
6967 if ((common->ovector_start & sizeof(sljit_w)) != 0)
6968 common->ovector_start += sizeof(sljit_w);
6970 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
6971 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6972 localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6973 if (localsize > SLJIT_MAX_LOCAL_SIZE)
6974 return;
6975 common->localptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(int));
6976 if (!common->localptrs)
6977 return;
6978 memset(common->localptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
6979 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6981 compiler = sljit_create_compiler();
6982 if (!compiler)
6984 SLJIT_FREE(common->localptrs);
6985 return;
6987 common->compiler = compiler;
6989 /* Main pcre_jit_exec entry. */
6990 sljit_emit_enter(compiler, 1, 5, 5, localsize);
6992 /* Register init. */
6993 reset_ovector(common, (re->top_bracket + 1) * 2);
6994 if (common->req_char_ptr != 0)
6995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_TEMPORARY_REG1, 0);
6997 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
6998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
6999 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7000 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
7001 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7002 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
7003 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
7004 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
7005 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
7007 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7008 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
7010 /* Main part of the matching */
7011 if ((re->options & PCRE_ANCHORED) == 0)
7013 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
7014 /* Forward search if possible. */
7015 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
7017 if ((re->flags & PCRE_FIRSTSET) != 0)
7018 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
7019 else if ((re->flags & PCRE_STARTLINE) != 0)
7020 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
7021 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
7022 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
7025 if (common->req_char_ptr != 0)
7026 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
7028 /* Store the current STR_PTR in OVECTOR(0). */
7029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
7030 /* Copy the limit of allowed recursions. */
7031 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
7032 if (common->mark_ptr != 0)
7033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
7034 /* Copy the beginning of the string. */
7035 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7037 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
7038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
7039 JUMPHERE(jump);
7041 else if (mode == JIT_PARTIAL_HARD_COMPILE)
7042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
7044 compile_trypath(common, rootbacktrack.cc, ccend, &rootbacktrack);
7045 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7047 sljit_free_compiler(compiler);
7048 SLJIT_FREE(common->localptrs);
7049 return;
7052 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
7053 empty_match_found = LABEL();
7055 common->acceptlabel = LABEL();
7056 if (common->accept != NULL)
7057 set_jumps(common->accept, common->acceptlabel);
7059 /* This means we have a match. Update the ovector. */
7060 copy_ovector(common, re->top_bracket + 1);
7061 common->leavelabel = LABEL();
7062 if (common->leave != NULL)
7063 set_jumps(common->leave, common->leavelabel);
7064 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
7066 if (mode != JIT_COMPILE)
7068 common->partialmatchlabel = LABEL();
7069 set_jumps(common->partialmatch, common->partialmatchlabel);
7070 return_with_partial_match(common, common->leavelabel);
7073 empty_match_backtrack = LABEL();
7074 compile_backtrackpath(common, rootbacktrack.top);
7075 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7077 sljit_free_compiler(compiler);
7078 SLJIT_FREE(common->localptrs);
7079 return;
7082 SLJIT_ASSERT(rootbacktrack.prev == NULL);
7084 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7086 /* Update hit_start only in the first time. */
7087 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
7088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
7089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
7090 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
7091 JUMPHERE(jump);
7094 /* Check we have remaining characters. */
7095 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
7097 if ((re->options & PCRE_ANCHORED) == 0)
7099 if ((re->options & PCRE_FIRSTLINE) == 0)
7101 if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
7103 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
7104 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
7106 else
7107 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
7109 else
7111 SLJIT_ASSERT(common->first_line_end != 0);
7112 if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
7114 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
7115 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
7116 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
7117 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
7118 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
7119 JUMPTO(SLJIT_C_ZERO, mainloop);
7121 else
7122 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, mainloop);
7126 /* No more remaining characters. */
7127 if (reqbyte_notfound != NULL)
7128 JUMPHERE(reqbyte_notfound);
7130 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7131 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0, common->partialmatchlabel);
7133 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
7134 JUMPTO(SLJIT_JUMP, common->leavelabel);
7136 flush_stubs(common);
7138 JUMPHERE(empty_match);
7139 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7140 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
7141 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack);
7142 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
7143 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
7144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7145 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
7146 JUMPTO(SLJIT_JUMP, empty_match_backtrack);
7148 common->currententry = common->entries;
7149 while (common->currententry != NULL)
7151 /* Might add new entries. */
7152 compile_recurse(common);
7153 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7155 sljit_free_compiler(compiler);
7156 SLJIT_FREE(common->localptrs);
7157 return;
7159 flush_stubs(common);
7160 common->currententry = common->currententry->next;
7163 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
7164 /* This is a (really) rare case. */
7165 set_jumps(common->stackalloc, LABEL());
7166 /* RETURN_ADDR is not a saved register. */
7167 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
7168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
7169 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7171 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
7172 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
7174 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
7175 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7178 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
7179 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
7180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
7181 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
7183 /* Allocation failed. */
7184 JUMPHERE(jump);
7185 /* We break the return address cache here, but this is a really rare case. */
7186 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
7187 JUMPTO(SLJIT_JUMP, common->leavelabel);
7189 /* Call limit reached. */
7190 set_jumps(common->calllimit, LABEL());
7191 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
7192 JUMPTO(SLJIT_JUMP, common->leavelabel);
7194 if (common->revertframes != NULL)
7196 set_jumps(common->revertframes, LABEL());
7197 do_revertframes(common);
7199 if (common->wordboundary != NULL)
7201 set_jumps(common->wordboundary, LABEL());
7202 check_wordboundary(common);
7204 if (common->anynewline != NULL)
7206 set_jumps(common->anynewline, LABEL());
7207 check_anynewline(common);
7209 if (common->hspace != NULL)
7211 set_jumps(common->hspace, LABEL());
7212 check_hspace(common);
7214 if (common->vspace != NULL)
7216 set_jumps(common->vspace, LABEL());
7217 check_vspace(common);
7219 if (common->casefulcmp != NULL)
7221 set_jumps(common->casefulcmp, LABEL());
7222 do_casefulcmp(common);
7224 if (common->caselesscmp != NULL)
7226 set_jumps(common->caselesscmp, LABEL());
7227 do_caselesscmp(common);
7229 #ifdef SUPPORT_UTF
7230 if (common->utfreadchar != NULL)
7232 set_jumps(common->utfreadchar, LABEL());
7233 do_utfreadchar(common);
7235 #ifdef COMPILE_PCRE8
7236 if (common->utfreadtype8 != NULL)
7238 set_jumps(common->utfreadtype8, LABEL());
7239 do_utfreadtype8(common);
7241 #endif
7242 #endif /* COMPILE_PCRE8 */
7243 #ifdef SUPPORT_UCP
7244 if (common->getunichartype != NULL)
7246 set_jumps(common->getunichartype, LABEL());
7247 do_getunichartype(common);
7249 if (common->getunichartype_2 != NULL)
7251 set_jumps(common->getunichartype_2, LABEL());
7252 do_getunichartype_2(common);
7254 if (common->getunicharscript != NULL)
7256 set_jumps(common->getunicharscript, LABEL());
7257 do_getunicharscript(common);
7259 #endif
7261 SLJIT_FREE(common->localptrs);
7262 executable_func = sljit_generate_code(compiler);
7263 executable_size = sljit_get_generated_code_size(compiler);
7264 sljit_free_compiler(compiler);
7265 if (executable_func == NULL)
7266 return;
7268 /* Reuse the function descriptor if possible. */
7269 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
7270 functions = (executable_functions *)extra->executable_jit;
7271 else
7273 functions = SLJIT_MALLOC(sizeof(executable_functions));
7274 if (functions == NULL)
7276 /* This case is highly unlikely since we just recently
7277 freed a lot of memory. Although not impossible. */
7278 sljit_free_code(executable_func);
7279 return;
7281 memset(functions, 0, sizeof(executable_functions));
7282 extra->executable_jit = functions;
7283 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
7286 functions->executable_funcs[mode] = executable_func;
7287 functions->executable_sizes[mode] = executable_size;
7290 static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
7292 union {
7293 void* executable_func;
7294 jit_function call_executable_func;
7295 } convert_executable_func;
7296 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
7297 struct sljit_stack local_stack;
7299 local_stack.top = (sljit_w)&local_area;
7300 local_stack.base = local_stack.top;
7301 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
7302 local_stack.max_limit = local_stack.limit;
7303 arguments->stack = &local_stack;
7304 convert_executable_func.executable_func = executable_func;
7305 return convert_executable_func.call_executable_func(arguments);
7309 PRIV(jit_exec)(const REAL_PCRE *re, const PUBL(extra) *extra_data, const pcre_uchar *subject,
7310 int length, int start_offset, int options, int *offsets, int offsetcount)
7312 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
7313 union {
7314 void* executable_func;
7315 jit_function call_executable_func;
7316 } convert_executable_func;
7317 jit_arguments arguments;
7318 int maxoffsetcount;
7319 int retval;
7320 int mode = JIT_COMPILE;
7322 if ((options & PCRE_PARTIAL_HARD) != 0)
7323 mode = JIT_PARTIAL_HARD_COMPILE;
7324 else if ((options & PCRE_PARTIAL_SOFT) != 0)
7325 mode = JIT_PARTIAL_SOFT_COMPILE;
7327 if (functions->executable_funcs[mode] == NULL)
7328 return PCRE_ERROR_NULL;
7330 /* Sanity checks should be handled by pcre_exec. */
7331 arguments.stack = NULL;
7332 arguments.str = subject + start_offset;
7333 arguments.begin = subject;
7334 arguments.end = subject + length;
7335 arguments.mark_ptr = NULL;
7336 /* JIT decreases this value less frequently than the interpreter. */
7337 arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit;
7338 arguments.notbol = (options & PCRE_NOTBOL) != 0;
7339 arguments.noteol = (options & PCRE_NOTEOL) != 0;
7340 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
7341 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
7342 arguments.offsets = offsets;
7344 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
7345 the output vector for storing captured strings, with the remainder used as
7346 workspace. We don't need the workspace here. For compatibility, we limit the
7347 number of captured strings in the same way as pcre_exec(), so that the user
7348 gets the same result with and without JIT. */
7350 if (offsetcount != 2)
7351 offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
7352 maxoffsetcount = (re->top_bracket + 1) * 2;
7353 if (offsetcount > maxoffsetcount)
7354 offsetcount = maxoffsetcount;
7355 arguments.offsetcount = offsetcount;
7357 if (functions->callback)
7358 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
7359 else
7360 arguments.stack = (struct sljit_stack *)functions->userdata;
7362 if (arguments.stack == NULL)
7363 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
7364 else
7366 convert_executable_func.executable_func = functions->executable_funcs[mode];
7367 retval = convert_executable_func.call_executable_func(&arguments);
7370 if (retval * 2 > offsetcount)
7371 retval = 0;
7372 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
7373 *(extra_data->mark) = arguments.mark_ptr;
7375 return retval;
7378 void
7379 PRIV(jit_free)(void *executable_funcs)
7381 int i;
7382 executable_functions *functions = (executable_functions *)executable_funcs;
7383 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
7385 if (functions->executable_funcs[i] != NULL)
7386 sljit_free_code(functions->executable_funcs[i]);
7388 SLJIT_FREE(functions);
7392 PRIV(jit_get_size)(void *executable_funcs)
7394 int i;
7395 sljit_uw size = 0;
7396 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
7397 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
7398 size += executable_sizes[i];
7399 return (int)size;
7402 const char*
7403 PRIV(jit_get_target)(void)
7405 return sljit_get_platform_name();
7408 #ifdef COMPILE_PCRE8
7409 PCRE_EXP_DECL pcre_jit_stack *
7410 pcre_jit_stack_alloc(int startsize, int maxsize)
7411 #else
7412 PCRE_EXP_DECL pcre16_jit_stack *
7413 pcre16_jit_stack_alloc(int startsize, int maxsize)
7414 #endif
7416 if (startsize < 1 || maxsize < 1)
7417 return NULL;
7418 if (startsize > maxsize)
7419 startsize = maxsize;
7420 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
7421 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
7422 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
7425 #ifdef COMPILE_PCRE8
7426 PCRE_EXP_DECL void
7427 pcre_jit_stack_free(pcre_jit_stack *stack)
7428 #else
7429 PCRE_EXP_DECL void
7430 pcre16_jit_stack_free(pcre16_jit_stack *stack)
7431 #endif
7433 sljit_free_stack((struct sljit_stack *)stack);
7436 #ifdef COMPILE_PCRE8
7437 PCRE_EXP_DECL void
7438 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
7439 #else
7440 PCRE_EXP_DECL void
7441 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
7442 #endif
7444 executable_functions *functions;
7445 if (extra != NULL &&
7446 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
7447 extra->executable_jit != NULL)
7449 functions = (executable_functions *)extra->executable_jit;
7450 functions->callback = callback;
7451 functions->userdata = userdata;
7455 #else /* SUPPORT_JIT */
7457 /* These are dummy functions to avoid linking errors when JIT support is not
7458 being compiled. */
7460 #ifdef COMPILE_PCRE8
7461 PCRE_EXP_DECL pcre_jit_stack *
7462 pcre_jit_stack_alloc(int startsize, int maxsize)
7463 #else
7464 PCRE_EXP_DECL pcre16_jit_stack *
7465 pcre16_jit_stack_alloc(int startsize, int maxsize)
7466 #endif
7468 (void)startsize;
7469 (void)maxsize;
7470 return NULL;
7473 #ifdef COMPILE_PCRE8
7474 PCRE_EXP_DECL void
7475 pcre_jit_stack_free(pcre_jit_stack *stack)
7476 #else
7477 PCRE_EXP_DECL void
7478 pcre16_jit_stack_free(pcre16_jit_stack *stack)
7479 #endif
7481 (void)stack;
7484 #ifdef COMPILE_PCRE8
7485 PCRE_EXP_DECL void
7486 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
7487 #else
7488 PCRE_EXP_DECL void
7489 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
7490 #endif
7492 (void)extra;
7493 (void)callback;
7494 (void)userdata;
7497 #endif
7499 /* End of pcre_jit_compile.c */