Merge branch '976-disable-assert-checks' into 'master'
[glib.git] / glib / pcre / pcre_jit_compile.c
blobd7233832f1ca65a0adcece1c69a69b57baff5643
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
43 #include "config.h"
45 #include "pcre_internal.h"
47 #ifdef SUPPORT_JIT
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
/* Configuration of the embedded sljit compiler: memory allocation is routed
through the PUBL(malloc) / PUBL(free) hooks, and the verbose and debug
options are both set to 0. */
53 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
54 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
55 #define SLJIT_CONFIG_AUTO 1
56 #define SLJIT_CONFIG_STATIC 1
57 #define SLJIT_VERBOSE 0
58 #define SLJIT_DEBUG 0
60 #include "sljit/sljitLir.c"
62 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
63 #error Unsupported architecture
64 #endif
66 /* Allocate memory on the stack. Fast, but limited size. */
67 #define LOCAL_SPACE_SIZE 32768
/* NOTE(review): presumably the step by which the matching stack is enlarged;
the users of this constant are outside this part of the file - confirm. */
69 #define STACK_GROWTH_RATE 8192
71 /* Enable to check that the allocation could destroy temporaries. */
/* Only defined when SLJIT_DEBUG is non-zero. */
72 #if defined SLJIT_DEBUG && SLJIT_DEBUG
73 #define DESTROY_REGISTERS 1
74 #endif
77 Short summary about the backtracking mechanism employed by the jit code generator:
79 The code generator follows the recursive nature of the PERL compatible regular
80 expressions. The basic blocks of regular expressions are condition checkers
81 which execute different commands depending on the result of the condition check.
82 The relationship between the operators can be horizontal (concatenation) and
83 vertical (sub-expression) (See struct backtrack_common for more details).
85 'ab' - 'a' and 'b' regexps are concatenated
86 'a+' - 'a' is the sub-expression of the '+' operator
88 The condition checkers are boolean (true/false) checkers. Machine code is generated
89 for the checker itself and for the actions depending on the result of the checker.
90 The 'true' case is called the try path (expected path), and the other is called
91 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
92 branches on the try path.
94 Greedy star operator (*) :
95 Try path: match happens.
96 Backtrack path: match failed.
97 Non-greedy star operator (*?) :
98 Try path: no need to perform a match.
99 Backtrack path: match is required.
101 The following example shows the code generated for a capturing bracket
102 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
103 we have the following regular expression:
105 A(B|C)D
107 The generated code will be the following:
109 A try path
110 '(' try path (pushing arguments to the stack)
111 B try path
112 ')' try path (pushing arguments to the stack)
113 D try path
114 return with successful match
116 D backtrack path
117 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
118 B backtrack path
119 C expected path
120 jump to D try path
121 C backtrack path
122 A backtrack path
124 Notice that the order of the backtrack code paths is the opposite of the fast
125 code paths. In this way the topmost value on the stack always belongs
126 to the current backtrack code path. The backtrack path must check
127 whether there is a next alternative. If so, it needs to jump back to
128 the try path eventually. Otherwise it needs to clear out its own stack
129 frame and continue the execution on the backtrack code paths.
133 Saved stack frames:
135 Atomic blocks and asserts require reloading the values of local variables
136 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
137 are not necessarily known at compile time, thus we need a dynamic restore
138 mechanism.
140 The stack frames are stored in a chain list, and have the following format:
141 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
143 Thus we can restore the locals to a particular point in the stack.
/* Argument block of the jit code: a function of type jit_function receives
a pointer to one of these. The pointer members are grouped before the
integer and byte sized members. */
146 typedef struct jit_arguments {
147 /* Pointers first. */
148 struct sljit_stack *stack;
149 const pcre_uchar *str;
150 const pcre_uchar *begin;
151 const pcre_uchar *end;
152 int *offsets;
153 pcre_uchar *uchar_ptr;
154 pcre_uchar *mark_ptr;
155 /* Everything else after. */
156 int offsetcount;
157 int calllimit;
158 pcre_uint8 notbol;
159 pcre_uint8 noteol;
160 pcre_uint8 notempty;
161 pcre_uint8 notempty_atstart;
162 } jit_arguments;
/* Container of the generated functions. executable_funcs and
executable_sizes both have one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES); a callback and its userdata pointer are
stored next to them. */
164 typedef struct executable_functions {
165 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
166 PUBL(jit_callback) callback;
167 void *userdata;
168 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
169 } executable_functions;
/* Singly linked list of jumps. add_jump pushes a new item to the front of
a list, and set_jumps binds every jump of a list to one label. */
171 typedef struct jump_list {
172 struct sljit_jump *jump;
173 struct jump_list *next;
174 } jump_list;
/* Kinds of stubs; flush_stubs selects the code to emit by this value. */
176 enum stub_types { stack_alloc };
/* A pending stub: registered by add_stub, emitted by flush_stubs. */
178 typedef struct stub_list {
179 enum stub_types type;
180 int data;
/* Jump which enters the stub (bound to the stub code by flush_stubs). */
181 struct sljit_jump *start;
/* Label created when the stub is registered; the stub jumps back to it. */
182 struct sljit_label *leave;
183 struct stub_list *next;
184 } stub_list;
/* Function pointer type which takes a jit_arguments pointer and returns int.
NOTE(review): presumably the entry point of the generated code - confirm. */
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_trypath, and contains
190 the arguments for compile_backtrackpath. Must be the first member
191 of its descendants. */
192 typedef struct backtrack_common {
193 /* Concatenation stack. */
194 struct backtrack_common *prev;
195 jump_list *nextbacktracks;
196 /* Internal stack (for component operators). */
197 struct backtrack_common *top;
198 jump_list *topbacktracks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } backtrack_common;
/* Backtrack record of an assertion. It starts with a backtrack_common
member, so a pointer to it can also be used as a backtrack_common pointer. */
203 typedef struct assert_backtrack {
204 backtrack_common common;
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *trypath;
212 } assert_backtrack;
/* Backtrack record of a bracket; starts with a backtrack_common member. */
214 typedef struct bracket_backtrack {
215 backtrack_common common;
216 /* Where to continue if an alternative is successfully matched. */
217 struct sljit_label *alttrypath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivetrypath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerotrypath;
222 /* Contains the branches of a failed condition. */
/* Only one member of the union is meaningful at a time. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_backtrack *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_backtrack;
/* Backtrack record named after the possessive (POS) brackets; starts with
a backtrack_common member. */
234 typedef struct bracketpos_backtrack {
235 backtrack_common common;
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_backtrack;
/* Backtrack record named after OP_BRAMINZERO: a backtrack_common member
followed by a try path label. */
244 typedef struct braminzero_backtrack {
245 backtrack_common common;
246 struct sljit_label *trypath;
247 } braminzero_backtrack;
/* Backtrack record of an iterator; starts with a backtrack_common member. */
249 typedef struct iterator_backtrack {
250 backtrack_common common;
251 /* Next iteration. */
252 struct sljit_label *trypath;
253 } iterator_backtrack;
/* List item which describes one recursively called function. */
255 typedef struct recurse_entry {
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls until the function is not created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
/* Backtrack record named after recursion; it has no members besides the
common part. */
265 typedef struct recurse_backtrack {
266 backtrack_common common;
267 } recurse_backtrack;
/* State shared by the code generator functions. Nearly every function in
this file receives a pointer to it (named common), and the shortcut macros
reach the sljit compiler through it (see DEFINE_COMPILER). */
269 typedef struct compiler_common {
270 struct sljit_compiler *compiler;
/* Base address of the opcodes; PRIV_DATA indexes localptrs relative to it. */
271 pcre_uchar *start;
273 /* Opcode local area direct map. */
274 int *localptrs;
/* Base offset used by OVECTOR_PRIV. */
275 int cbraptr;
276 /* OVector starting point. Must be divisible by 2. */
277 int ovector_start;
278 /* Last known position of the requested byte. */
279 int req_char_ptr;
280 /* Head of the last recursion. */
/* Zero until get_localspace finds an OP_RECURSE. */
281 int recursive_head;
282 /* First inspected character for partial matching. */
283 int start_used_ptr;
284 /* Starting pointer for partial soft matches. */
285 int hit_start;
286 /* End pointer of the first line. */
287 int first_line_end;
288 /* Points to the marked string. */
/* Zero until get_localspace finds an OP_MARK. */
289 int mark_ptr;
291 /* Other */
292 const pcre_uint8 *fcc;
293 sljit_w lcc;
294 int mode;
295 int nltype;
296 int newline;
297 int bsr_nltype;
298 int endonly;
/* Set to TRUE by get_localspace when an OP_SET_SOM is found. */
299 BOOL has_set_som;
300 sljit_w ctypes;
301 sljit_uw name_table;
302 sljit_w name_count;
303 sljit_w name_entry_size;
305 /* Labels and jump lists. */
306 struct sljit_label *partialmatchlabel;
307 struct sljit_label *leavelabel;
308 struct sljit_label *acceptlabel;
/* Stubs registered by add_stub and not yet emitted by flush_stubs. */
309 stub_list *stubs;
310 recurse_entry *entries;
311 recurse_entry *currententry;
312 jump_list *partialmatch;
313 jump_list *leave;
314 jump_list *accept;
315 jump_list *calllimit;
316 jump_list *stackalloc;
317 jump_list *revertframes;
318 jump_list *wordboundary;
319 jump_list *anynewline;
320 jump_list *hspace;
321 jump_list *vspace;
322 jump_list *casefulcmp;
323 jump_list *caselesscmp;
324 BOOL jscript_compat;
325 #ifdef SUPPORT_UTF
326 BOOL utf;
327 #ifdef SUPPORT_UCP
328 BOOL use_ucp;
329 #endif
330 jump_list *utfreadchar;
331 #ifdef COMPILE_PCRE8
332 jump_list *utfreadtype8;
333 #endif
334 #endif /* SUPPORT_UTF */
335 #ifdef SUPPORT_UCP
336 jump_list *getunichartype;
337 jump_list *getunichartype_2;
338 jump_list *getunicharscript;
339 #endif
340 } compiler_common;
342 /* For byte_sequence_compare. */
/* The two unions (c and oc) have the same layout and exist only when
SLJIT_UNALIGNED is set. Their asuchars arrays hold 4 code units in the
8 bit library and 2 in the 16 bit library. */
344 typedef struct compare_context {
345 int length;
346 int sourcereg;
347 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
348 int ucharptr;
349 union {
350 sljit_i asint;
351 sljit_uh asushort;
352 #ifdef COMPILE_PCRE8
353 sljit_ub asbyte;
354 sljit_ub asuchars[4];
355 #else
356 #ifdef COMPILE_PCRE16
357 sljit_uh asuchars[2];
358 #endif
359 #endif
360 } c;
361 union {
362 sljit_i asint;
363 sljit_uh asushort;
364 #ifdef COMPILE_PCRE8
365 sljit_ub asbyte;
366 sljit_ub asuchars[4];
367 #else
368 #ifdef COMPILE_PCRE16
369 sljit_uh asuchars[2];
370 #endif
371 #endif
372 } oc;
373 #endif
374 } compare_context;
/* Tags written into the saved stack frames by init_frame. frame_setstrbegin
and frame_setmark are each followed by one saved value, and frame_end
closes the frame. */
376 enum {
377 frame_end = 0,
378 frame_setstrbegin = -1,
379 frame_setmark = -2
382 /* Undefine sljit macros. */
383 #undef CMP
385 /* Used for accessing the elements of the stack. */
/* STACK(i) is a negative byte offset: STACK(0) is -sizeof(sljit_w),
STACK(1) is -2 * sizeof(sljit_w), and so on. */
386 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
/* Names of the sljit registers used by the generated code. */
388 #define TMP1 SLJIT_TEMPORARY_REG1
389 #define TMP2 SLJIT_TEMPORARY_REG3
390 #define TMP3 SLJIT_TEMPORARY_EREG2
391 #define STR_PTR SLJIT_SAVED_REG1
392 #define STR_END SLJIT_SAVED_REG2
393 #define STACK_TOP SLJIT_TEMPORARY_REG2
394 #define STACK_LIMIT SLJIT_SAVED_REG3
395 #define ARGUMENTS SLJIT_SAVED_EREG1
396 #define CALL_COUNT SLJIT_SAVED_EREG2
397 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
399 /* Locals layout. */
400 /* These two locals can be used by the current opcode. */
401 #define LOCALS0 (0 * sizeof(sljit_w))
402 #define LOCALS1 (1 * sizeof(sljit_w))
403 /* Two local variables for possessive quantifiers (char1 cannot use them). */
404 #define POSSESSIVE0 (2 * sizeof(sljit_w))
405 #define POSSESSIVE1 (3 * sizeof(sljit_w))
406 /* Max limit of recursions. */
407 #define CALL_LIMIT (4 * sizeof(sljit_w))
408 /* The output vector is stored on the stack, and contains pointers
409 to characters. The vector data is divided into two groups: the first
410 group contains the start / end character pointers, and the second is
411 the start pointers when the end of the capturing group has not yet been reached. */
/* The next four macros expect a variable named common in scope. */
412 #define OVECTOR_START (common->ovector_start)
413 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
414 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Looks up common->localptrs by the distance of cc from common->start. */
415 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
/* Selects the sljit move opcodes behind MOV_UCHAR / MOVU_UCHAR according to
the library width (COMPILE_PCRE8 or COMPILE_PCRE16). Any other
configuration stops the build with an error. */
417 #ifdef COMPILE_PCRE8
418 #define MOV_UCHAR SLJIT_MOV_UB
419 #define MOVU_UCHAR SLJIT_MOVU_UB
420 #else
421 #ifdef COMPILE_PCRE16
422 #define MOV_UCHAR SLJIT_MOV_UH
423 #define MOVU_UCHAR SLJIT_MOVU_UH
424 #else
425 #error Unsupported compiling mode
426 #endif
427 #endif
429 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable named compiler, initialized
from common->compiler. Every other shortcut below passes that variable to
the sljit function it wraps, so DEFINE_COMPILER must come first. */
430 #define DEFINE_COMPILER \
431 struct sljit_compiler *compiler = common->compiler
432 #define OP1(op, dst, dstw, src, srcw) \
433 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
434 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
435 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
436 #define LABEL() \
437 sljit_emit_label(compiler)
438 #define JUMP(type) \
439 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
440 #define JUMPTO(type, label) \
441 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label and binds an already existing jump to it. */
442 #define JUMPHERE(jump) \
443 sljit_set_label((jump), sljit_emit_label(compiler))
444 #define CMP(type, src1, src1w, src2, src2w) \
445 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
446 #define CMPTO(type, src1, src1w, src2, src2w, label) \
447 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
448 #define COND_VALUE(op, dst, dstw, type) \
449 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
450 #define GET_LOCAL_BASE(dst, dstw, offset) \
451 sljit_get_local_base(compiler, (dst), (dstw), (offset))
453 static pcre_uchar* bracketend(pcre_uchar* cc)
455 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
456 do cc += GET(cc, 1); while (*cc == OP_ALT);
457 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
458 cc += 1 + LINK_SIZE;
459 return cc;
462 /* Functions which might need modification for all new supported opcodes:
463 next_opcode
464 get_localspace
465 set_localptrs
466 get_framesize
467 init_frame
468 get_localsize
469 copy_locals
470 compile_trypath
471 compile_backtrackpath
/* Returns the address of the opcode which follows the one at cc.
NULL is returned for every opcode which is not listed in the switch, and
for OP_ANYBYTE when common->utf is set. Bracket type opcodes are not
skipped as a whole: only their own header (opcode, link and the optional
IMM2 number) is stepped over. */
474 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
476 SLJIT_UNUSED_ARG(common);
477 switch(*cc)
/* Opcodes without any argument. */
479 case OP_SOD:
480 case OP_SOM:
481 case OP_SET_SOM:
482 case OP_NOT_WORD_BOUNDARY:
483 case OP_WORD_BOUNDARY:
484 case OP_NOT_DIGIT:
485 case OP_DIGIT:
486 case OP_NOT_WHITESPACE:
487 case OP_WHITESPACE:
488 case OP_NOT_WORDCHAR:
489 case OP_WORDCHAR:
490 case OP_ANY:
491 case OP_ALLANY:
492 case OP_ANYNL:
493 case OP_NOT_HSPACE:
494 case OP_HSPACE:
495 case OP_NOT_VSPACE:
496 case OP_VSPACE:
497 case OP_EXTUNI:
498 case OP_EODN:
499 case OP_EOD:
500 case OP_CIRC:
501 case OP_CIRCM:
502 case OP_DOLL:
503 case OP_DOLLM:
504 case OP_TYPESTAR:
505 case OP_TYPEMINSTAR:
506 case OP_TYPEPLUS:
507 case OP_TYPEMINPLUS:
508 case OP_TYPEQUERY:
509 case OP_TYPEMINQUERY:
510 case OP_TYPEPOSSTAR:
511 case OP_TYPEPOSPLUS:
512 case OP_TYPEPOSQUERY:
513 case OP_CRSTAR:
514 case OP_CRMINSTAR:
515 case OP_CRPLUS:
516 case OP_CRMINPLUS:
517 case OP_CRQUERY:
518 case OP_CRMINQUERY:
519 case OP_DEF:
520 case OP_BRAZERO:
521 case OP_BRAMINZERO:
522 case OP_BRAPOSZERO:
523 case OP_COMMIT:
524 case OP_FAIL:
525 case OP_ACCEPT:
526 case OP_ASSERT_ACCEPT:
527 case OP_SKIPZERO:
528 return cc + 1;
/* Rejected (NULL) in UTF mode, a plain one unit opcode otherwise. */
530 case OP_ANYBYTE:
531 #ifdef SUPPORT_UTF
532 if (common->utf) return NULL;
533 #endif
534 return cc + 1;
/* Opcode followed by one character; in UTF mode the character may occupy
extra code units, which are skipped as well. */
536 case OP_CHAR:
537 case OP_CHARI:
538 case OP_NOT:
539 case OP_NOTI:
540 case OP_STAR:
541 case OP_MINSTAR:
542 case OP_PLUS:
543 case OP_MINPLUS:
544 case OP_QUERY:
545 case OP_MINQUERY:
546 case OP_POSSTAR:
547 case OP_POSPLUS:
548 case OP_POSQUERY:
549 case OP_STARI:
550 case OP_MINSTARI:
551 case OP_PLUSI:
552 case OP_MINPLUSI:
553 case OP_QUERYI:
554 case OP_MINQUERYI:
555 case OP_POSSTARI:
556 case OP_POSPLUSI:
557 case OP_POSQUERYI:
558 case OP_NOTSTAR:
559 case OP_NOTMINSTAR:
560 case OP_NOTPLUS:
561 case OP_NOTMINPLUS:
562 case OP_NOTQUERY:
563 case OP_NOTMINQUERY:
564 case OP_NOTPOSSTAR:
565 case OP_NOTPOSPLUS:
566 case OP_NOTPOSQUERY:
567 case OP_NOTSTARI:
568 case OP_NOTMINSTARI:
569 case OP_NOTPLUSI:
570 case OP_NOTMINPLUSI:
571 case OP_NOTQUERYI:
572 case OP_NOTMINQUERYI:
573 case OP_NOTPOSSTARI:
574 case OP_NOTPOSPLUSI:
575 case OP_NOTPOSQUERYI:
576 cc += 2;
577 #ifdef SUPPORT_UTF
578 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 #endif
580 return cc;
/* Opcode, an IMM2 sized number, then one character (with the same UTF
handling as above). */
582 case OP_UPTO:
583 case OP_MINUPTO:
584 case OP_EXACT:
585 case OP_POSUPTO:
586 case OP_UPTOI:
587 case OP_MINUPTOI:
588 case OP_EXACTI:
589 case OP_POSUPTOI:
590 case OP_NOTUPTO:
591 case OP_NOTMINUPTO:
592 case OP_NOTEXACT:
593 case OP_NOTPOSUPTO:
594 case OP_NOTUPTOI:
595 case OP_NOTMINUPTOI:
596 case OP_NOTEXACTI:
597 case OP_NOTPOSUPTOI:
598 cc += 2 + IMM2_SIZE;
599 #ifdef SUPPORT_UTF
600 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
601 #endif
602 return cc;
/* Opcode followed by two code units. */
604 case OP_NOTPROP:
605 case OP_PROP:
606 return cc + 1 + 2;
/* Opcode followed by one IMM2 sized number. */
608 case OP_TYPEUPTO:
609 case OP_TYPEMINUPTO:
610 case OP_TYPEEXACT:
611 case OP_TYPEPOSUPTO:
612 case OP_REF:
613 case OP_REFI:
614 case OP_CREF:
615 case OP_NCREF:
616 case OP_RREF:
617 case OP_NRREF:
618 case OP_CLOSE:
619 cc += 1 + IMM2_SIZE;
620 return cc;
/* Opcode followed by two IMM2 sized numbers. */
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 return cc + 1 + 2 * IMM2_SIZE;
/* Opcode followed by 32 bytes of data, measured in code units. */
626 case OP_CLASS:
627 case OP_NCLASS:
628 return cc + 1 + 32 / sizeof(pcre_uchar);
630 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The total length is read from the opcode itself with GET(cc, 1). */
631 case OP_XCLASS:
632 return cc + GET(cc, 1);
633 #endif
/* Opcode followed by a link. */
635 case OP_RECURSE:
636 case OP_ASSERT:
637 case OP_ASSERT_NOT:
638 case OP_ASSERTBACK:
639 case OP_ASSERTBACK_NOT:
640 case OP_REVERSE:
641 case OP_ONCE:
642 case OP_ONCE_NC:
643 case OP_BRA:
644 case OP_BRAPOS:
645 case OP_COND:
646 case OP_SBRA:
647 case OP_SBRAPOS:
648 case OP_SCOND:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 return cc + 1 + LINK_SIZE;
/* Opcode followed by a link and an IMM2 sized number. */
656 case OP_CBRA:
657 case OP_CBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 return cc + 1 + LINK_SIZE + IMM2_SIZE;
/* Variable length: cc[1] is added to the fixed part of 3 code units.
NOTE(review): presumably cc[1] is the length of the mark name - confirm. */
662 case OP_MARK:
663 return cc + 1 + 2 + cc[1];
/* Opcode not known by this function. */
665 default:
666 return NULL;
670 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
672 int localspace = 0;
673 pcre_uchar *alternative;
674 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
675 while (cc < ccend)
677 switch(*cc)
679 case OP_SET_SOM:
680 common->has_set_som = TRUE;
681 cc += 1;
682 break;
684 case OP_ASSERT:
685 case OP_ASSERT_NOT:
686 case OP_ASSERTBACK:
687 case OP_ASSERTBACK_NOT:
688 case OP_ONCE:
689 case OP_ONCE_NC:
690 case OP_BRAPOS:
691 case OP_SBRA:
692 case OP_SBRAPOS:
693 case OP_SCOND:
694 localspace += sizeof(sljit_w);
695 cc += 1 + LINK_SIZE;
696 break;
698 case OP_CBRAPOS:
699 case OP_SCBRAPOS:
700 localspace += sizeof(sljit_w);
701 cc += 1 + LINK_SIZE + IMM2_SIZE;
702 break;
704 case OP_COND:
705 /* Might be a hidden SCOND. */
706 alternative = cc + GET(cc, 1);
707 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
708 localspace += sizeof(sljit_w);
709 cc += 1 + LINK_SIZE;
710 break;
712 case OP_RECURSE:
713 /* Set its value only once. */
714 if (common->recursive_head == 0)
716 common->recursive_head = common->ovector_start;
717 common->ovector_start += sizeof(sljit_w);
719 cc += 1 + LINK_SIZE;
720 break;
722 case OP_MARK:
723 if (common->mark_ptr == 0)
725 common->mark_ptr = common->ovector_start;
726 common->ovector_start += sizeof(sljit_w);
728 cc += 1 + 2 + cc[1];
729 break;
731 default:
732 cc = next_opcode(common, cc);
733 if (cc == NULL)
734 return -1;
735 break;
738 return localspace;
741 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
743 pcre_uchar *cc = common->start;
744 pcre_uchar *alternative;
745 while (cc < ccend)
747 switch(*cc)
749 case OP_ASSERT:
750 case OP_ASSERT_NOT:
751 case OP_ASSERTBACK:
752 case OP_ASSERTBACK_NOT:
753 case OP_ONCE:
754 case OP_ONCE_NC:
755 case OP_BRAPOS:
756 case OP_SBRA:
757 case OP_SBRAPOS:
758 case OP_SCOND:
759 common->localptrs[cc - common->start] = localptr;
760 localptr += sizeof(sljit_w);
761 cc += 1 + LINK_SIZE;
762 break;
764 case OP_CBRAPOS:
765 case OP_SCBRAPOS:
766 common->localptrs[cc - common->start] = localptr;
767 localptr += sizeof(sljit_w);
768 cc += 1 + LINK_SIZE + IMM2_SIZE;
769 break;
771 case OP_COND:
772 /* Might be a hidden SCOND. */
773 alternative = cc + GET(cc, 1);
774 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
776 common->localptrs[cc - common->start] = localptr;
777 localptr += sizeof(sljit_w);
779 cc += 1 + LINK_SIZE;
780 break;
782 default:
783 cc = next_opcode(common, cc);
784 SLJIT_ASSERT(cc != NULL);
785 break;
790 /* Returns with -1 if no need for frame. */
/* Computes the number of stack words needed by the frame which init_frame
creates for the bracket at cc (the two functions must be kept in sync).
Every capturing bracket inside the group costs 3 words, the first
OP_SET_SOM and the first OP_MARK cost 2 words each, and an OP_RECURSE
costs the same 2 + 2 words if the pattern has SET_SOM / MARK at all and
they were not counted yet. When recursive is set, SET_SOM and MARK are
treated as already counted, and the opening bracket is never counted.
The result is the sum plus one (for the closing frame_end), or -1 when
nothing has to be saved. */
791 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
793 pcre_uchar *ccend = bracketend(cc);
794 int length = 0;
795 BOOL possessive = FALSE;
796 BOOL setsom_found = recursive;
797 BOOL setmark_found = recursive;
/* A possessive capturing bracket saves its own offsets as well. */
799 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
801 length = 3;
802 possessive = TRUE;
805 cc = next_opcode(common, cc);
806 SLJIT_ASSERT(cc != NULL);
807 while (cc < ccend)
808 switch(*cc)
810 case OP_SET_SOM:
811 SLJIT_ASSERT(common->has_set_som);
812 if (!setsom_found)
814 length += 2;
815 setsom_found = TRUE;
817 cc += 1;
818 break;
820 case OP_MARK:
821 SLJIT_ASSERT(common->mark_ptr != 0);
822 if (!setmark_found)
824 length += 2;
825 setmark_found = TRUE;
827 cc += 1 + 2 + cc[1];
828 break;
830 case OP_RECURSE:
831 if (common->has_set_som && !setsom_found)
833 length += 2;
834 setsom_found = TRUE;
836 if (common->mark_ptr != 0 && !setmark_found)
838 length += 2;
839 setmark_found = TRUE;
841 cc += 1 + LINK_SIZE;
842 break;
844 case OP_CBRA:
845 case OP_CBRAPOS:
846 case OP_SCBRA:
847 case OP_SCBRAPOS:
848 length += 3;
849 cc += 1 + LINK_SIZE + IMM2_SIZE;
850 break;
852 default:
853 cc = next_opcode(common, cc);
854 SLJIT_ASSERT(cc != NULL);
855 break;
858 /* Possessive quantifiers can use a special case. */
/* That is, when only the possessive bracket itself was counted. */
859 if (SLJIT_UNLIKELY(possessive) && length == 3)
860 return -1;
862 if (length > 0)
863 return length + 1;
864 return -1;
/* Emits the code which writes a saved frame for the bracket at cc onto the
stack, starting at the word index stackpos. The frame is a sequence of
records:
  frame_setstrbegin, value of OVECTOR(0)            (for SET_SOM)
  frame_setmark, value of the local at mark_ptr     (for MARK)
  OVECTOR(offset), start value, end value           (per capturing bracket)
closed by a frame_end word. When recursive is set, the SET_SOM and MARK
records are not created and the opening bracket is always skipped. The
set of records matches what get_framesize counts; the final assert checks
that the frame ends exactly at STACK(stacktop). TMP1 and TMP2 are
overwritten by the emitted code. */
867 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
869 DEFINE_COMPILER;
870 pcre_uchar *ccend = bracketend(cc);
871 BOOL setsom_found = recursive;
872 BOOL setmark_found = recursive;
873 int offset;
875 /* >= 1 + shortest item size (2) */
876 SLJIT_UNUSED_ARG(stacktop);
877 SLJIT_ASSERT(stackpos >= stacktop + 2);
/* From here stackpos is a byte offset relative to STACK_TOP. */
879 stackpos = STACK(stackpos);
/* A possessive capturing bracket is not skipped, so it saves itself too. */
880 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
881 cc = next_opcode(common, cc);
882 SLJIT_ASSERT(cc != NULL);
883 while (cc < ccend)
884 switch(*cc)
886 case OP_SET_SOM:
887 SLJIT_ASSERT(common->has_set_som);
888 if (!setsom_found)
890 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
892 stackpos += (int)sizeof(sljit_w);
893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
894 stackpos += (int)sizeof(sljit_w);
895 setsom_found = TRUE;
897 cc += 1;
898 break;
900 case OP_MARK:
901 SLJIT_ASSERT(common->mark_ptr != 0);
902 if (!setmark_found)
904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
906 stackpos += (int)sizeof(sljit_w);
907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
908 stackpos += (int)sizeof(sljit_w);
909 setmark_found = TRUE;
911 cc += 1 + 2 + cc[1];
912 break;
/* A recursion saves both values if the pattern uses them at all. */
914 case OP_RECURSE:
915 if (common->has_set_som && !setsom_found)
917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
919 stackpos += (int)sizeof(sljit_w);
920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
921 stackpos += (int)sizeof(sljit_w);
922 setsom_found = TRUE;
924 if (common->mark_ptr != 0 && !setmark_found)
926 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
928 stackpos += (int)sizeof(sljit_w);
929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
930 stackpos += (int)sizeof(sljit_w);
931 setmark_found = TRUE;
933 cc += 1 + LINK_SIZE;
934 break;
936 case OP_CBRA:
937 case OP_CBRAPOS:
938 case OP_SCBRA:
939 case OP_SCBRAPOS:
/* The record starts with the locals offset of the pair, which is positive
and so differs from the frame_* tags (which are zero or negative). */
940 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
941 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
942 stackpos += (int)sizeof(sljit_w);
943 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
944 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
945 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
946 stackpos += (int)sizeof(sljit_w);
947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
948 stackpos += (int)sizeof(sljit_w);
950 cc += 1 + LINK_SIZE + IMM2_SIZE;
951 break;
953 default:
954 cc = next_opcode(common, cc);
955 SLJIT_ASSERT(cc != NULL);
956 break;
/* Close the frame. */
959 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
960 SLJIT_ASSERT(stackpos == STACK(stacktop));
963 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
965 int localsize = 2;
966 pcre_uchar *alternative;
967 /* Calculate the sum of the local variables. */
968 while (cc < ccend)
970 switch(*cc)
972 case OP_ASSERT:
973 case OP_ASSERT_NOT:
974 case OP_ASSERTBACK:
975 case OP_ASSERTBACK_NOT:
976 case OP_ONCE:
977 case OP_ONCE_NC:
978 case OP_BRAPOS:
979 case OP_SBRA:
980 case OP_SBRAPOS:
981 case OP_SCOND:
982 localsize++;
983 cc += 1 + LINK_SIZE;
984 break;
986 case OP_CBRA:
987 case OP_SCBRA:
988 localsize++;
989 cc += 1 + LINK_SIZE + IMM2_SIZE;
990 break;
992 case OP_CBRAPOS:
993 case OP_SCBRAPOS:
994 localsize += 2;
995 cc += 1 + LINK_SIZE + IMM2_SIZE;
996 break;
998 case OP_COND:
999 /* Might be a hidden SCOND. */
1000 alternative = cc + GET(cc, 1);
1001 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1002 localsize++;
1003 cc += 1 + LINK_SIZE;
1004 break;
1006 default:
1007 cc = next_opcode(common, cc);
1008 SLJIT_ASSERT(cc != NULL);
1009 break;
1012 SLJIT_ASSERT(cc == ccend);
1013 return localsize;
/* Emits the code which copies the private local words of the opcodes in
[cc, ccend) between the locals area and the stack.

save != 0: the locals are written to the stack. The first saved word is
the local at common->recursive_head (which must be non-zero), followed by
one word per opcode with private data or OP_CBRA / OP_SCBRA, and two words
per possessive capturing bracket.
save == 0: the values are read back from the stack into the same locals.
The first stack word is skipped and common->recursive_head is not written.

stackptr and stacktop are stack word indexes, converted to byte offsets
with STACK(). TMP1 and TMP2 are used alternately as transfer registers, so
the emitted loads and stores are interleaved; both are overwritten.

NOTE(review): the code below uses an `end` state (status != end, case end)
which does not appear in the enumerator list of this listing - the line
declaring it seems to be missing. */
1016 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1017 BOOL save, int stackptr, int stacktop)
1019 DEFINE_COMPILER;
/* Offsets of the locals to transfer for the current opcode (at most two). */
1020 int srcw[2];
1021 int count;
/* tmp1next selects the register of the next transfer; the empty flags
tell whether a register holds a value that still has to be stored. */
1022 BOOL tmp1next = TRUE;
1023 BOOL tmp1empty = TRUE;
1024 BOOL tmp2empty = TRUE;
1025 pcre_uchar *alternative;
1026 enum {
1027 start,
1028 loop,
1030 } status;
1032 status = save ? start : loop;
1033 stackptr = STACK(stackptr - 2);
1034 stacktop = STACK(stacktop - 1);
/* Restore mode: preload up to two values from the stack. */
1036 if (!save)
1038 stackptr += sizeof(sljit_w);
1039 if (stackptr < stacktop)
1041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1042 stackptr += sizeof(sljit_w);
1043 tmp1empty = FALSE;
1045 if (stackptr < stacktop)
1047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1048 stackptr += sizeof(sljit_w);
1049 tmp2empty = FALSE;
1051 /* The tmp1next must be TRUE in either way. */
1054 while (status != end)
1056 count = 0;
/* Step 1: collect the locals (srcw / count) of the next item. */
1057 switch(status)
1059 case start:
1060 SLJIT_ASSERT(save && common->recursive_head != 0);
1061 count = 1;
1062 srcw[0] = common->recursive_head;
1063 status = loop;
1064 break;
1066 case loop:
1067 if (cc >= ccend)
1069 status = end;
1070 break;
1073 switch(*cc)
1075 case OP_ASSERT:
1076 case OP_ASSERT_NOT:
1077 case OP_ASSERTBACK:
1078 case OP_ASSERTBACK_NOT:
1079 case OP_ONCE:
1080 case OP_ONCE_NC:
1081 case OP_BRAPOS:
1082 case OP_SBRA:
1083 case OP_SBRAPOS:
1084 case OP_SCOND:
1085 count = 1;
1086 srcw[0] = PRIV_DATA(cc);
1087 SLJIT_ASSERT(srcw[0] != 0);
1088 cc += 1 + LINK_SIZE;
1089 break;
1091 case OP_CBRA:
1092 case OP_SCBRA:
1093 count = 1;
1094 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1095 cc += 1 + LINK_SIZE + IMM2_SIZE;
1096 break;
1098 case OP_CBRAPOS:
1099 case OP_SCBRAPOS:
/* srcw[1] is transferred before srcw[0] (count is decremented first). */
1100 count = 2;
1101 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1102 srcw[0] = PRIV_DATA(cc);
1103 SLJIT_ASSERT(srcw[0] != 0);
1104 cc += 1 + LINK_SIZE + IMM2_SIZE;
1105 break;
1107 case OP_COND:
1108 /* Might be a hidden SCOND. */
1109 alternative = cc + GET(cc, 1);
1110 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1112 count = 1;
1113 srcw[0] = PRIV_DATA(cc);
1114 SLJIT_ASSERT(srcw[0] != 0);
1116 cc += 1 + LINK_SIZE;
1117 break;
1119 default:
1120 cc = next_opcode(common, cc);
1121 SLJIT_ASSERT(cc != NULL);
1122 break;
1124 break;
1126 case end:
1127 SLJIT_ASSERT_STOP();
1128 break;
/* Step 2: emit the transfers of the collected locals. */
1131 while (count > 0)
1133 count--;
1134 if (save)
/* Save: store the previous content of the register (if any) to the
stack, then load the next local into it. */
1136 if (tmp1next)
1138 if (!tmp1empty)
1140 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1141 stackptr += sizeof(sljit_w);
1143 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1144 tmp1empty = FALSE;
1145 tmp1next = FALSE;
1147 else
1149 if (!tmp2empty)
1151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1152 stackptr += sizeof(sljit_w);
1154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1155 tmp2empty = FALSE;
1156 tmp1next = TRUE;
1159 else
/* Restore: write the preloaded register to the local, then refill the
register from the stack if there is anything left. */
1161 if (tmp1next)
1163 SLJIT_ASSERT(!tmp1empty);
1164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1165 tmp1empty = stackptr >= stacktop;
1166 if (!tmp1empty)
1168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1169 stackptr += sizeof(sljit_w);
1171 tmp1next = FALSE;
1173 else
1175 SLJIT_ASSERT(!tmp2empty);
1176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1177 tmp2empty = stackptr >= stacktop;
1178 if (!tmp2empty)
1180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1181 stackptr += sizeof(sljit_w);
1183 tmp1next = TRUE;
/* Save mode: flush the values still held in the registers, oldest first. */
1189 if (save)
1191 if (tmp1next)
1193 if (!tmp1empty)
1195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1196 stackptr += sizeof(sljit_w);
1198 if (!tmp2empty)
1200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1201 stackptr += sizeof(sljit_w);
1204 else
1206 if (!tmp2empty)
1208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1209 stackptr += sizeof(sljit_w);
1211 if (!tmp1empty)
1213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1214 stackptr += sizeof(sljit_w);
1218 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1221 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1223 return (value & (value - 1)) == 0;
1226 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1228 while (list)
1230 /* sljit_set_label is clever enough to do nothing
1231 if either the jump or the label is NULL */
1232 sljit_set_label(list->jump, label);
1233 list = list->next;
1237 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1239 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1240 if (list_item)
1242 list_item->next = *list;
1243 list_item->jump = jump;
1244 *list = list_item;
1248 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1250 DEFINE_COMPILER;
1251 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1253 if (list_item)
1255 list_item->type = type;
1256 list_item->data = data;
1257 list_item->start = start;
1258 list_item->leave = LABEL();
1259 list_item->next = common->stubs;
1260 common->stubs = list_item;
1264 static void flush_stubs(compiler_common *common)
1266 DEFINE_COMPILER;
1267 stub_list* list_item = common->stubs;
1269 while (list_item)
1271 JUMPHERE(list_item->start);
1272 switch(list_item->type)
1274 case stack_alloc:
1275 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1276 break;
1278 JUMPTO(SLJIT_JUMP, list_item->leave);
1279 list_item = list_item->next;
1281 common->stubs = NULL;
1284 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1286 DEFINE_COMPILER;
1288 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1289 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
/* Emits code that advances STACK_TOP by size machine words and registers a
stack_alloc stub which is entered when the new STACK_TOP is greater than
STACK_LIMIT. */
1292 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1294 /* May destroy all locals and registers except TMP2. */
1295 DEFINE_COMPILER;
1297 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1298 #ifdef DESTROY_REGISTERS
/* Optional build mode: overwrite TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1
with the constant 12345 (presumably to expose code that wrongly relies on
them surviving this call). */
1299 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1300 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1301 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1304 #endif
/* The limit check itself is inline; the handling code is emitted later by
flush_stubs. */
1305 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1308 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1310 DEFINE_COMPILER;
1311 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
/* Emits code that writes the value (begin - 1 character) into the first
length ovector slots. Fewer than 8 slots are written with one store each;
otherwise a counted loop is emitted. */
1314 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1316 DEFINE_COMPILER;
1317 struct sljit_label *loop;
1318 int i;
1319 /* At this point we can freely use all temporary registers. */
1320 /* TMP1 returns with begin - 1. */
1321 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1322 if (length < 8)
/* Short vector: unrolled, one store per slot. */
1324 for (i = 0; i < length; i++)
1325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1327 else
/* Long vector: REG2 starts one word below OVECTOR_START and REG3 holds the
number of slots still to write. Each iteration stores at REG2 plus one word
(NOTE(review): SLJIT_MOVU presumably also advances REG2 by that offset -
confirm against the sljit documentation). */
1329 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1330 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1331 loop = LABEL();
1332 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1333 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1334 JUMPTO(SLJIT_C_NOT_ZERO, loop);
/* Emits code that copies the internal ovector (pointers kept in the locals
area) into the caller's offsets array as integer offsets relative to begin,
copies the mark pointer when one is in use, and leaves the return value in
SLJIT_RETURN_REG. */
1338 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1340 DEFINE_COMPILER;
1341 struct sljit_label *loop;
1342 struct sljit_jump *earlyexit;
1344 /* At this point we can freely use all registers. */
/* Keep the previous content of OVECTOR(1) in SAVED_REG3 (it is compared
against further below), then overwrite the slot with the current STR_PTR. */
1345 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
/* REG1 = pointer to the jit_arguments structure. */
1348 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1349 if (common->mark_ptr != 0)
1350 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
/* REG2 = offsetcount, used as the loop counter. */
1351 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1352 if (common->mark_ptr != 0)
1353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
/* REG3 = offsets minus one int, matching the sizeof(int) displacement of the
store in the loop. REG1 = begin. SAVED_REG1 = address of the first ovector
slot. */
1354 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1355 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1356 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1357 /* Unlikely, but possible */
1358 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1359 loop = LABEL();
/* SAVED_REG2 = (current ovector slot) - begin, then step to the next slot. */
1360 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1361 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1362 /* Copy the integer value to the output buffer */
1363 #ifdef COMPILE_PCRE16
/* 16 bit library: halve the difference. */
1364 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1365 #endif
1366 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1367 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1368 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1369 JUMPHERE(earlyexit);
1371 /* Calculate the return value, which is the maximum ovector value. */
1372 if (topbracket > 1)
/* Walk backwards from the pair of topbracket, two words per step, counting
REG2 down from topbracket + 1 for as long as the loaded slot equals the
value saved in SAVED_REG3. */
1374 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1375 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1377 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1378 loop = LABEL();
1379 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1380 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1381 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1382 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1384 else
/* With topbracket of 1 or less the return value is the constant 1. */
1385 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
/* Emits the exit sequence for a partial match: SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL and, when the caller supplied at least two offset slots,
offsets[0] and offsets[1] are filled before jumping to leave. */
1388 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1390 DEFINE_COMPILER;
/* STR_END is read below after SAVED_REG1 has been overwritten, so it must not
live in that register; the compile time check pins it to SAVED_REG2. */
1392 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1393 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1395 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1396 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
/* Leave immediately (return value already set) when offsetcount < 2. */
1397 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1398 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1400 /* Store match begin and end. */
/* SAVED_REG1 = begin, REG2 = offsets array, REG3 = the recorded start: the
start_used_ptr local in hard partial mode, the hit_start local otherwise. */
1401 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1402 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1403 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
/* offsets[1] = STR_END - begin (halved in the 16 bit library). */
1404 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1405 #ifdef COMPILE_PCRE16
1406 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1407 #endif
1408 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
/* offsets[0] = recorded start - begin (halved in the 16 bit library). */
1410 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1411 #ifdef COMPILE_PCRE16
1412 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1413 #endif
1414 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1416 JUMPTO(SLJIT_JUMP, leave);
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR is
not above it. Nothing is emitted unless a partial matching mode is being
compiled. */
1419 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1421 /* May destroy TMP1. */
1422 DEFINE_COMPILER;
1423 struct sljit_jump *jump;
1425 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1427 /* The value of -1 must be kept for start_used_ptr! */
/* Comparing start_used_ptr + 1 instead of start_used_ptr turns -1 into 0, so
the store below is skipped for that special value. */
1428 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1429 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1430 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1431 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1433 JUMPHERE(jump);
1435 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
/* Hard partial mode: skip the store when start_used_ptr <= STR_PTR. */
1437 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1439 JUMPHERE(jump);
1443 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1445 /* Detects if the character has an othercase. */
1446 unsigned int c;
1448 #ifdef SUPPORT_UTF
1449 if (common->utf)
1451 GETCHAR(c, cc);
1452 if (c > 127)
1454 #ifdef SUPPORT_UCP
1455 return c != UCD_OTHERCASE(c);
1456 #else
1457 return FALSE;
1458 #endif
1460 #ifndef COMPILE_PCRE8
1461 return common->fcc[c] != c;
1462 #endif
1464 else
1465 #endif
1466 c = *cc;
1467 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1470 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1472 /* Returns with the othercase. */
1473 #ifdef SUPPORT_UTF
1474 if (common->utf && c > 127)
1476 #ifdef SUPPORT_UCP
1477 return UCD_OTHERCASE(c);
1478 #else
1479 return c;
1480 #endif
1482 #endif
1483 return TABLE_GET(c, common->fcc, c);
/* Examines the character at cc and its other case. Returns 0 when they differ
in more than one bit; otherwise returns (index << 8) | mask, where mask is
the differing bit moved into the low 8 bits and index is the number computed
below (presumably the position of the affected unit inside the encoded
character - confirm against the callers). The character must have an other
case (asserted). */
1486 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1488 /* Detects if the character and its othercase has only 1 bit difference. */
1489 unsigned int c, oc, bit;
1490 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1491 int n;
1492 #endif
/* Step 1: fetch the character (c) and its other case (oc). */
1494 #ifdef SUPPORT_UTF
1495 if (common->utf)
1497 GETCHAR(c, cc);
1498 if (c <= 127)
1499 oc = common->fcc[c];
1500 else
1502 #ifdef SUPPORT_UCP
1503 oc = UCD_OTHERCASE(c);
1504 #else
1505 oc = c;
1506 #endif
1509 else
1511 c = *cc;
1512 oc = TABLE_GET(c, common->fcc, c);
1514 #else
1515 c = *cc;
1516 oc = TABLE_GET(c, common->fcc, c);
1517 #endif
1519 SLJIT_ASSERT(c != oc);
/* Step 2: the set bits of c ^ oc are the bits in which the two differ. */
1521 bit = c ^ oc;
1522 /* Optimized for English alphabet. */
1523 if (c <= 127 && bit == 0x20)
1524 return (0 << 8) | 0x20;
1526 /* Since c != oc, they must have at least 1 bit difference. */
1527 if (!ispowerof2(bit))
1528 return 0;
1530 #ifdef COMPILE_PCRE8
1532 #ifdef SUPPORT_UTF
1533 if (common->utf && c > 127)
/* UTF-8: start from the number of extra bytes of the sequence and step back
one byte for every 6 low bits of the difference that are all zero. */
1535 n = GET_EXTRALEN(*cc);
1536 while ((bit & 0x3f) == 0)
1538 n--;
1539 bit >>= 6;
1541 return (n << 8) | bit;
1543 #endif /* SUPPORT_UTF */
1544 return (0 << 8) | bit;
1546 #else /* COMPILE_PCRE8 */
1548 #ifdef COMPILE_PCRE16
1549 #ifdef SUPPORT_UTF
1550 if (common->utf && c > 65535)
/* Character above 0xffff: a difference at bit 10 or higher is shifted down by
10 and handled by the common return below (indexes 0 and 1); a lower
difference is reported with indexes 2 and 3. */
1552 if (bit >= (1 << 10))
1553 bit >>= 10;
1554 else
1555 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1557 #endif /* SUPPORT_UTF */
/* Index 0 when the bit is in the low 8 bits, index 1 (mask shifted down by 8)
otherwise. */
1558 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1559 #endif /* COMPILE_PCRE16 */
1561 #endif /* COMPILE_PCRE8 */
/* Emits the partial match test used in the partial matching modes; emits
nothing in JIT_COMPILE mode. force must only be set in a partial mode
(asserted). */
1564 static void check_partial(compiler_common *common, BOOL force)
1566 /* Checks whether a partial match has occurred. Does not modify registers. */
1567 DEFINE_COMPILER;
1568 struct sljit_jump *jump = NULL;
1570 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1572 if (common->mode == JIT_COMPILE)
1573 return;
/* Without force the action below is skipped when start_used_ptr >= STR_PTR.
With force it is skipped only in soft mode, and only when start_used_ptr
holds -1. In hard mode with force no skip jump is emitted at all. */
1575 if (!force)
1576 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1577 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1578 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
/* Action: soft mode stores -1 into the hit_start local; hard mode jumps to the
partial match exit, using the label when it already exists and queueing the
jump in common->partialmatch otherwise. */
1580 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1582 else
1584 if (common->partialmatchlabel != NULL)
1585 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1586 else
1587 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1590 if (jump != NULL)
1591 JUMPHERE(jump);
/* Emits an end-of-subject test and returns the jump that is taken when the
test fails because STR_PTR has reached STR_END. The caller has to bind the
returned jump. In the partial modes the partial match bookkeeping is emitted
on the end-of-subject path as well. */
1594 static struct sljit_jump *check_str_end(compiler_common *common)
1596 /* Does not affect registers. Usually used in a tight spot. */
1597 DEFINE_COMPILER;
1598 struct sljit_jump *jump;
1599 struct sljit_jump *nohit;
1600 struct sljit_jump *return_value;
/* Normal mode: a single compare and jump. */
1602 if (common->mode == JIT_COMPILE)
1603 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Partial modes: input still available, skip everything below. */
1605 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1606 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
/* Soft mode: store -1 into hit_start unless start_used_ptr >= STR_PTR, then
always take the returned jump. */
1608 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1610 JUMPHERE(nohit);
1611 return_value = JUMP(SLJIT_JUMP);
1613 else
/* Hard mode: the returned jump is taken when start_used_ptr >= STR_PTR;
otherwise control goes to the partial match exit. */
1615 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1616 if (common->partialmatchlabel != NULL)
1617 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1618 else
1619 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1621 JUMPHERE(jump);
1622 return return_value;
/* Emits an end-of-subject test whose failure jumps are appended to
backtracks. It is the list based counterpart of check_str_end: in the partial
modes the same bookkeeping is emitted on the end-of-subject path. */
1625 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
1627 DEFINE_COMPILER;
1628 struct sljit_jump *jump;
1630 if (common->mode == JIT_COMPILE)
/* Normal mode: backtrack when STR_PTR >= STR_END. */
1632 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1633 return;
1636 /* Partial matching mode. */
/* Input still available: skip the rest. Otherwise backtrack at once when
start_used_ptr >= STR_PTR. */
1637 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1638 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1639 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
/* Soft mode: store -1 into hit_start and backtrack. */
1641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1642 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
1644 else
/* Hard mode: go to the partial match exit. */
1646 if (common->partialmatchlabel != NULL)
1647 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1648 else
1649 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1651 JUMPHERE(jump);
/* Emits code that loads the character at STR_PTR into TMP1 and advances
STR_PTR past it. */
1654 static void read_char(compiler_common *common)
1656 /* Reads the character into TMP1, updates STR_PTR.
1657 Does not check STR_END. TMP2 Destroyed. */
1658 DEFINE_COMPILER;
1659 #ifdef SUPPORT_UTF
1660 struct sljit_jump *jump;
1661 #endif
/* Load the first code unit. */
1663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1664 #ifdef SUPPORT_UTF
1665 if (common->utf)
/* Units below 0xc0 (8 bit) or 0xd800 (16 bit) are complete characters and
skip the call. Anything else is decoded by the shared routine collected in
common->utfreadchar, which also advances STR_PTR over the extra units. */
1667 #ifdef COMPILE_PCRE8
1668 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1669 #else
1670 #ifdef COMPILE_PCRE16
1671 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1672 #endif
1673 #endif /* COMPILE_PCRE8 */
1674 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1675 JUMPHERE(jump);
1677 #endif
/* Step over the first code unit. */
1678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Emits code that loads the character at STR_PTR into TMP1 and leaves STR_PTR
where it was. */
1681 static void peek_char(compiler_common *common)
1683 /* Reads the character into TMP1, keeps STR_PTR.
1684 Does not check STR_END. TMP2 Destroyed. */
1685 DEFINE_COMPILER;
1686 #ifdef SUPPORT_UTF
1687 struct sljit_jump *jump;
1688 #endif
1690 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1691 #ifdef SUPPORT_UTF
1692 if (common->utf)
/* Same threshold test as read_char. The shared decoder advances STR_PTR and
returns the advance in TMP2, so it is subtracted again afterwards. */
1694 #ifdef COMPILE_PCRE8
1695 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1696 #else
1697 #ifdef COMPILE_PCRE16
1698 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1699 #endif
1700 #endif /* COMPILE_PCRE8 */
1701 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1702 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1703 JUMPHERE(jump);
1705 #endif
/* Emits code that loads the ctypes table entry of the character at STR_PTR
into TMP1 and advances STR_PTR past the character. Characters outside the
256 entry table yield 0. TMP2 is used as scratch. */
1708 static void read_char8_type(compiler_common *common)
1710 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1711 DEFINE_COMPILER;
1712 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1713 struct sljit_jump *jump;
1714 #endif
1716 #ifdef SUPPORT_UTF
1717 if (common->utf)
/* UTF mode: TMP2 = first code unit, STR_PTR already stepped over it. */
1719 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1721 #ifdef COMPILE_PCRE8
1722 /* This can be an extra read in some situations, but hopefully
1723 it is needed in most cases. */
/* The table is read before the lead byte test; lead bytes (>= 0xc0) are then
handled by the shared routine collected in common->utfreadtype8. */
1724 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1725 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1726 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1727 JUMPHERE(jump);
1728 #else
1729 #ifdef COMPILE_PCRE16
/* Type is 0 unless the unit is at most 255. */
1730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1732 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1733 JUMPHERE(jump);
1734 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xd800 identifies a high surrogate; the comparison result
(presumably 1 or 0) is scaled to one 16 bit unit and added to STR_PTR. */
1735 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1736 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1737 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1738 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1739 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1740 #endif
1741 #endif /* COMPILE_PCRE8 */
1742 return;
1744 #endif
/* Non-UTF mode: one code unit is one character. */
1745 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1747 #ifdef COMPILE_PCRE16
1748 /* The ctypes array contains only 256 values. */
1749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1750 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1751 #endif
1752 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1753 #ifdef COMPILE_PCRE16
1754 JUMPHERE(jump);
1755 #endif
/* Emits code that moves STR_PTR back to the start of the previous
character. */
1758 static void skip_char_back(compiler_common *common)
1760 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1761 DEFINE_COMPILER;
1762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1763 struct sljit_label *label;
1765 if (common->utf)
/* UTF-8: step back one byte at a time while the byte just passed has the form
10xxxxxx ((byte & 0xc0) == 0x80), i.e. is a continuation byte. */
1767 label = LABEL();
1768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1769 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1770 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1771 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1772 return;
1774 #endif
1775 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1776 if (common->utf)
/* UTF-16: step back one unit; when that unit is a low surrogate
((unit & 0xfc00) == 0xdc00) step back one more unit. */
1778 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1779 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1780 /* Skip low surrogate if necessary. */
1781 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1782 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1783 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1784 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1785 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1786 return;
1788 #endif
/* Non-UTF: a character is exactly one code unit. */
1789 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Emits a newline test for the character in TMP1 and appends the resulting
jump to backtracks. The jump is taken when the character is a newline if
jumpiftrue is set, and when it is not a newline otherwise. */
1792 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
1794 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1795 DEFINE_COMPILER;
1797 if (nltype == NLTYPE_ANY)
/* Delegates to the shared routine collected in common->anynewline and
branches on the zero flag afterwards. */
1799 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1800 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1802 else if (nltype == NLTYPE_ANYCRLF)
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the second COND_VALUE also sets the
flag that the jump tests. */
1804 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1805 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1806 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1807 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1808 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1810 else
/* Fixed newline below 256 (asserted): a direct comparison. */
1812 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1813 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1817 #ifdef SUPPORT_UTF
1819 #ifdef COMPILE_PCRE8
/* Emits the shared fast-call routine that decodes a multi-byte UTF-8
character. On entry STR_PTR still points at the lead byte, which is in TMP1.
On return TMP1 holds the decoded value, TMP2 the number of extra bytes, and
STR_PTR has been advanced by that number. No validity checks are emitted. */
1820 static void do_utfreadchar(compiler_common *common)
1822 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1823 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1824 DEFINE_COMPILER;
1825 struct sljit_jump *jump;
1827 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1828 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte: two byte sequence. */
1829 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1830 jump = JUMP(SLJIT_C_NOT_ZERO);
1831 /* Two byte sequence. */
/* value = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
1832 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1835 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1836 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1837 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1838 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1839 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1840 JUMPHERE(jump);
/* Bit 0x10 clear in the lead byte: three byte sequence. */
1842 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1843 jump = JUMP(SLJIT_C_NOT_ZERO);
1844 /* Three byte sequence. */
/* value = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f) */
1845 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1846 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1847 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1848 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1849 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1850 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1851 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1853 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1854 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1855 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1856 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1857 JUMPHERE(jump);
1859 /* Four byte sequence. */
/* Every remaining lead byte is decoded as a four byte sequence:
value = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
      | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f) */
1860 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1862 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1863 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1864 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1865 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1866 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1867 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1868 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1869 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1870 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1871 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1872 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1873 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1874 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1875 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared fast-call routine that yields the ctypes entry for a
multi-byte UTF-8 character. The lead byte is in TMP2 and, as emitted by
read_char8_type, STR_PTR already points behind it. STR_PTR is advanced over
the remaining bytes and the type is returned in TMP1 (0 for every character
above 255). */
1878 static void do_utfreadtype8(compiler_common *common)
1880 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1881 of the character (>= 0xc0). Return value in TMP1. */
1882 DEFINE_COMPILER;
1883 struct sljit_jump *jump;
1884 struct sljit_jump *compare;
1886 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Bit 0x20 clear in the lead byte: two byte sequence, the only form that can
encode a value below 256. */
1888 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1889 jump = JUMP(SLJIT_C_NOT_ZERO);
1890 /* Two byte sequence. */
/* The second byte is at offset 0 because the lead byte was already skipped.
TMP2 = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
1891 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1893 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1894 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1895 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1896 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Table lookup only for values up to 255; larger values return type 0. */
1897 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1898 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1899 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1901 JUMPHERE(compare);
1902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1903 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1904 JUMPHERE(jump);
1906 /* We only have types for characters less than 256. */
/* Longer sequences: the utf8_table4 entry indexed by (lead byte - 0xc0),
presumably the number of additional bytes, is added to STR_PTR and type 0 is
returned without decoding the value. */
1907 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1909 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1910 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1913 #else /* COMPILE_PCRE8 */
1915 #ifdef COMPILE_PCRE16
/* Emits the shared fast-call routine that decodes a UTF-16 character whose
first unit (in TMP1) is 0xd800 or above. Units of 0xdc00 and above are
returned unchanged; otherwise the unit is combined with the following one
into a value above 0xffff and STR_PTR is advanced by one unit. */
1916 static void do_utfreadchar(compiler_common *common)
1918 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1919 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1920 DEFINE_COMPILER;
1921 struct sljit_jump *jump;
1923 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Only units below 0xdc00 (high surrogates) start a pair. */
1924 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1925 /* Do nothing, only return. */
/* NOTE(review): TMP2 is not written on this path although the header comment
promises length - 1 in TMP2, and peek_char subtracts TMP2 after the call -
confirm that callers cannot observe a stale value. */
1926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1928 JUMPHERE(jump);
1929 /* Combine two 16 bit characters. */
/* value = (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000 */
1930 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1931 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1932 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1933 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1934 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1936 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1937 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1938 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1940 #endif /* COMPILE_PCRE16 */
1942 #endif /* COMPILE_PCRE8 */
1944 #endif /* SUPPORT_UTF */
1946 #ifdef SUPPORT_UCP
1948 static sljit_w SLJIT_CALL getunichartype(sljit_w c)
1950 return (sljit_w)(unsigned int)UCD_CHARTYPE((unsigned int)c);
1953 static sljit_w SLJIT_CALL getunicharscript(sljit_w c)
1955 return (sljit_w)(unsigned int)UCD_SCRIPT((unsigned int)c);
/* Emits the shared fast-call routine that calls the C helper getunichartype.
STACK_TOP is parked in the LOCALS0 slot across the call and reloaded
afterwards. */
1958 static void do_getunichartype(compiler_common *common)
1960 /* Character comes in TMP1. Returns chartype in TMP1 */
1961 DEFINE_COMPILER;
1963 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1964 /* Save registers */
1965 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* One argument call into C code. */
1966 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype));
1967 /* Restore registers */
1968 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1969 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Same routine as do_getunichartype, except that STACK_TOP is parked in the
POSSESSIVE0 slot instead of LOCALS0 (presumably for call sites where LOCALS0
is already in use - confirm against the callers). */
1972 static void do_getunichartype_2(compiler_common *common)
1974 /* Character comes in TMP1. Returns chartype in TMP1 */
1975 DEFINE_COMPILER;
1977 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1978 /* Save registers */
1979 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STACK_TOP, 0);
/* One argument call into C code. */
1980 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunichartype));
1981 /* Restore registers */
1982 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1983 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared fast-call routine that calls the C helper
getunicharscript. STACK_TOP is parked in the LOCALS0 slot across the call and
reloaded afterwards. */
1986 static void do_getunicharscript(compiler_common *common)
1988 /* Character comes in TMP1. Returns the script in TMP1 */
1989 DEFINE_COMPILER;
1991 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1992 /* Save registers */
1993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* One argument call into C code. */
1994 sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(getunicharscript));
1995 /* Restore registers */
1996 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1997 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1999 #endif
/* Emits the code that advances STR_PTR by one character between match
attempts and returns the label to jump to for that advance. The first pass
jumps over the advance through the start jump. With firstline set, code that
finds the end of the first line and stores it in the first_line_end local is
emitted in front. */
2001 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2003 DEFINE_COMPILER;
2004 struct sljit_label *mainloop;
2005 struct sljit_label *newlinelabel = NULL;
2006 struct sljit_jump *start;
2007 struct sljit_jump *end = NULL;
2008 struct sljit_jump *nl = NULL;
2009 #ifdef SUPPORT_UTF
2010 struct sljit_jump *singlechar;
2011 #endif
2012 jump_list *newline = NULL;
2013 BOOL newlinecheck = FALSE;
2014 BOOL readuchar = FALSE;
/* A newline check during the advance is only emitted when neither hascrorlf
nor firstline is set and the newline convention is ANY, ANYCRLF or a value
above 255. */
2016 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2017 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2018 newlinecheck = TRUE;
2020 if (firstline)
2022 /* Search for the end of the first line. */
2023 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is parked in LOCALS0 during the scan; first_line_end defaults to
STR_END. */
2024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2027 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Newline value above 255: compare two neighbouring units against the high
and low byte of common->newline; on a hit first_line_end becomes the
position of the first of the two. */
2029 mainloop = LABEL();
2030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2031 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2032 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2033 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2034 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2035 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2036 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2038 else
/* Other conventions: record the position before each character, read it and
stop at the first newline; if none is found the final position is stored. */
2040 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2041 mainloop = LABEL();
2042 /* Continual stores does not cause data dependency. */
2043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2044 read_char(common);
2045 check_newlinechar(common, common->nltype, &newline, TRUE);
2046 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2048 set_jumps(newline, LABEL());
/* Both scans end here; restore the original STR_PTR. */
2051 JUMPHERE(end);
2052 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* The first pass skips the advance code emitted below. */
2055 start = JUMP(SLJIT_JUMP);
2057 if (newlinecheck)
/* Reached when the current unit equals the high byte of common->newline: step
over it and, unless the subject ends there, step over the next unit as well
when it equals the low byte. */
2059 newlinelabel = LABEL();
2060 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2061 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2062 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2063 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2064 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2065 #ifdef COMPILE_PCRE16
2066 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2067 #endif
2068 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2069 nl = JUMP(SLJIT_JUMP);
/* This label is the value returned to the caller. */
2072 mainloop = LABEL();
2074 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when UTF handling or the newline check needs
to inspect it. */
2075 #ifdef SUPPORT_UTF
2076 if (common->utf) readuchar = TRUE;
2077 #endif
2078 if (newlinecheck) readuchar = TRUE;
2080 if (readuchar)
2081 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2083 if (newlinecheck)
2084 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2086 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2087 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2088 if (common->utf)
/* UTF-8: for lead bytes (>= 0xc0) add the utf8_table4 entry (presumably the
number of additional bytes) to STR_PTR. */
2090 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2091 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2093 JUMPHERE(singlechar);
2095 #endif
2096 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2097 if (common->utf)
/* UTF-16: step over one more unit when the unit is a high surrogate
((unit & 0xfc00) == 0xd800). */
2099 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2100 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2101 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2102 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2103 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2104 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2105 JUMPHERE(singlechar);
2107 #endif
/* The first-pass jump and the exits of the newline block all continue here. */
2108 JUMPHERE(start);
2110 if (newlinecheck)
2112 JUMPHERE(end);
2113 JUMPHERE(nl);
2116 return mainloop;
/* Emits a scan loop that advances STR_PTR until the unit at STR_PTR equals
first_char (or, when caseless is set, its other case) or STR_END is reached.
With firstline set, STR_END is temporarily replaced by the first_line_end
local and restored at the end. */
2119 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2121 DEFINE_COMPILER;
2122 struct sljit_label *start;
2123 struct sljit_jump *leave;
2124 struct sljit_jump *found;
2125 pcre_uchar oc, bit;
2127 if (firstline)
/* Park the real STR_END in the POSSESSIVE0 slot and limit the scan to the
first line. */
2129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2130 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
/* Loop head: leave when the subject is exhausted, otherwise load the unit. */
2133 start = LABEL();
2134 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2135 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* oc is the alternative value to accept; it equals first_char unless caseless
matching provides a different other case. */
2137 oc = first_char;
2138 if (caseless)
2140 oc = TABLE_GET(first_char, common->fcc, first_char);
2141 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2142 if (first_char > 127 && common->utf)
2143 oc = UCD_OTHERCASE(first_char);
2144 #endif
2146 if (first_char == oc)
/* Single candidate: one comparison. */
2147 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2148 else
2150 bit = first_char ^ oc;
2151 if (ispowerof2(bit))
/* The two candidates differ in one bit: force that bit on and compare once. */
2153 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2154 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2156 else
/* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2159 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2161 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2162 found = JUMP(SLJIT_C_NOT_ZERO);
/* No match: advance by one whole character and try again. */
2166 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2167 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2168 if (common->utf)
/* UTF-8: for lead bytes (>= 0xc0) also add the utf8_table4 entry to
STR_PTR. */
2170 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2171 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2174 #endif
2175 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2176 if (common->utf)
/* UTF-16: also step over the second unit after a high surrogate. */
2178 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2179 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2181 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2182 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2183 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2185 #endif
2186 JUMPTO(SLJIT_JUMP, start);
/* Both exits meet here with STR_PTR at the hit or at the scan limit. */
2187 JUMPHERE(found);
2188 JUMPHERE(leave);
2190 if (firstline)
2191 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
/* Emits code that moves STR_PTR forward to a position that follows a newline,
or to STR_END if none is found. Two code shapes are generated: one for a fixed
two-unit newline (common->newline > 255) and one for every other newline type.
When firstline is set, STR_END is parked in the POSSESSIVE0 local slot and
replaced by the value stored at common->first_line_end; it is restored before
the generated code falls through. */
2194 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2196 DEFINE_COMPILER;
2197 struct sljit_label *loop;
2198 struct sljit_jump *lastchar;
2199 struct sljit_jump *firstchar;
2200 struct sljit_jump *leave;
2201 struct sljit_jump *foundcr = NULL;
2202 struct sljit_jump *notfoundnl;
2203 jump_list *newline = NULL;
2205 if (firstline)
2207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2208 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
/* Fixed two-unit newline: the high byte of common->newline is the first unit,
the low byte the second. */
2211 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Nothing to do when already at STR_END or at/before the jit_arguments str
pointer. */
2213 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2214 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2215 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2217 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Step STR_PTR back by one unit if it is at least two units past
jit_arguments begin, so the loop below can look at the two preceding units. */
2219 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2220 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2221 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2222 #ifdef COMPILE_PCRE16
2223 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2224 #endif
2225 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Advance one unit at a time until the two units just before STR_PTR are the
newline pair, or STR_END is reached. */
2227 loop = LABEL();
2228 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2229 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2230 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2231 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2232 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2233 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2235 JUMPHERE(leave);
2236 JUMPHERE(firstchar);
2237 JUMPHERE(lastchar);
2239 if (firstline)
2240 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2241 return;
/* All other newline types. Skipped entirely when STR_PTR is at/before the
jit_arguments str pointer. */
2244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2245 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2246 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2247 skip_char_back(common);
/* NOTE(review): read_char() and check_newlinechar() are defined outside this
view; presumably read_char() loads the next character into TMP1 and advances
STR_PTR, and the jumps collected in 'newline' are taken when TMP1 is not a
newline - confirm. Those jumps are bound to 'loop', i.e. they continue the scan. */
2249 loop = LABEL();
2250 read_char(common);
2251 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2252 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2253 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2254 check_newlinechar(common, common->nltype, &newline, FALSE);
2255 set_jumps(newline, loop);
2257 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
/* After a CR: if not at STR_END and the unit at STR_PTR is CHAR_NL, step over
it as well (TMP1 becomes 0 or 1, scaled to bytes for 16-bit units). */
2259 leave = JUMP(SLJIT_JUMP);
2260 JUMPHERE(foundcr);
2261 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2262 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2263 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2264 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2265 #ifdef COMPILE_PCRE16
2266 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2267 #endif
2268 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2269 JUMPHERE(notfoundnl);
2270 JUMPHERE(leave);
2272 JUMPHERE(lastchar);
2273 JUMPHERE(firstchar);
2275 if (firstline)
2276 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
/* Emits a scan loop that advances STR_PTR until the character unit it points
at has its bit set in the bitmap located at address start_bits, or until
STR_PTR reaches STR_END. The bitmap is indexed by byte (unit >> 3) and bit
(unit & 7). When firstline is set, STR_END is parked in the POSSESSIVE0 local
slot and replaced by the value stored at common->first_line_end, then restored
at the end. */
2279 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2281 DEFINE_COMPILER;
2282 struct sljit_label *start;
2283 struct sljit_jump *leave;
2284 struct sljit_jump *found;
2285 #ifndef COMPILE_PCRE8
2286 struct sljit_jump *jump;
2287 #endif
2289 if (firstline)
2291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2292 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2295 start = LABEL();
2296 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2297 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode keep the unmodified unit in TMP3; TMP1 is destroyed by the
bitmap lookup but is needed again to compute the character length. */
2298 #ifdef SUPPORT_UTF
2299 if (common->utf)
2300 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2301 #endif
/* Wide units: any value of 255 or above is looked up as 255. */
2302 #ifndef COMPILE_PCRE8
2303 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2305 JUMPHERE(jump);
2306 #endif
/* TMP1 = bitmap byte for the unit, TMP2 = 1 << (unit & 7); a non-zero AND
means the unit is a permitted starting unit. */
2307 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2308 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2309 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2310 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2311 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2312 found = JUMP(SLJIT_C_NOT_ZERO);
2314 #ifdef SUPPORT_UTF
2315 if (common->utf)
2316 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2317 #endif
/* Not a starting unit: step over it, plus any additional units belonging to
the same UTF character. */
2318 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2319 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2320 if (common->utf)
2322 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2324 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2326 #endif
2327 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2328 if (common->utf)
2330 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2331 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2332 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2333 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2334 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2335 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2337 #endif
2338 JUMPTO(SLJIT_JUMP, start);
2339 JUMPHERE(found);
2340 JUMPHERE(leave);
2342 if (firstline)
2343 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2346 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2348 DEFINE_COMPILER;
2349 struct sljit_label *loop;
2350 struct sljit_jump *toolong;
2351 struct sljit_jump *alreadyfound;
2352 struct sljit_jump *found;
2353 struct sljit_jump *foundoc = NULL;
2354 struct sljit_jump *notfound;
2355 pcre_uchar oc, bit;
2357 SLJIT_ASSERT(common->req_char_ptr != 0);
2358 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2359 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2360 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2361 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2363 if (has_firstchar)
2364 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2365 else
2366 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2368 loop = LABEL();
2369 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2371 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2372 oc = req_char;
2373 if (caseless)
2375 oc = TABLE_GET(req_char, common->fcc, req_char);
2376 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2377 if (req_char > 127 && common->utf)
2378 oc = UCD_OTHERCASE(req_char);
2379 #endif
2381 if (req_char == oc)
2382 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2383 else
2385 bit = req_char ^ oc;
2386 if (ispowerof2(bit))
2388 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2389 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2391 else
2393 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2394 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2397 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2398 JUMPTO(SLJIT_JUMP, loop);
2400 JUMPHERE(found);
2401 if (foundoc)
2402 JUMPHERE(foundoc);
2403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2404 JUMPHERE(alreadyfound);
2405 JUMPHERE(toolong);
2406 return notfound;
/* Emits a fast-call routine (return address in RETURN_ADDR) that walks a
sequence of machine words starting at STACK_TOP and replays them:
  - a first word greater than frame_end (signed compare) is taken as an offset
    from the local base held in TMP3; the next two words are copied to that
    location and the one after it, and the cursor advances by three words;
  - frame_end ends the walk and returns;
  - frame_setstrbegin stores the following word into OVECTOR(0);
  - frame_setmark (only checked when common->mark_ptr != 0) stores the
    following word into the common->mark_ptr local slot;
  - any other value is skipped together with the word that follows it.
TMP1 is the cursor, TMP2 holds the word currently examined. */
2409 static void do_revertframes(compiler_common *common)
2411 DEFINE_COMPILER;
2412 struct sljit_jump *jump;
2413 struct sljit_label *mainloop;
2415 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2416 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2417 GET_LOCAL_BASE(TMP3, 0, 0);
2419 /* Drop frames until we reach STACK_TOP. */
2420 mainloop = LABEL();
2421 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2422 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Restore a saved pair of words to local base + offset. */
2423 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2424 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2425 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2426 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2427 JUMPTO(SLJIT_JUMP, mainloop);
2429 JUMPHERE(jump);
2430 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2431 /* End of dropping frames. */
2432 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2434 JUMPHERE(jump);
2435 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2436 /* Set string begin. */
2437 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2438 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2439 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2440 JUMPTO(SLJIT_JUMP, mainloop);
2442 JUMPHERE(jump);
/* The mark command is only recognised when a mark slot has been allocated. */
2443 if (common->mark_ptr != 0)
2445 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2446 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2447 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2448 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2449 JUMPTO(SLJIT_JUMP, mainloop);
2451 JUMPHERE(jump);
2454 /* Unknown command. */
2455 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2456 JUMPTO(SLJIT_JUMP, mainloop);
/* Emits a fast-call routine (return address kept in the LOCALS0 slot) that
computes a 0/1 "word character" value for the character before STR_PTR
(stored in LOCALS1) and for the character at STR_PTR (left in TMP2), then
XORs the two with SLJIT_SET_E so the caller can test the flags.

With common->use_ucp the value is 1 for CHAR_UNDERSCORE, or when the type
obtained through the getunichartype_2 routine lies in the ucp_Ll..ucp_Lu or
ucp_Nd..ucp_No ranges. Otherwise bit 4 (ctype_word) of the common->ctypes
entry is used; characters above 255 keep the value 0.

A side with no character to read (STR_PTR at/before jit_arguments begin, or
the check_str_end() jump taken) keeps the value 0. */
2459 static void check_wordboundary(compiler_common *common)
2461 DEFINE_COMPILER;
2462 struct sljit_jump *skipread;
2463 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2464 struct sljit_jump *jump;
2465 #endif
/* The shift by 4 used below relies on ctype_word being bit 0x10. */
2467 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2469 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2470 /* Get type of the previous char, and put it to LOCALS1. */
2471 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2474 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* NOTE(review): skip_char_back(), check_start_used_ptr() and read_char() are
defined outside this view; presumably they step back one character, record
the lowest position inspected, and re-read that character into TMP1 while
restoring STR_PTR - confirm. */
2475 skip_char_back(common);
2476 check_start_used_ptr(common);
2477 read_char(common);
2479 /* Testing char type. */
2480 #ifdef SUPPORT_UCP
2481 if (common->use_ucp)
/* TMP2 starts as 1 so that the underscore shortcut yields "word". */
2483 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2484 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2485 add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range tests on the type value: [ucp_Ll, ucp_Lu] then
[ucp_Nd, ucp_No]. */
2486 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2487 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2488 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2489 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2490 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2491 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2492 JUMPHERE(jump);
2493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2495 else
2496 #endif
/* Table-driven test: characters above 255 skip the lookup and keep the 0
already stored in LOCALS1. */
2498 #ifndef COMPILE_PCRE8
2499 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2500 #elif defined SUPPORT_UTF
2501 /* Here LOCALS1 has already been zeroed. */
2502 jump = NULL;
2503 if (common->utf)
2504 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2505 #endif /* COMPILE_PCRE8 */
2506 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2507 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2508 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2510 #ifndef COMPILE_PCRE8
2511 JUMPHERE(jump);
2512 #elif defined SUPPORT_UTF
2513 if (jump != NULL)
2514 JUMPHERE(jump);
2515 #endif /* COMPILE_PCRE8 */
2517 JUMPHERE(skipread);
/* Second half: same classification for the character at STR_PTR, result in
TMP2 (0 if check_str_end() jumps out). */
2519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2520 skipread = check_str_end(common);
2521 peek_char(common);
2523 /* Testing char type. This is a code duplication. */
2524 #ifdef SUPPORT_UCP
2525 if (common->use_ucp)
2527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2528 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2529 add_jump(compiler, &common->getunichartype_2, JUMP(SLJIT_FAST_CALL));
2530 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2531 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2532 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2533 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2534 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2535 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2536 JUMPHERE(jump);
2538 else
2539 #endif
2541 #ifndef COMPILE_PCRE8
2542 /* TMP2 may be destroyed by peek_char. */
2543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2544 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2545 #elif defined SUPPORT_UTF
2546 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2547 jump = NULL;
2548 if (common->utf)
2549 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2550 #endif
2551 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2552 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2553 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2554 #ifndef COMPILE_PCRE8
2555 JUMPHERE(jump);
2556 #elif defined SUPPORT_UTF
2557 if (jump != NULL)
2558 JUMPHERE(jump);
2559 #endif /* COMPILE_PCRE8 */
2561 JUMPHERE(skipread);
/* Flags reflect (current is word) XOR (previous is word). */
2563 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2564 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028/0x2029 in 16-bit
builds or in 8-bit builds when common->utf is set. The 0/1 result is built up
in TMP2 and the final OR also updates the flags (SLJIT_SET_E). TMP1 is
modified as well (it is rebased by 0x0a and may have its low bit set). */
2567 static void check_anynewline(compiler_common *common)
2569 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2570 DEFINE_COMPILER;
2572 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 so that one unsigned compare covers the range 0x0a..0x0d. */
2574 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2575 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2576 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2577 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2578 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2579 #ifdef COMPILE_PCRE8
2580 if (common->utf)
2582 #endif
/* Fold in the 0x85 result, then set the low bit so a single compare matches
both 0x2028 and 0x2029. */
2583 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2584 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2585 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2586 #ifdef COMPILE_PCRE8
2588 #endif
2589 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the result of whichever compare was emitted last. */
2590 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2591 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests the
character in TMP1 against the horizontal white space characters 0x09, 0x20
and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 in
16-bit builds or in 8-bit builds when common->utf is set. The 0/1 result is
built up in TMP2 and the final OR also updates the flags (SLJIT_SET_E). On the
wide path TMP1 is rebased by 0x2000. */
2594 static void check_hspace(compiler_common *common)
2596 /* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed. */
2597 DEFINE_COMPILER;
2599 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2601 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2602 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2603 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2604 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2605 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2606 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2607 #ifdef COMPILE_PCRE8
2608 if (common->utf)
2610 #endif
2611 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2612 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2613 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2615 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 by 0x2000: one unsigned compare covers 0x2000..0x200a, and the
remaining single values are compared relative to the same base. */
2616 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2617 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2618 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2620 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2621 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2622 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2623 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2624 #ifdef COMPILE_PCRE8
2626 #endif
2627 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the result of whichever compare was emitted last. */
2628 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2630 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests the
character in TMP1 against the vertical white space characters 0x0a-0x0d and
0x85, plus 0x2028/0x2029 in 16-bit builds or in 8-bit builds when common->utf
is set. The 0/1 result is built up in TMP2 and the final OR also updates the
flags (SLJIT_SET_E). TMP1 is modified as well (it is rebased by 0x0a and may
have its low bit set). */
2633 static void check_vspace(compiler_common *common)
2635 /* Check whether TMP1 contains a vertical white space character. TMP2 destroyed. */
2636 DEFINE_COMPILER;
2638 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 so that one unsigned compare covers the range 0x0a..0x0d. */
2640 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2641 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2642 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2644 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2645 #ifdef COMPILE_PCRE8
2646 if (common->utf)
2648 #endif
/* Fold in the 0x85 result, then set the low bit so a single compare matches
both 0x2028 and 0x2029. */
2649 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2650 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2651 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2652 #ifdef COMPILE_PCRE8
2654 #endif
2655 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the result of whichever compare was emitted last. */
2656 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2658 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The compare routines borrow the STR_END and STACK_TOP registers as scratch
character registers; each routine saves and restores them itself. */
2661 #define CHAR1 STR_END
2662 #define CHAR2 STACK_TOP
/* Emits a fast-call routine (return address in RETURN_ADDR) that compares,
unit by unit, the sequence addressed by TMP1 with the sequence at STR_PTR.
On entry TMP2 holds the length in bytes and STR_PTR is first moved back by
that amount. The loop stops at the first differing unit or when TMP2 reaches
zero. CHAR1 is preserved in TMP3 and CHAR2 in the LOCALS0 slot.
NOTE(review): MOVU_UCHAR is defined outside this view; the -1/+1 unit
adjustments around the loop suggest it is a load that also updates the base
register - confirm. */
2664 static void do_casefulcmp(compiler_common *common)
2666 DEFINE_COMPILER;
2667 struct sljit_jump *jump;
2668 struct sljit_label *label;
2670 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2671 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2672 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Bias both pointers back by one unit because the loads address offset +1. */
2674 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2675 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2677 label = LABEL();
2678 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2679 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2680 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2681 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2682 JUMPTO(SLJIT_C_NOT_ZERO, label);
2684 JUMPHERE(jump);
/* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
2685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2686 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2687 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2688 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The caseless compare additionally borrows STACK_LIMIT to hold the address
of the lower-casing table. */
2691 #define LCC_TABLE STACK_LIMIT
/* Emits a fast-call routine (return address in RETURN_ADDR) that compares,
unit by unit, the sequence addressed by TMP1 with the sequence at STR_PTR
after mapping each unit through the common->lcc table. In non-8-bit builds
units above 255 are compared unmapped. On entry TMP2 holds the length in
bytes and STR_PTR is first moved back by that amount. The loop stops at the
first differing unit or when TMP2 reaches zero. LCC_TABLE is preserved in
TMP3, CHAR1 in the LOCALS0 slot and CHAR2 in the LOCALS1 slot. */
2693 static void do_caselesscmp(compiler_common *common)
2695 DEFINE_COMPILER;
2696 struct sljit_jump *jump;
2697 struct sljit_label *label;
2699 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2700 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2702 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2703 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2705 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Bias both pointers back by one unit because the loads address offset +1. */
2706 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2707 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2709 label = LABEL();
2710 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2711 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each unit through the table; wide units above 255 skip the lookup. */
2712 #ifndef COMPILE_PCRE8
2713 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2714 #endif
2715 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2716 #ifndef COMPILE_PCRE8
2717 JUMPHERE(jump);
2718 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2719 #endif
2720 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2721 #ifndef COMPILE_PCRE8
2722 JUMPHERE(jump);
2723 #endif
2724 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2725 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2726 JUMPTO(SLJIT_C_NOT_ZERO, label);
2728 JUMPHERE(jump);
/* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
2729 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2730 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2731 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2732 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2733 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The scratch-register aliases are local to the two compare routines above. */
2736 #undef LCC_TABLE
2737 #undef CHAR1
2738 #undef CHAR2
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF character sequences, written as a plain C
helper rather than generated code (the original author notes that doing this
at JIT level would not be effective).

The reference sequence is [src1, end1); the other sequence starts at
args->uchar_ptr and is bounded by args->end. Characters are decoded with
GETCHARINC and match when they are equal or when the first equals
UCD_OTHERCASE of the second.

Returns:
  NULL             on the first mismatch,
  (pcre_uchar *)1  if args->end is reached before the reference is exhausted,
  otherwise the position in the second sequence just past the compared text. */
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *subject = args->uchar_ptr;
const pcre_uchar *subject_end = args->end;
int ref_char, subj_char;

for (;;)
  {
  if (src1 >= end1)
    break;
  if (subject >= subject_end)
    return (pcre_uchar*)1;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subj_char, subject);

  if (ref_char == subj_char)
    continue;
  if (ref_char != UCD_OTHERCASE(subj_char))
    return NULL;
  }

return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */

/* Emits the comparison code for one character of a literal sequence, starting
at cc; in UTF mode all code units of that character are processed. Mismatch
jumps are added to 'backtracks'. context->length is reduced by one unit per
code unit handled and is used as the negative displacement from STR_PTR for
the subject loads. Returns cc advanced past the units consumed.

When caseless is set and char_has_othercase() is true, the bit returned by
char_get_othercase_bit() is ORed into the subject data so that a single
compare accepts both cases.

If unaligned reads are available (SLJIT_UNALIGNED), units are gathered in
context->c / context->oc and compared several at a time; otherwise one unit is
loaded and compared per iteration. TMP1 and TMP2 alternate as the source
register so that the next load can be issued before the current compare. */
2762 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2763 compare_context* context, jump_list **backtracks)
2765 DEFINE_COMPILER;
2766 unsigned int othercasebit = 0;
2767 pcre_uchar *othercasechar = NULL;
2768 #ifdef SUPPORT_UTF
2769 int utflength;
2770 #endif
2772 if (caseless && char_has_othercase(common, cc))
2774 othercasebit = char_get_othercase_bit(common, cc);
2775 SLJIT_ASSERT(othercasebit);
2776 /* Extracting bit difference info. */
/* The value packs a unit index in its upper bits and the bit mask in its low
byte; for 16-bit units bit 0x100 selects the high byte of the unit. */
2777 #ifdef COMPILE_PCRE8
2778 othercasechar = cc + (othercasebit >> 8);
2779 othercasebit &= 0xff;
2780 #else
2781 #ifdef COMPILE_PCRE16
2782 othercasechar = cc + (othercasebit >> 9);
2783 if ((othercasebit & 0x100) != 0)
2784 othercasebit = (othercasebit & 0xff) << 8;
2785 else
2786 othercasebit &= 0xff;
2787 #endif
2788 #endif
/* First call for this context (sourcereg == -1): issue the initial load into
TMP1, using the widest access the remaining length allows, and make TMP2 the
register for the next load. */
2791 if (context->sourcereg == -1)
2793 #ifdef COMPILE_PCRE8
2794 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2795 if (context->length >= 4)
2796 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2797 else if (context->length >= 2)
2798 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2799 else
2800 #endif
2801 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2802 #else
2803 #ifdef COMPILE_PCRE16
2804 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2805 if (context->length >= 4)
2806 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2807 else
2808 #endif
2809 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2810 #endif
2811 #endif /* COMPILE_PCRE8 */
2812 context->sourcereg = TMP2;
/* Number of code units making up the character at cc (1 outside UTF mode). */
2815 #ifdef SUPPORT_UTF
2816 utflength = 1;
2817 if (common->utf && HAS_EXTRALEN(*cc))
2818 utflength += GET_EXTRALEN(*cc);
2822 #endif
2824 context->length -= IN_UCHARS(1);
2825 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2827 /* Unaligned read is supported. */
/* Record the expected unit (with the case bit forced on when this is the unit
that carries it) and the matching OR mask. */
2828 if (othercasebit != 0 && othercasechar == cc)
2830 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2831 context->oc.asuchars[context->ucharptr] = othercasebit;
2833 else
2835 context->c.asuchars[context->ucharptr] = *cc;
2836 context->oc.asuchars[context->ucharptr] = 0;
2838 context->ucharptr++;
/* Flush the gathered units once a full load width has been collected or no
further units remain. */
2840 #ifdef COMPILE_PCRE8
2841 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2842 #else
2843 if (context->ucharptr >= 2 || context->length == 0)
2844 #endif
/* Issue the load for the following group first, then swap registers and
compare the group that was loaded earlier. */
2846 if (context->length >= 4)
2847 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2848 #ifdef COMPILE_PCRE8
2849 else if (context->length >= 2)
2850 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2851 else if (context->length >= 1)
2852 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2853 #else
2854 else if (context->length >= 2)
2855 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2856 #endif
2857 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
/* The case labels are unit counts: 4 bytes or 2 bytes worth of units (and a
single byte in 8-bit builds). */
2859 switch(context->ucharptr)
2861 case 4 / sizeof(pcre_uchar):
2862 if (context->oc.asint != 0)
2863 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2864 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2865 break;
2867 case 2 / sizeof(pcre_uchar):
2868 if (context->oc.asushort != 0)
2869 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2870 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2871 break;
2873 #ifdef COMPILE_PCRE8
2874 case 1:
2875 if (context->oc.asbyte != 0)
2876 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2877 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2878 break;
2879 #endif
2881 default:
2882 SLJIT_ASSERT_STOP();
2883 break;
2885 context->ucharptr = 0;
2888 #else
2890 /* Unaligned read is unsupported. */
/* Load the next unit (if any remain) into the spare register, then swap and
compare the unit loaded previously. */
2891 #ifdef COMPILE_PCRE8
2892 if (context->length > 0)
2893 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2894 #else
2895 if (context->length > 0)
2896 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2897 #endif
2898 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2900 if (othercasebit != 0 && othercasechar == cc)
2902 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2903 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2905 else
2906 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2908 #endif
2910 cc++;
/* In UTF builds the per-unit code above repeats until every unit of the
character has been handled. */
2911 #ifdef SUPPORT_UTF
2912 utflength--;
2914 while (utflength > 0);
2915 #endif
2917 return cc;
2920 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Rebase the value held in 'typereg' so that it is relative to 'value'
instead of the current 'typeoffset', then record the new offset. Emits a
single SUB or ADD with the difference, or nothing when the offset is
unchanged. Expects 'typereg' and 'typeoffset' (unsigned int) to be in scope at
the point of use.

The body is wrapped in do { } while (0) so the macro behaves as one statement
(safe under an unbraced if/else), and 'value' is evaluated exactly once. Use
it as a statement terminated by a semicolon. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    const unsigned int set_type_offset_new_ = (value); \
    if (set_type_offset_new_ != typeoffset) \
      { \
      if (set_type_offset_new_ > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, set_type_offset_new_ - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - set_type_offset_new_); \
      } \
    typeoffset = set_type_offset_new_; \
    } \
  while (0)

/* Rebase the character held in TMP1 so that it is relative to 'value' instead
of the current 'charoffset', then record the new offset. Emits a single SUB or
ADD with the difference, or nothing when the offset is unchanged. Expects
'charoffset' (unsigned int) to be in scope at the point of use.

The body is wrapped in do { } while (0) so the macro behaves as one statement
(safe under an unbraced if/else), and 'value' is evaluated exactly once. Use
it as a statement terminated by a semicolon. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    const unsigned int set_char_offset_new_ = (value); \
    if (set_char_offset_new_ != charoffset) \
      { \
      if (set_char_offset_new_ > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, set_char_offset_new_ - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - set_char_offset_new_); \
      } \
    charoffset = set_char_offset_new_; \
    } \
  while (0)

2942 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
2944 DEFINE_COMPILER;
2945 jump_list *found = NULL;
2946 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
2947 unsigned int c;
2948 int compares;
2949 struct sljit_jump *jump = NULL;
2950 pcre_uchar *ccbegin;
2951 #ifdef SUPPORT_UCP
2952 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2953 BOOL charsaved = FALSE;
2954 int typereg = TMP1, scriptreg = TMP1;
2955 unsigned int typeoffset;
2956 #endif
2957 int invertcmp, numberofcmps;
2958 unsigned int charoffset;
2960 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2961 detect_partial_match(common, backtracks);
2962 read_char(common);
2964 if ((*cc++ & XCL_MAP) != 0)
2966 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2967 #ifndef COMPILE_PCRE8
2968 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2969 #elif defined SUPPORT_UTF
2970 if (common->utf)
2971 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2972 #endif
2974 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2975 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2976 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2977 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2978 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2979 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2981 #ifndef COMPILE_PCRE8
2982 JUMPHERE(jump);
2983 #elif defined SUPPORT_UTF
2984 if (common->utf)
2985 JUMPHERE(jump);
2986 #endif
2987 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2988 #ifdef SUPPORT_UCP
2989 charsaved = TRUE;
2990 #endif
2991 cc += 32 / sizeof(pcre_uchar);
2994 /* Scanning the necessary info. */
2995 ccbegin = cc;
2996 compares = 0;
2997 while (*cc != XCL_END)
2999 compares++;
3000 if (*cc == XCL_SINGLE)
3002 cc += 2;
3003 #ifdef SUPPORT_UTF
3004 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3005 #endif
3006 #ifdef SUPPORT_UCP
3007 needschar = TRUE;
3008 #endif
3010 else if (*cc == XCL_RANGE)
3012 cc += 2;
3013 #ifdef SUPPORT_UTF
3014 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3015 #endif
3016 cc++;
3017 #ifdef SUPPORT_UTF
3018 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3019 #endif
3020 #ifdef SUPPORT_UCP
3021 needschar = TRUE;
3022 #endif
3024 #ifdef SUPPORT_UCP
3025 else
3027 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3028 cc++;
3029 switch(*cc)
3031 case PT_ANY:
3032 break;
3034 case PT_LAMP:
3035 case PT_GC:
3036 case PT_PC:
3037 case PT_ALNUM:
3038 needstype = TRUE;
3039 break;
3041 case PT_SC:
3042 needsscript = TRUE;
3043 break;
3045 case PT_SPACE:
3046 case PT_PXSPACE:
3047 case PT_WORD:
3048 needstype = TRUE;
3049 needschar = TRUE;
3050 break;
3052 default:
3053 SLJIT_ASSERT_STOP();
3054 break;
3056 cc += 2;
3058 #endif
3061 #ifdef SUPPORT_UCP
3062 /* Simple register allocation. TMP1 is preferred if possible. */
3063 if (needstype || needsscript)
3065 if ((needschar || needsscript) && !charsaved)
3066 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3068 /* Needed to save important temporary registers. */
3069 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 &&
3070 STACK_TOP == SLJIT_TEMPORARY_REG2 &&
3071 TMP2 == SLJIT_TEMPORARY_REG3);
3073 if (needschar)
3075 if (needstype)
3076 typereg = RETURN_ADDR;
3077 if (needsscript)
3078 scriptreg = TMP3;
3080 else if (needstype && needsscript)
3081 scriptreg = TMP3;
3082 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3084 if (needstype)
3086 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3087 if (typereg != TMP1)
3088 OP1(SLJIT_MOV, typereg, 0, TMP1, 0);
3091 if (needsscript)
3093 /* Get the char again */
3094 if (needstype)
3095 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3097 add_jump(compiler, &common->getunicharscript, JUMP(SLJIT_FAST_CALL));
3098 if (scriptreg != TMP1)
3099 OP1(SLJIT_MOV, scriptreg, 0, TMP1, 0);
3102 if (needschar)
3103 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3105 #endif
3107 /* Generating code. */
3108 cc = ccbegin;
3109 charoffset = 0;
3110 numberofcmps = 0;
3111 #ifdef SUPPORT_UCP
3112 typeoffset = 0;
3113 #endif
3115 while (*cc != XCL_END)
3117 compares--;
3118 invertcmp = (compares == 0 && list != backtracks);
3119 jump = NULL;
3121 if (*cc == XCL_SINGLE)
3123 cc ++;
3124 #ifdef SUPPORT_UTF
3125 if (common->utf)
3127 GETCHARINC(c, cc);
3129 else
3130 #endif
3131 c = *cc++;
3133 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3135 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3136 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3137 numberofcmps++;
3139 else if (numberofcmps > 0)
3141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3142 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3143 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3144 numberofcmps = 0;
3146 else
3148 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3149 numberofcmps = 0;
3152 else if (*cc == XCL_RANGE)
3154 cc ++;
3155 #ifdef SUPPORT_UTF
3156 if (common->utf)
3158 GETCHARINC(c, cc);
3160 else
3161 #endif
3162 c = *cc++;
3163 SET_CHAR_OFFSET(c);
3164 #ifdef SUPPORT_UTF
3165 if (common->utf)
3167 GETCHARINC(c, cc);
3169 else
3170 #endif
3171 c = *cc++;
3172 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3174 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3175 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3176 numberofcmps++;
3178 else if (numberofcmps > 0)
3180 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3181 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3182 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3183 numberofcmps = 0;
3185 else
3187 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3188 numberofcmps = 0;
3191 #ifdef SUPPORT_UCP
3192 else
3194 if (*cc == XCL_NOTPROP)
3195 invertcmp ^= 0x1;
3196 cc++;
3197 switch(*cc)
3199 case PT_ANY:
3200 if (list != backtracks)
3202 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3203 continue;
3205 else if (cc[-1] == XCL_NOTPROP)
3206 continue;
3207 jump = JUMP(SLJIT_JUMP);
3208 break;
3210 case PT_LAMP:
3211 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3212 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3213 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3214 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3215 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3216 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3217 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3218 break;
3220 case PT_GC:
3221 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3222 SET_TYPE_OFFSET(c);
3223 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3224 break;
3226 case PT_PC:
3227 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3228 break;
3230 case PT_SC:
3231 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3232 break;
3234 case PT_SPACE:
3235 case PT_PXSPACE:
3236 if (*cc == PT_SPACE)
3238 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3239 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3241 SET_CHAR_OFFSET(9);
3242 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3243 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3244 if (*cc == PT_SPACE)
3245 JUMPHERE(jump);
3247 SET_TYPE_OFFSET(ucp_Zl);
3248 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3249 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3250 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3251 break;
3253 case PT_WORD:
3254 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3255 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3256 /* ... fall through */
3258 case PT_ALNUM:
3259 SET_TYPE_OFFSET(ucp_Ll);
3260 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3261 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3262 SET_TYPE_OFFSET(ucp_Nd);
3263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3264 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3265 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3266 break;
3268 cc += 2;
3270 #endif
3272 if (jump != NULL)
3273 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3276 if (found != NULL)
3277 set_jumps(found, LABEL());
3280 #undef SET_TYPE_OFFSET
3281 #undef SET_CHAR_OFFSET
3283 #endif
/* Emits the try-path code for one single-item opcode. `type` is the opcode
   and `cc` points at its operand data; every jump that must be taken when the
   item fails to match is added to `backtracks`. Returns the pattern pointer
   that follows the data consumed for this opcode (plain `cc` for opcodes that
   take no operand here). An opcode that is not handled below reaches
   SLJIT_ASSERT_STOP() and returns `cc`. */
3285 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3287 DEFINE_COMPILER;
3288 int length;
3289 unsigned int c, oc, bit;
3290 compare_context context;
/* Scratch slots for forward jumps that are resolved later with JUMPHERE(). */
3291 struct sljit_jump *jump[4];
3292 #ifdef SUPPORT_UTF
3293 struct sljit_label *label;
3294 #ifdef SUPPORT_UCP
/* Small synthetic xclass body used by OP_PROP / OP_NOTPROP below. */
3295 pcre_uchar propdata[5];
3296 #endif
3297 #endif
3299 switch(type)
/* Backtrack unless STR_PTR equals the `begin` field of jit_arguments. */
3301 case OP_SOD:
3302 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3304 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3305 return cc;
/* Backtrack unless STR_PTR equals the `str` field of jit_arguments. */
3307 case OP_SOM:
3308 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3309 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3310 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3311 return cc;
/* Fast-calls the shared wordboundary routine, then backtracks on a non-zero
   condition for OP_NOT_WORD_BOUNDARY and on a zero condition for
   OP_WORD_BOUNDARY. */
3313 case OP_NOT_WORD_BOUNDARY:
3314 case OP_WORD_BOUNDARY:
3315 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3316 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3317 return cc;
/* The next three case groups share one shape: after read_char8_type() the
   value in TMP1 is masked with a ctype_* bit; the positive opcode backtracks
   when the bit is clear, the negated opcode when it is set. */
3319 case OP_NOT_DIGIT:
3320 case OP_DIGIT:
3321 detect_partial_match(common, backtracks);
3322 read_char8_type(common);
3323 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3324 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3325 return cc;
3327 case OP_NOT_WHITESPACE:
3328 case OP_WHITESPACE:
3329 detect_partial_match(common, backtracks);
3330 read_char8_type(common);
3331 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3332 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3333 return cc;
3335 case OP_NOT_WORDCHAR:
3336 case OP_WORDCHAR:
3337 detect_partial_match(common, backtracks);
3338 read_char8_type(common);
3339 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3340 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3341 return cc;
/* Reads one character and rejects it if it starts a newline. For a fixed
   newline whose value is above 255 the character is compared with the high
   byte of common->newline and, when that matches and the end-of-subject test
   in jump[1] does not fire, the following unit is compared with the low byte;
   a match of both backtracks. Every other newline type is delegated to
   check_newlinechar(). */
3343 case OP_ANY:
3344 detect_partial_match(common, backtracks);
3345 read_char(common);
3346 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3348 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3349 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3350 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3351 else
3352 jump[1] = check_str_end(common);
3354 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3355 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* The NULL test implies check_str_end() may return no jump. */
3356 if (jump[1] != NULL)
3357 JUMPHERE(jump[1]);
3358 JUMPHERE(jump[0]);
3360 else
3361 check_newlinechar(common, common->nltype, backtracks, TRUE);
3362 return cc;
/* Skips one whole character without inspecting its value. In UTF mode the
   first unit is loaded and STR_PTR advanced by one unit; then, in the 8-bit
   build, a unit >= 0xc0 adds the value looked up in utf8_table4, and in the
   16-bit build a unit >= 0xd800 whose top bits (& 0xfc00) equal 0xd800 adds
   the 0/1 comparison result shifted left by one. Without UTF a single unit is
   skipped. */
3364 case OP_ALLANY:
3365 detect_partial_match(common, backtracks);
3366 #ifdef SUPPORT_UTF
3367 if (common->utf)
3369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3371 #ifdef COMPILE_PCRE8
3372 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3373 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3374 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3375 #else /* COMPILE_PCRE8 */
3376 #ifdef COMPILE_PCRE16
3377 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3378 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3379 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3380 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3381 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3382 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3383 #endif /* COMPILE_PCRE16 */
3384 #endif /* COMPILE_PCRE8 */
3385 JUMPHERE(jump[0]);
3386 return cc;
3388 #endif
3389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3390 return cc;
/* Advances STR_PTR by exactly one code unit. */
3392 case OP_ANYBYTE:
3393 detect_partial_match(common, backtracks);
3394 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3395 return cc;
3397 #ifdef SUPPORT_UTF
3398 #ifdef SUPPORT_UCP
/* Builds a five-element buffer (a zero, XCL_PROP or XCL_NOTPROP, the two
   operand units, XCL_END) and hands it to compile_xclass_trypath(), so the
   property test reuses the xclass code generator. Consumes two operand
   units. */
3399 case OP_NOTPROP:
3400 case OP_PROP:
3401 propdata[0] = 0;
3402 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3403 propdata[2] = cc[0];
3404 propdata[3] = cc[1];
3405 propdata[4] = XCL_END;
3406 compile_xclass_trypath(common, propdata, backtracks);
3407 return cc + 2;
3408 #endif
3409 #endif
/* Reads one character. If it is CR: at end of subject (jump[1]) or when the
   next unit is not NL (jump[2]) the lone CR is accepted; otherwise the NL is
   consumed as well (jump[3] skips the generic check). Any character other
   than CR (jump[0]) is validated by check_newlinechar() using bsr_nltype. */
3411 case OP_ANYNL:
3412 detect_partial_match(common, backtracks);
3413 read_char(common);
3414 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3415 /* We don't need to handle soft partial matching case. */
3416 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3417 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3418 else
3419 jump[1] = check_str_end(common);
3420 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3421 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3422 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3423 jump[3] = JUMP(SLJIT_JUMP);
3424 JUMPHERE(jump[0]);
3425 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
3426 JUMPHERE(jump[1]);
3427 JUMPHERE(jump[2]);
3428 JUMPHERE(jump[3]);
3429 return cc;
/* Reads one character and fast-calls the shared hspace routine; backtracks
   on a non-zero condition for OP_NOT_HSPACE and on zero for OP_HSPACE. */
3431 case OP_NOT_HSPACE:
3432 case OP_HSPACE:
3433 detect_partial_match(common, backtracks);
3434 read_char(common);
3435 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3436 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3437 return cc;
/* Same shape as the hspace case, using the shared vspace routine. */
3439 case OP_NOT_VSPACE:
3440 case OP_VSPACE:
3441 detect_partial_match(common, backtracks);
3442 read_char(common);
3443 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3444 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3445 return cc;
3447 #ifdef SUPPORT_UCP
/* Reads a first character, obtains its type through getunichartype and
   backtracks when (type - ucp_Mc) <= (ucp_Mn - ucp_Mc). The loop at `label`
   then saves STR_PTR in TMP3, reads the next character and repeats while that
   same range test succeeds; on the first character that fails the test
   STR_PTR is restored from TMP3 so that character is not consumed. The loop
   is left through jump[0] when STR_PTR reaches STR_END. */
3448 case OP_EXTUNI:
3449 detect_partial_match(common, backtracks);
3450 read_char(common);
3451 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3452 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3453 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3455 label = LABEL();
3456 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3457 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3458 read_char(common);
3459 add_jump(compiler, &common->getunichartype, JUMP(SLJIT_FAST_CALL));
3460 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3461 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3463 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3464 JUMPHERE(jump[0]);
/* In hard partial mode check_partial() is reached only when STR_PTR is not
   below STR_END. */
3465 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3467 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3468 /* Since we successfully read a char above, partial matching must occur. */
3469 check_partial(common, TRUE);
3470 JUMPHERE(jump[0]);
3472 return cc;
3473 #endif
/* Succeeds at the end of the subject, or when what remains is exactly one
   newline. jump[0] covers the "already at STR_END" case and lands on the
   final check_partial(common, FALSE). */
3475 case OP_EODN:
3476 /* Requires rather complex checks. */
3477 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Fixed two-unit newline: STR_PTR + 2 units must equal STR_END and both
   units must match. In the partial modes, when STR_PTR + 2 != STR_END the
   code backtracks if more than two units remain or the first unit is not the
   first newline unit; otherwise it calls check_partial(common, TRUE) and then
   backtracks. */
3478 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3480 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3481 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3482 if (common->mode == JIT_COMPILE)
3483 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3484 else
3486 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3487 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3488 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3489 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3490 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3491 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
3492 check_partial(common, TRUE);
3493 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3494 JUMPHERE(jump[1]);
3496 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3497 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3498 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
/* Fixed single-unit newline: exactly one unit must remain and it must equal
   common->newline. */
3500 else if (common->nltype == NLTYPE_FIXED)
3502 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3503 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3504 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3505 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
/* Remaining newline types. If the next unit is CR: success when it is the
   last unit (jump[2]); failure when more than two units remain; with exactly
   two units the second must be NL (jump[3]). If it is not CR (jump[1]): for
   NLTYPE_ANYCRLF it must be the last unit and equal NL; otherwise STR_PTR is
   saved in LOCALS1, one character is read, it must end exactly at STR_END and
   pass the shared anynewline routine, and STR_PTR is restored afterwards. */
3507 else
3509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3510 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3511 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3512 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3513 jump[2] = JUMP(SLJIT_C_GREATER);
3514 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
3515 /* Equal. */
3516 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3517 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3518 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3520 JUMPHERE(jump[1]);
3521 if (common->nltype == NLTYPE_ANYCRLF)
3523 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3524 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3525 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3527 else
3529 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3530 read_char(common);
3531 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3532 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3533 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3534 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3536 JUMPHERE(jump[2]);
3537 JUMPHERE(jump[3]);
3539 JUMPHERE(jump[0]);
3540 check_partial(common, FALSE);
3541 return cc;
/* Backtracks while STR_PTR is below STR_END; otherwise check_partial(). */
3543 case OP_EOD:
3544 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3545 check_partial(common, FALSE);
3546 return cc;
/* Backtracks when STR_PTR is above jit_arguments.begin or when the notbol
   byte of jit_arguments is non-zero. */
3548 case OP_CIRC:
3549 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3551 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3552 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3553 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3554 return cc;
/* When STR_PTR is not above begin: backtrack if notbol is set, otherwise
   succeed (jump[0]). When STR_PTR is above begin (jump[1]): backtrack at
   STR_END, and otherwise require a newline immediately before STR_PTR. For a
   fixed two-unit newline the two preceding units are compared directly
   (backtracking if STR_PTR - 2 units lies below begin); otherwise the code
   steps back one character, reads it again and uses check_newlinechar(). */
3556 case OP_CIRCM:
3557 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3558 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3559 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3560 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3561 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3562 jump[0] = JUMP(SLJIT_JUMP);
3563 JUMPHERE(jump[1]);
3565 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3566 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3568 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3569 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3570 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3571 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3572 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3573 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3575 else
3577 skip_char_back(common);
3578 read_char(common);
3579 check_newlinechar(common, common->nltype, backtracks, FALSE);
3581 JUMPHERE(jump[0]);
3582 return cc;
/* Backtracks when the noteol byte of jit_arguments is non-zero. Then, unless
   common->endonly is set, the OP_EODN code is emitted through a recursive
   call; with endonly the same two steps as OP_EOD are emitted. */
3584 case OP_DOLL:
3585 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3586 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3587 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3589 if (!common->endonly)
3590 compile_char1_trypath(common, OP_EODN, cc, backtracks);
3591 else
3593 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3594 check_partial(common, FALSE);
3596 return cc;
/* When STR_PTR is not below STR_END: backtrack if noteol is set, otherwise
   check_partial() and succeed (jump[0]). When characters remain (jump[1]) a
   newline must follow. For a fixed two-unit newline both units are compared;
   if fewer than two units remain, JIT_COMPILE mode backtracks, while the
   partial modes backtrack unless the single remaining unit equals the first
   newline unit, in which case check_partial(common, TRUE) is called before
   backtracking. Other newline types use peek_char() plus
   check_newlinechar(). */
3598 case OP_DOLLM:
3599 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3600 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3601 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3602 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3603 check_partial(common, FALSE);
3604 jump[0] = JUMP(SLJIT_JUMP);
3605 JUMPHERE(jump[1]);
3607 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3609 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3610 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3611 if (common->mode == JIT_COMPILE)
3612 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3613 else
3615 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3616 /* STR_PTR = STR_END - IN_UCHARS(1) */
3617 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3618 check_partial(common, TRUE);
3619 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3620 JUMPHERE(jump[1]);
3623 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3624 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3625 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3627 else
3629 peek_char(common);
3630 check_newlinechar(common, common->nltype, backtracks, FALSE);
3632 JUMPHERE(jump[0]);
3633 return cc;
/* Matches one literal character. `length` is its size in code units (more
   than one only in UTF mode). In JIT_COMPILE mode, when the match is caseful,
   or the character has no other case, or char_get_othercase_bit() is
   non-zero, STR_PTR is advanced by `length`, checked against STR_END and the
   comparison is delegated to byte_sequence_compare(). Otherwise the subject
   character is read into TMP1 and compared as a value: directly for the
   caseful / no-other-case situation; by OR-ing in the differing bit when the
   two cases differ in a single bit; and against both cases otherwise,
   backtracking when neither comparison matched. */
3635 case OP_CHAR:
3636 case OP_CHARI:
3637 length = 1;
3638 #ifdef SUPPORT_UTF
3639 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3640 #endif
3641 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3643 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3644 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3646 context.length = IN_UCHARS(length);
3647 context.sourcereg = -1;
3648 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3649 context.ucharptr = 0;
3650 #endif
3651 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
3653 detect_partial_match(common, backtracks);
3654 read_char(common);
3655 #ifdef SUPPORT_UTF
3656 if (common->utf)
3658 GETCHAR(c, cc);
3660 else
3661 #endif
3662 c = *cc;
3663 if (type == OP_CHAR || !char_has_othercase(common, cc))
3665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3666 return cc + length;
3668 oc = char_othercase(common, c);
3669 bit = c ^ oc;
3670 if (ispowerof2(bit))
3672 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3673 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3674 return cc + length;
3676 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3677 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3679 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3680 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3681 return cc + length;
/* Matches any character except the given one. In the 8-bit UTF build, when
   the pattern character is below 128, only the first subject byte is loaded
   and compared (with 0x20 OR-ed in for the caseless form), and the rest of
   the subject character is then skipped using utf8_table4 for lead bytes
   >= 0xc0; that path returns cc + 1. In every other configuration the subject
   character is read in full and compared with c, and for the caseless form
   also with its other case (via a single OR when the two differ in one
   bit). */
3683 case OP_NOT:
3684 case OP_NOTI:
3685 detect_partial_match(common, backtracks);
3686 length = 1;
3687 #ifdef SUPPORT_UTF
3688 if (common->utf)
3690 #ifdef COMPILE_PCRE8
3691 c = *cc;
3692 if (c < 128)
3694 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3695 if (type == OP_NOT || !char_has_othercase(common, cc))
3696 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3697 else
3699 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3700 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3701 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3703 /* Skip the variable-length character. */
3704 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3705 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3706 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3708 JUMPHERE(jump[0]);
3709 return cc + 1;
3711 else
3712 #endif /* COMPILE_PCRE8 */
3714 GETCHARLEN(c, cc, length);
3715 read_char(common);
3718 else
3719 #endif /* SUPPORT_UTF */
3721 read_char(common);
3722 c = *cc;
3725 if (type == OP_NOT || !char_has_othercase(common, cc))
3726 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3727 else
3729 oc = char_othercase(common, c);
3730 bit = c ^ oc;
3731 if (ispowerof2(bit))
3733 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3734 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3736 else
3738 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3739 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3742 return cc + length;
/* Tests the character against the bitmap stored at cc: the byte at index
   (char >> 3) is loaded and masked with 1 << (char & 7); a clear bit
   backtracks. Where characters above 255 are possible (UTF mode in the 8-bit
   build, always in the other builds) such a character backtracks for OP_CLASS
   and skips the bitmap test for OP_NCLASS. The bitmap occupies
   32 / sizeof(pcre_uchar) units of the pattern. */
3744 case OP_CLASS:
3745 case OP_NCLASS:
3746 detect_partial_match(common, backtracks);
3747 read_char(common);
3748 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3749 jump[0] = NULL;
3750 #ifdef COMPILE_PCRE8
3751 /* This check only affects 8 bit mode. In other modes, we
3752 always need to compare the value with 255. */
3753 if (common->utf)
3754 #endif /* COMPILE_PCRE8 */
3756 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3757 if (type == OP_CLASS)
3759 add_jump(compiler, backtracks, jump[0]);
3760 jump[0] = NULL;
3763 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3764 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3765 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3766 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3767 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3768 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3769 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3770 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3771 if (jump[0] != NULL)
3772 JUMPHERE(jump[0]);
3773 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3774 return cc + 32 / sizeof(pcre_uchar);
3776 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Delegates to compile_xclass_trypath() for the data after the LINK_SIZE
   field; the value read with GET(cc, 0) determines how far cc advances. */
3777 case OP_XCLASS:
3778 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
3779 return cc + GET(cc, 0) - 1;
3780 #endif
/* Moves STR_PTR backwards by the count read with GET(cc, 0); a count of zero
   emits no code. In UTF mode a counted loop calls skip_char_back() once per
   character and backtracks if STR_PTR is not above jit_arguments.begin before
   a step. Otherwise IN_UCHARS(length) is subtracted at once and the code
   backtracks if STR_PTR ends up below begin. */
3782 case OP_REVERSE:
3783 length = GET(cc, 0);
3784 if (length == 0)
3785 return cc + LINK_SIZE;
3786 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3787 #ifdef SUPPORT_UTF
3788 if (common->utf)
3790 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3791 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3792 label = LABEL();
3793 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3794 skip_char_back(common);
3795 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3796 JUMPTO(SLJIT_C_NOT_ZERO, label);
3798 else
3799 #endif
3801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3802 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3803 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3805 check_start_used_ptr(common);
3806 return cc + LINK_SIZE;
/* Reached only for an opcode with no case above. */
3808 SLJIT_ASSERT_STOP();
3809 return cc;
/* Emits the try-path code for the item at `cc`, merging a run of literal
   characters into one length check where possible. The scan walks forward
   from `cc` (never reading at or beyond `ccend`) over OP_CHAR / OP_CHARI
   items and adds their sizes to context.length. An OP_CHARI item contributes
   size 0 when the character has another case and char_get_othercase_bit()
   returns 0; any other opcode also contributes 0. The scan continues only
   while the last size was positive and context.length is at most 128.
   If a non-empty run was found, STR_PTR is advanced by its total length, one
   STR_END check is emitted, and byte_sequence_compare() is called repeatedly
   while context.length stays positive (presumably it consumes the length as
   it goes; confirm against its definition). Otherwise the single item is
   handed to compile_char1_trypath(). Returns the pattern pointer produced by
   whichever helper ran last. */
3812 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
3814 /* This function consumes at least one input character. */
3815 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3816 DEFINE_COMPILER;
/* Start of the run; cc is rewound to it once the scan is finished. */
3817 pcre_uchar *ccbegin = cc;
3818 compare_context context;
3819 int size;
3821 context.length = 0;
3824 if (cc >= ccend)
3825 break;
3827 if (*cc == OP_CHAR)
3829 size = 1;
3830 #ifdef SUPPORT_UTF
3831 if (common->utf && HAS_EXTRALEN(cc[1]))
3832 size += GET_EXTRALEN(cc[1]);
3833 #endif
3835 else if (*cc == OP_CHARI)
3837 size = 1;
3838 #ifdef SUPPORT_UTF
3839 if (common->utf)
3841 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3842 size = 0;
3843 else if (HAS_EXTRALEN(cc[1]))
3844 size += GET_EXTRALEN(cc[1]);
3846 else
3847 #endif
3848 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3849 size = 0;
3851 else
3852 size = 0;
/* Step over the opcode unit plus the character units just measured. */
3854 cc += 1 + size;
3855 context.length += IN_UCHARS(size);
3857 while (size > 0 && context.length <= 128);
3859 cc = ccbegin;
3860 if (context.length > 0)
3862 /* We have a fixed-length byte sequence. */
3863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3864 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3866 context.sourcereg = -1;
3867 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3868 context.ucharptr = 0;
3869 #endif
3870 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
3871 return cc;
3874 /* A non-fixed length character will be checked if length == 0. */
3875 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
/* Emits the validity checks for the back reference at `cc` and returns a
   jump the caller must resolve. The group's first OVECTOR slot (index
   GET2(cc, 1) << 1) is loaded into TMP1.
   Unless common->jscript_compat is set:
   - with backtracks == NULL, a single jump is returned that is taken when
     TMP1 equals OVECTOR(1) or equals OVECTOR(offset + 1);
   - otherwise a jump is added to `backtracks` for TMP1 == OVECTOR(1).
   In the remaining cases the returned jump is taken when TMP1 equals
   OVECTOR(offset + 1), i.e. when the two slots of the group are equal. */
3878 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3880 DEFINE_COMPILER;
3881 int offset = GET2(cc, 1) << 1;
3883 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3884 if (!common->jscript_compat)
3886 if (backtracks == NULL)
3888 /* OVECTOR(1) contains the "string begin - 1" constant. */
3889 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3890 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3892 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3893 return JUMP(SLJIT_C_NOT_ZERO);
3895 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3897 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3900 /* Forward definitions. */
3901 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
3902 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
   zero-fills it, stores `ccstart` in its cc field and links it in front of
   parent->top. If the compiler reports an error after the allocation, the
   enclosing function returns `error`. The expansion site must have
   `compiler`, `backtrack` and `parent` in scope. */
3904 #define PUSH_BACKTRACK(size, ccstart, error) \
3905 do \
3907 backtrack = sljit_alloc_memory(compiler, (size)); \
3908 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3909 return error; \
3910 memset(backtrack, 0, size); \
3911 backtrack->prev = parent->top; \
3912 backtrack->cc = (ccstart); \
3913 parent->top = backtrack; \
3915 while (0)
/* Variant of PUSH_BACKTRACK for functions returning void: performs the same
   allocation, zero-fill and linking in front of parent->top, but on a
   compiler error it executes a plain `return;`. The expansion site must have
   `compiler`, `backtrack` and `parent` in scope. */
3917 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
3918 do \
3920 backtrack = sljit_alloc_memory(compiler, (size)); \
3921 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3922 return; \
3923 memset(backtrack, 0, size); \
3924 backtrack->prev = parent->top; \
3925 backtrack->cc = (ccstart); \
3926 parent->top = backtrack; \
3928 while (0)
/* Casts the local `backtrack` pointer to a pointer to the given record type. */
3930 #define BACKTRACK_AS(type) ((type *)backtrack)
/* Emits the code that matches the back reference at `cc` (OP_REF or OP_REFI)
   against the subject at STR_PTR; failures jump to `backtracks`.
   withchecks - when TRUE, also emit the tests on the group itself: a
                backtrack if its first OVECTOR slot equals OVECTOR(1) (skipped
                when common->jscript_compat is set), and a jump for the case
                where its two OVECTOR slots are equal.
   emptyfail  - decides what that "slots are equal" jump does: TRUE adds it to
                `backtracks`, FALSE makes it land after the comparison code.
   Returns cc + 1 + IMM2_SIZE. */
3932 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
3934 DEFINE_COMPILER;
3935 int offset = GET2(cc, 1) << 1;
3936 struct sljit_jump *jump = NULL;
3937 struct sljit_jump *partial;
3938 struct sljit_jump *nopartial;
3940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3941 /* OVECTOR(1) contains the "string begin - 1" constant. */
3942 if (withchecks && !common->jscript_compat)
3943 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3945 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* UTF caseless reference: the comparison is done by calling the C helper
   do_utf_caselesscmp(). TMP1 and TMP2 hold the two OVECTOR slots of the
   group, STR_PTR is passed through jit_arguments.uchar_ptr, and STACK_TOP is
   parked in LOCALS0 around the call. */
3946 if (common->utf && *cc == OP_REFI)
3948 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3949 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3950 if (withchecks)
3951 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3953 /* Needed to save important temporary registers. */
3954 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3955 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3957 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3958 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: in JIT_COMPILE mode 0 and 1 both backtrack. In the
   partial modes 0 backtracks, 1 calls check_partial() and then backtracks.
   Any other value becomes the new STR_PTR. */
3959 if (common->mode == JIT_COMPILE)
3960 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3961 else
3963 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3964 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3965 check_partial(common, FALSE);
3966 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3967 JUMPHERE(nopartial);
3969 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3971 else
3972 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* All other references: TMP2 = OVECTOR(offset + 1) - TMP1 is the length of
   the captured text. STR_PTR is advanced by it, and the shared casefulcmp /
   caselesscmp routine is fast-called; a non-zero TMP2 afterwards
   backtracks. */
3974 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3975 if (withchecks)
3976 jump = JUMP(SLJIT_C_ZERO);
3978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3979 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3980 if (common->mode == JIT_COMPILE)
3981 add_jump(compiler, backtracks, partial);
3983 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3984 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* Partial modes only: when the advanced STR_PTR ran past STR_END, the length
   is reduced to what is left in the subject, STR_PTR is set to STR_END, the
   comparison is run on that shortened length (skipped when it is zero), and
   then check_partial() is called before backtracking. */
3986 if (common->mode != JIT_COMPILE)
3988 nopartial = JUMP(SLJIT_JUMP);
3989 JUMPHERE(partial);
3990 /* TMP2 -= STR_END - STR_PTR */
3991 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3992 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3993 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3994 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3995 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3996 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3997 JUMPHERE(partial);
3998 check_partial(common, FALSE);
3999 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4000 JUMPHERE(nopartial);
/* `jump` is non-NULL only when withchecks requested the equal-slots test. */
4004 if (jump != NULL)
4006 if (emptyfail)
4007 add_jump(compiler, backtracks, jump);
4008 else
4009 JUMPHERE(jump);
4011 return cc + 1 + IMM2_SIZE;
/* Emits the try-path code for a back reference that is followed by a repeat
   quantifier (OP_CRSTAR .. OP_CRMINRANGE).

   common - compiler state.
   cc     - points at the reference opcode; the quantifier opcode is read from
            cc[1 + IMM2_SIZE].
   parent - handed to PUSH_BACKTRACK by name (the macro body is not visible
            here).

   Returns cc advanced past the reference and its quantifier. PUSH_BACKTRACK
   is given NULL as its error value - presumably that is what gets returned
   when the allocation fails; confirm against the macro definition. */
4014 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4016 DEFINE_COMPILER;
4017 backtrack_common *backtrack;
4018 pcre_uchar type;
4019 struct sljit_label *label;
4020 struct sljit_jump *zerolength;
4021 struct sljit_jump *jump = NULL;
4022 pcre_uchar *ccbegin = cc;
4023 int min = 0, max = 0;
4024 BOOL minimize;
4026 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
/* The quantifier opcode follows the reference opcode and its IMM2_SIZE
   operand. Its lowest bit is used as the "minimize" (lazy) flag. */
4028 type = cc[1 + IMM2_SIZE];
4029 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the quantifier. A max of 0 is
   treated below as "no upper limit": the greedy path emits an unconditional
   loop for it and the lazy path skips the limit check. */
4030 switch(type)
4032 case OP_CRSTAR:
4033 case OP_CRMINSTAR:
4034 min = 0;
4035 max = 0;
4036 cc += 1 + IMM2_SIZE + 1;
4037 break;
4038 case OP_CRPLUS:
4039 case OP_CRMINPLUS:
4040 min = 1;
4041 max = 0;
4042 cc += 1 + IMM2_SIZE + 1;
4043 break;
4044 case OP_CRQUERY:
4045 case OP_CRMINQUERY:
4046 min = 0;
4047 max = 1;
4048 cc += 1 + IMM2_SIZE + 1;
4049 break;
4050 case OP_CRRANGE:
4051 case OP_CRMINRANGE:
/* Range quantifiers carry two IMM2 operands: min first, then max. */
4052 min = GET2(cc, 1 + IMM2_SIZE + 1);
4053 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4054 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4055 break;
4056 default:
4057 SLJIT_ASSERT_STOP();
4058 break;
/* Greedy repeat. This branch returns on its own; the code after it handles
   the minimizing case only. */
4061 if (!minimize)
4063 if (min == 0)
/* Zero iterations are acceptable: push STR_PTR and a 0, and pass no
   backtrack list to compile_ref_checks. */
4065 allocate_stack(common, 2);
4066 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4067 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4068 /* Temporary release of STR_PTR. */
4069 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4070 zerolength = compile_ref_checks(common, ccbegin, NULL);
4071 /* Restore if not zero length. */
4072 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4074 else
/* At least one iteration is required: push a single 0 and let
   compile_ref_checks add its failure jumps to this item's backtrack list. */
4076 allocate_stack(common, 1);
4077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4078 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
/* An iteration counter is only emitted when some limit exceeds 1; it is kept
   in the POSSESSIVE0 local and starts at 0. */
4081 if (min > 1 || max > 1)
4082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
/* Loop head: one match of the referenced text per pass. */
4084 label = LABEL();
4085 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4087 if (min > 1 || max > 1)
/* Increment the counter, loop again while it is below min, and once a finite
   max is reached stop pushing positions and fall out of the loop. */
4089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4090 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4092 if (min > 1)
4093 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4094 if (max > 1)
4096 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4097 allocate_stack(common, 1);
4098 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4099 JUMPTO(SLJIT_JUMP, label);
4100 JUMPHERE(jump);
4104 if (max == 0)
4106 /* Includes min > 1 case as well. */
4107 allocate_stack(common, 1);
4108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4109 JUMPTO(SLJIT_JUMP, label);
/* The jump returned by compile_ref_checks lands here, past the loop. */
4112 JUMPHERE(zerolength);
4113 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4115 decrease_call_count(common);
4116 return cc;
/* Minimizing (lazy) repeat. Two stack slots are reserved: STACK(0) receives
   STR_PTR after each iteration and STACK(1) is used as the iteration count.
   The count slot is not initialised for OP_CRMINSTAR, which has no limits
   and never reads it. */
4119 allocate_stack(common, 2);
4120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4121 if (type != OP_CRMINSTAR)
4122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4124 if (min == 0)
/* No iteration is required up front: store STR_PTR and jump over the first
   match attempt. */
4126 zerolength = compile_ref_checks(common, ccbegin, NULL);
4127 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4128 jump = JUMP(SLJIT_JUMP);
4130 else
4131 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
/* Entry point recorded for this item: try one more iteration, unless the
   count has already reached a finite max. */
4133 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4134 if (max > 0)
4135 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4137 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4140 if (min > 1)
/* Count the iteration and keep looping until the minimum is met. */
4142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4143 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4145 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4147 else if (max > 0)
/* Only an upper limit applies: bump the count in place. */
4148 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
/* jump is non-NULL only when min == 0 (the skip over the first attempt). */
4150 if (jump != NULL)
4151 JUMPHERE(jump);
4152 JUMPHERE(zerolength);
4154 decrease_call_count(common);
4155 return cc;
4158 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4160 DEFINE_COMPILER;
4161 backtrack_common *backtrack;
4162 recurse_entry *entry = common->entries;
4163 recurse_entry *prev = NULL;
4164 int start = GET(cc, 1);
4166 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4167 while (entry != NULL)
4169 if (entry->start == start)
4170 break;
4171 prev = entry;
4172 entry = entry->next;
4175 if (entry == NULL)
4177 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4178 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4179 return NULL;
4180 entry->next = NULL;
4181 entry->entry = NULL;
4182 entry->calls = NULL;
4183 entry->start = start;
4185 if (prev != NULL)
4186 prev->next = entry;
4187 else
4188 common->entries = entry;
4191 if (common->has_set_som && common->mark_ptr != 0)
4193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4194 allocate_stack(common, 2);
4195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4199 else if (common->has_set_som || common->mark_ptr != 0)
4201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4202 allocate_stack(common, 1);
4203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4206 if (entry->entry == NULL)
4207 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4208 else
4209 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4210 /* Leave if the match is failed. */
4211 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4212 return cc + 1 + LINK_SIZE;
/* Emits the try-path code for an assertion (opcode in the range OP_ASSERT ..
   OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

   common      - compiler state; its leavelabel / acceptlabel / leave / accept
                 members are saved on entry and restored on every return.
   cc          - points at the assertion opcode, or at the OP_BRAZERO /
                 OP_BRAMINZERO in front of it.
   backtrack   - receives framesize and localptr, and for bracketed-zero
                 forms the trypath label.
   conditional - when true, failure jumps are collected in
                 backtrack->condfailed instead of
                 backtrack->common.topbacktracks.

   Returns the address after the assertion's closing opcode and its LINK_SIZE
   operand, or NULL if the compiler reported an error while an alternative
   was being compiled. */
4215 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4217 DEFINE_COMPILER;
4218 int framesize;
4219 int localptr;
4220 backtrack_common altbacktrack;
4221 pcre_uchar *ccbegin;
4222 pcre_uchar opcode;
4223 pcre_uchar bra = OP_BRA;
4224 jump_list *tmp = NULL;
4225 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4226 jump_list **found;
4227 /* Saving previous accept variables. */
4228 struct sljit_label *save_leavelabel = common->leavelabel;
4229 struct sljit_label *save_acceptlabel = common->acceptlabel;
4230 jump_list *save_leave = common->leave;
4231 jump_list *save_accept = common->accept;
4232 struct sljit_jump *jump;
4233 struct sljit_jump *brajump = NULL;
/* Remember a leading OP_BRAZERO / OP_BRAMINZERO and step over it. */
4235 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4237 SLJIT_ASSERT(!conditional);
4238 bra = *cc;
4239 cc++;
4241 localptr = PRIV_DATA(cc);
4242 SLJIT_ASSERT(localptr != 0);
4243 framesize = get_framesize(common, cc, FALSE);
4244 backtrack->framesize = framesize;
4245 backtrack->localptr = localptr;
4246 opcode = *cc;
4247 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" is where the jump taken after an alternative matches is collected:
   positive assertions gather it in the local list tmp (bound to the success
   label further down), negative ones send it straight to the failure
   target. */
4248 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin marks the start of the current alternative, cc its end. */
4249 ccbegin = cc;
4250 cc += GET(cc, 1);
4252 if (bra == OP_BRAMINZERO)
4254 /* This is a braminzero backtrack path. */
4255 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4256 free_stack(common, 1);
4257 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4260 if (framesize < 0)
/* No frame is needed: record STACK_TOP in the local and push STR_PTR. */
4262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4263 allocate_stack(common, 1);
4264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4266 else
/* Reserve framesize + 2 slots: STACK(0) holds STR_PTR, STACK(1) the previous
   value of the local, and init_frame fills in the rest starting at slot 2. */
4268 allocate_stack(common, framesize + 2);
4269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4270 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4274 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4277 memset(&altbacktrack, 0, sizeof(backtrack_common));
4278 common->leavelabel = NULL;
4279 common->leave = NULL;
/* One pass per alternative: compile its try path, emit the "alternative
   matched" sequence, then compile its backtrack path, which falls through
   into the next alternative. */
4280 while (1)
4282 common->acceptlabel = NULL;
4283 common->accept = NULL;
4284 altbacktrack.top = NULL;
4285 altbacktrack.topbacktracks = NULL;
/* Second and later alternatives restart from the STR_PTR saved at STACK(0). */
4287 if (*ccbegin == OP_ALT)
4288 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4290 altbacktrack.cc = ccbegin;
4291 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
4292 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
/* Restore the caller's accept / leave state before bailing out. */
4294 common->leavelabel = save_leavelabel;
4295 common->acceptlabel = save_acceptlabel;
4296 common->leave = save_leave;
4297 common->accept = save_accept;
4298 return NULL;
4300 common->acceptlabel = LABEL();
4301 if (common->accept != NULL)
4302 set_jumps(common->accept, common->acceptlabel);
4304 /* Reset stack. */
4305 if (framesize < 0)
4306 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4307 else {
4308 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4310 /* We don't need to keep the STR_PTR, only the previous localptr. */
4311 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4313 else
4315 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4316 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* For a negative assertion, a matching alternative means the assertion as a
   whole fails; put STR_PTR / the stack into the state the failure target
   expects. */
4320 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4322 /* We know that STR_PTR was stored on the top of the stack. */
4323 if (conditional)
4324 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4325 else if (bra == OP_BRAZERO)
4327 if (framesize < 0)
4328 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4329 else
4331 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4332 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4335 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4336 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4338 else if (framesize >= 0)
4340 /* For OP_BRA and OP_BRAMINZERO. */
4341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4344 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4346 compile_backtrackpath(common, altbacktrack.top);
4347 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4349 common->leavelabel = save_leavelabel;
4350 common->acceptlabel = save_acceptlabel;
4351 common->leave = save_leave;
4352 common->accept = save_accept;
4353 return NULL;
4355 set_jumps(altbacktrack.topbacktracks, LABEL());
/* Stop after the last alternative; otherwise advance to the next OP_ALT. */
4357 if (*cc != OP_ALT)
4358 break;
4360 ccbegin = cc;
4361 cc += GET(cc, 1);
4363 /* None of them matched. */
4364 if (common->leave != NULL)
4365 set_jumps(common->leave, LABEL());
4367 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4369 /* Assert is failed. */
4370 if (conditional || bra == OP_BRAZERO)
4371 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4373 if (framesize < 0)
4375 /* The topmost item should be 0. */
4376 if (bra == OP_BRAZERO)
4377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4378 else
4379 free_stack(common, 1);
4381 else
/* TMP1 carries the previous value of the local (saved at STACK(1)) across
   the stack release so it can be written back afterwards. */
4383 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4384 /* The topmost item should be 0. */
4385 if (bra == OP_BRAZERO)
4387 free_stack(common, framesize + 1);
4388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4390 else
4391 free_stack(common, framesize + 2);
4392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
/* The failure jump goes to the failure target, except for OP_BRAZERO where
   it is bound to the trypath label set further down. */
4394 jump = JUMP(SLJIT_JUMP);
4395 if (bra != OP_BRAZERO)
4396 add_jump(compiler, target, jump);
4398 /* Assert is successful. */
4399 set_jumps(tmp, LABEL());
4400 if (framesize < 0)
4402 /* We know that STR_PTR was stored on the top of the stack. */
4403 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4404 /* Keep the STR_PTR on the top of the stack. */
4405 if (bra == OP_BRAZERO)
4406 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4407 else if (bra == OP_BRAMINZERO)
4409 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4413 else
4415 if (bra == OP_BRA)
4417 /* We don't need to keep the STR_PTR, only the previous localptr. */
4418 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4419 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4421 else
4423 /* We don't need to keep the STR_PTR, only the previous localptr. */
4424 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4425 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot ends up holding STR_PTR for OP_BRAZERO and 0 otherwise. */
4426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4430 if (bra == OP_BRAZERO)
4432 backtrack->trypath = LABEL();
4433 sljit_set_label(jump, backtrack->trypath);
4435 else if (bra == OP_BRAMINZERO)
4437 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4438 JUMPHERE(brajump);
4439 if (framesize >= 0)
4441 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4442 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4445 set_jumps(backtrack->common.topbacktracks, LABEL());
4448 else
4450 /* AssertNot is successful. */
4451 if (framesize < 0)
4453 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4454 if (bra != OP_BRA)
4455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4456 else
4457 free_stack(common, 1);
4459 else
4461 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4463 /* The topmost item should be 0. */
4464 if (bra != OP_BRA)
4466 free_stack(common, framesize + 1);
4467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4469 else
4470 free_stack(common, framesize + 2);
4471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4474 if (bra == OP_BRAZERO)
4475 backtrack->trypath = LABEL();
4476 else if (bra == OP_BRAMINZERO)
4478 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4479 JUMPHERE(brajump);
/* With a bracketed-zero form, the jumps collected from matching alternatives
   are bound here and the list is cleared. */
4482 if (bra != OP_BRA)
4484 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
4485 set_jumps(backtrack->common.topbacktracks, LABEL());
4486 backtrack->common.topbacktracks = NULL;
/* Normal exit: hand the accept / leave state back to the caller. */
4490 common->leavelabel = save_leavelabel;
4491 common->acceptlabel = save_acceptlabel;
4492 common->leave = save_leave;
4493 common->accept = save_accept;
4494 return cc + 1 + LINK_SIZE;
4497 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4499 int condition = FALSE;
4500 pcre_uchar *slotA = name_table;
4501 pcre_uchar *slotB;
4502 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4503 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4504 sljit_w no_capture;
4505 int i;
4507 locals += refno & 0xff;
4508 refno >>= 8;
4509 no_capture = locals[1];
4511 for (i = 0; i < name_count; i++)
4513 if (GET2(slotA, 0) == refno) break;
4514 slotA += name_entry_size;
4517 if (i < name_count)
4519 /* Found a name for the number - there can be only one; duplicate names
4520 for different numbers are allowed, but not vice versa. First scan down
4521 for duplicates. */
4523 slotB = slotA;
4524 while (slotB > name_table)
4526 slotB -= name_entry_size;
4527 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4529 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4530 if (condition) break;
4532 else break;
4535 /* Scan up for duplicates */
4536 if (!condition)
4538 slotB = slotA;
4539 for (i++; i < name_count; i++)
4541 slotB += name_entry_size;
4542 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4544 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4545 if (condition) break;
4547 else break;
4551 return condition;
4554 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4556 int condition = FALSE;
4557 pcre_uchar *slotA = name_table;
4558 pcre_uchar *slotB;
4559 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4560 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4561 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4562 int i;
4564 for (i = 0; i < name_count; i++)
4566 if (GET2(slotA, 0) == recno) break;
4567 slotA += name_entry_size;
4570 if (i < name_count)
4572 /* Found a name for the number - there can be only one; duplicate
4573 names for different numbers are allowed, but not vice versa. First
4574 scan down for duplicates. */
4576 slotB = slotA;
4577 while (slotB > name_table)
4579 slotB -= name_entry_size;
4580 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4582 condition = GET2(slotB, 0) == group_num;
4583 if (condition) break;
4585 else break;
4588 /* Scan up for duplicates */
4589 if (!condition)
4591 slotB = slotA;
4592 for (i++; i < name_count; i++)
4594 slotB += name_entry_size;
4595 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4597 condition = GET2(slotB, 0) == group_num;
4598 if (condition) break;
4600 else break;
4604 return condition;
4608 Handling bracketed expressions is probably the most complex part.
4610 Stack layout naming characters:
4611 S - Push the current STR_PTR
4612 0 - Push a 0 (NULL)
4613 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4614 before the next alternative. Not pushed if there are no alternatives.
4615 M - Any values pushed by the current alternative. Can be empty, or anything.
4616 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4617 L - Push the previous local (pointed by localptr) to the stack
4618 () - optional values stored on the stack
4619 ()* - optional, can be stored multiple times
4621 The following list shows the regular expression templates, their PCRE byte codes
4622 and stack layout supported by pcre-sljit.
4624 (?:) OP_BRA | OP_KET A M
4625 () OP_CBRA | OP_KET C M
4626 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4627 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4628 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4629 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4630 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4631 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4632 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4633 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4634 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4635 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4636 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4637 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4638 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4639 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4640 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4641 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4642 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4643 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4644 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4645 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4648 Stack layout naming characters:
4649 A - Push the alternative index (starting from 0) on the stack.
4650 Not pushed if there are no alternatives.
4651 M - Any values pushed by the current alternative. Can be empty, or anything.
4653 The next list shows the possible content of a bracket:
4654 (|) OP_*BRA | OP_ALT ... M A
4655 (?()|) OP_*COND | OP_ALT M A
4656 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4657 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4658 Or nothing, if trace is unnecessary
4661 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4663 DEFINE_COMPILER;
4664 backtrack_common *backtrack;
4665 pcre_uchar opcode;
4666 int localptr = 0;
4667 int offset = 0;
4668 int stacksize;
4669 pcre_uchar *ccbegin;
4670 pcre_uchar *trypath;
4671 pcre_uchar bra = OP_BRA;
4672 pcre_uchar ket;
4673 assert_backtrack *assert;
4674 BOOL has_alternatives;
4675 struct sljit_jump *jump;
4676 struct sljit_jump *skip;
4677 struct sljit_label *rmaxlabel = NULL;
4678 struct sljit_jump *braminzerojump = NULL;
4680 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
4682 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4684 bra = *cc;
4685 cc++;
4686 opcode = *cc;
4689 opcode = *cc;
4690 ccbegin = cc;
4691 trypath = ccbegin + 1 + LINK_SIZE;
4693 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4695 /* Drop this bracket_backtrack. */
4696 parent->top = backtrack->prev;
4697 return bracketend(cc);
4700 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4701 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4702 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4703 cc += GET(cc, 1);
4705 has_alternatives = *cc == OP_ALT;
4706 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4708 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
4709 if (*trypath == OP_NRREF)
4711 stacksize = GET2(trypath, 1);
4712 if (common->currententry == NULL || stacksize == RREF_ANY)
4713 has_alternatives = FALSE;
4714 else if (common->currententry->start == 0)
4715 has_alternatives = stacksize != 0;
4716 else
4717 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4721 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4722 opcode = OP_SCOND;
4723 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4724 opcode = OP_ONCE;
4726 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4728 /* Capturing brackets has a pre-allocated space. */
4729 offset = GET2(ccbegin, 1 + LINK_SIZE);
4730 localptr = OVECTOR_PRIV(offset);
4731 offset <<= 1;
4732 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4733 trypath += IMM2_SIZE;
4735 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4737 /* Other brackets simply allocate the next entry. */
4738 localptr = PRIV_DATA(ccbegin);
4739 SLJIT_ASSERT(localptr != 0);
4740 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4741 if (opcode == OP_ONCE)
4742 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
4745 /* Instructions before the first alternative. */
4746 stacksize = 0;
4747 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4748 stacksize++;
4749 if (bra == OP_BRAZERO)
4750 stacksize++;
4752 if (stacksize > 0)
4753 allocate_stack(common, stacksize);
4755 stacksize = 0;
4756 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4759 stacksize++;
4762 if (bra == OP_BRAZERO)
4763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4765 if (bra == OP_BRAMINZERO)
4767 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
4768 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4769 if (ket != OP_KETRMIN)
4771 free_stack(common, 1);
4772 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4774 else
4776 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4778 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4780 /* Nothing stored during the first run. */
4781 skip = JUMP(SLJIT_JUMP);
4782 JUMPHERE(jump);
4783 /* Checking zero-length iteration. */
4784 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4786 /* When we come from outside, localptr contains the previous STR_PTR. */
4787 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4789 else
4791 /* Except when the whole stack frame must be saved. */
4792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4793 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
4795 JUMPHERE(skip);
4797 else
4799 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4800 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4801 JUMPHERE(jump);
4806 if (ket == OP_KETRMIN)
4807 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
4809 if (ket == OP_KETRMAX)
4811 rmaxlabel = LABEL();
4812 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4813 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
4816 /* Handling capturing brackets and alternatives. */
4817 if (opcode == OP_ONCE)
4819 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4821 /* Neither capturing brackets nor recursions are not found in the block. */
4822 if (ket == OP_KETRMIN)
4824 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4825 allocate_stack(common, 2);
4826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4827 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4828 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4830 else if (ket == OP_KETRMAX || has_alternatives)
4832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4833 allocate_stack(common, 1);
4834 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4836 else
4837 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4839 else
4841 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4843 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
4844 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4845 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
4846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4849 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
4851 else
4853 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
4854 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4855 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
4856 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4858 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
4862 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4864 /* Saving the previous values. */
4865 allocate_stack(common, 3);
4866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4868 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4869 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4872 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4874 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4876 /* Saving the previous value. */
4877 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4878 allocate_stack(common, 1);
4879 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4882 else if (has_alternatives)
4884 /* Pushing the starting string pointer. */
4885 allocate_stack(common, 1);
4886 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4889 /* Generating code for the first alternative. */
4890 if (opcode == OP_COND || opcode == OP_SCOND)
4892 if (*trypath == OP_CREF)
4894 SLJIT_ASSERT(has_alternatives);
4895 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
4896 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4897 trypath += 1 + IMM2_SIZE;
4899 else if (*trypath == OP_NCREF)
4901 SLJIT_ASSERT(has_alternatives);
4902 stacksize = GET2(trypath, 1);
4903 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4905 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4908 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
4909 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4910 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4911 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4912 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4913 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4915 JUMPHERE(jump);
4916 trypath += 1 + IMM2_SIZE;
4918 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
4920 /* Never has other case. */
4921 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
4923 stacksize = GET2(trypath, 1);
4924 if (common->currententry == NULL)
4925 stacksize = 0;
4926 else if (stacksize == RREF_ANY)
4927 stacksize = 1;
4928 else if (common->currententry->start == 0)
4929 stacksize = stacksize == 0;
4930 else
4931 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4933 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
4935 SLJIT_ASSERT(!has_alternatives);
4936 if (stacksize != 0)
4937 trypath += 1 + IMM2_SIZE;
4938 else
4940 if (*cc == OP_ALT)
4942 trypath = cc + 1 + LINK_SIZE;
4943 cc += GET(cc, 1);
4945 else
4946 trypath = cc;
4949 else
4951 SLJIT_ASSERT(has_alternatives);
4953 stacksize = GET2(trypath, 1);
4954 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4958 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4959 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4960 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4961 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4962 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4963 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4964 trypath += 1 + IMM2_SIZE;
4967 else
4969 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
4970 /* Similar code as PUSH_BACKTRACK macro. */
4971 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
4972 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4973 return NULL;
4974 memset(assert, 0, sizeof(assert_backtrack));
4975 assert->common.cc = trypath;
4976 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
4977 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
4981 compile_trypath(common, trypath, cc, backtrack);
4982 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4983 return NULL;
4985 if (opcode == OP_ONCE)
4987 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4989 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4990 /* TMP2 which is set here used by OP_KETRMAX below. */
4991 if (ket == OP_KETRMAX)
4992 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4993 else if (ket == OP_KETRMIN)
4995 /* Move the STR_PTR to the localptr. */
4996 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4999 else
5001 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5002 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5003 if (ket == OP_KETRMAX)
5005 /* TMP2 which is set here used by OP_KETRMAX below. */
5006 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5011 stacksize = 0;
5012 if (ket != OP_KET || bra != OP_BRA)
5013 stacksize++;
5014 if (has_alternatives && opcode != OP_ONCE)
5015 stacksize++;
5017 if (stacksize > 0)
5018 allocate_stack(common, stacksize);
5020 stacksize = 0;
5021 if (ket != OP_KET)
5023 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5024 stacksize++;
5026 else if (bra != OP_BRA)
5028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5029 stacksize++;
5032 if (has_alternatives)
5034 if (opcode != OP_ONCE)
5035 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5036 if (ket != OP_KETRMAX)
5037 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5040 /* Must be after the trypath label. */
5041 if (offset != 0)
5043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5048 if (ket == OP_KETRMAX)
5050 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5052 if (has_alternatives)
5053 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5054 /* Checking zero-length iteration. */
5055 if (opcode != OP_ONCE)
5057 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5058 /* Drop STR_PTR for greedy plus quantifier. */
5059 if (bra != OP_BRAZERO)
5060 free_stack(common, 1);
5062 else
5063 /* TMP2 must contain the starting STR_PTR. */
5064 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5066 else
5067 JUMPTO(SLJIT_JUMP, rmaxlabel);
5068 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5071 if (bra == OP_BRAZERO)
5072 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5074 if (bra == OP_BRAMINZERO)
5076 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5077 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5078 if (braminzerojump != NULL)
5080 JUMPHERE(braminzerojump);
5081 /* We need to release the end pointer to perform the
5082 backtrack for the zero-length iteration. When
5083 framesize is < 0, OP_ONCE will do the release itself. */
5084 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5086 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5087 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5089 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5090 free_stack(common, 1);
5092 /* Continue to the normal backtrack. */
5095 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5096 decrease_call_count(common);
5098 /* Skip the other alternatives. */
5099 while (*cc == OP_ALT)
5100 cc += GET(cc, 1);
5101 cc += 1 + LINK_SIZE;
5102 return cc;
/* Emits the try path for the OP_BRAPOS / OP_SBRAPOS / OP_CBRAPOS /
   OP_SCBRAPOS brackets, optionally preceded by OP_BRAPOSZERO.

   common  compiler state.
   cc      address of the bracket opcode (or of the leading OP_BRAPOSZERO).
   parent  not referenced directly in this function; presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition.

   Every alternative up to the closing OP_KETRPOS is compiled together with
   its own backtrack path; the code emitted after a successful alternative
   jumps back to the `loop` label.

   Returns the address following OP_KETRPOS (cc + 1 + LINK_SIZE), or NULL
   when the sljit compiler reports an error. */
5105 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5107 DEFINE_COMPILER;
5108 backtrack_common *backtrack;
5109 pcre_uchar opcode;
5110 int localptr;
5111 int cbraprivptr = 0;
5112 int framesize;
5113 int stacksize;
5114 int offset = 0;
5115 BOOL zero = FALSE;
5116 pcre_uchar *ccbegin = NULL;
5117 int stack;
5118 struct sljit_label *loop = NULL;
5119 struct jump_list *emptymatch = NULL;
5121 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is remembered in `zero` and stepped over. */
5122 if (*cc == OP_BRAPOSZERO)
5124 zero = TRUE;
5125 cc++;
5128 opcode = *cc;
5129 localptr = PRIV_DATA(cc);
5130 SLJIT_ASSERT(localptr != 0);
5131 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
/* ccbegin is set to the start of the first alternative. The capturing
   variants additionally read the group number stored after the link field
   and derive the ovector index (offset) and private slot (cbraprivptr). */
5132 switch(opcode)
5134 case OP_BRAPOS:
5135 case OP_SBRAPOS:
5136 ccbegin = cc + 1 + LINK_SIZE;
5137 break;
5139 case OP_CBRAPOS:
5140 case OP_SCBRAPOS:
5141 offset = GET2(cc, 1 + LINK_SIZE);
5142 cbraprivptr = OVECTOR_PRIV(offset);
5143 offset <<= 1;
5144 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5145 break;
5147 default:
5148 SLJIT_ASSERT_STOP();
5149 break;
/* framesize < 0: init_frame() is not called. Only the saved values (the
   ovector pair for capturing brackets, otherwise STR_PTR) and, when `zero`
   is FALSE, a flag slot initialised to 1 are pushed. */
5152 framesize = get_framesize(common, cc, FALSE);
5153 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5154 if (framesize < 0)
5156 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5157 if (!zero)
5158 stacksize++;
5159 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5160 allocate_stack(common, stacksize);
5161 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5163 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5170 else
5171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5173 if (!zero)
5174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5176 else
/* framesize >= 0: room for the frame is reserved as well. The previous value
   of [localptr] is saved on the stack, [localptr] is redirected to the new
   area, and init_frame() fills the frame in. */
5178 stacksize = framesize + 1;
5179 if (!zero)
5180 stacksize++;
5181 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5182 stacksize++;
5183 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5184 allocate_stack(common, stacksize);
5186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5187 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5189 stack = 0;
5190 if (!zero)
5192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5193 stack++;
5195 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5198 stack++;
5200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5201 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5204 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
/* Target of the JUMPTO emitted after every successful alternative. */
5207 loop = LABEL();
/* One pass per alternative: the alternative's try path, the code that runs
   after it matched, and finally its backtrack path. */
5208 while (*cc != OP_KETRPOS)
5210 backtrack->top = NULL;
5211 backtrack->topbacktracks = NULL;
5212 cc += GET(cc, 1);
5214 compile_trypath(common, ccbegin, cc, backtrack);
5215 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5216 return NULL;
5218 if (framesize < 0)
5220 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5222 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5229 else
5231 if (opcode == OP_SBRAPOS)
5232 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* For the S* opcodes TMP1 holds the position that was stored before it was
   overwritten with STR_PTR; if both are equal the jump is queued on
   emptymatch, which is bound to the exit label at the end. */
5236 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5237 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
/* An alternative has matched: clear the flag slot that was initialised to 1. */
5239 if (!zero)
5240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5242 else
5244 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5246 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5247 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5252 else
5254 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5255 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5256 if (opcode == OP_SBRAPOS)
5257 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5258 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5261 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5262 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5264 if (!zero)
5266 if (framesize < 0)
5267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5268 else
5269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* Go round again for another iteration of the bracket. */
5272 JUMPTO(SLJIT_JUMP, loop);
5273 flush_stubs(common);
/* Backtrack path of the alternative just compiled. Afterwards STR_PTR is
   reloaded from the location where the last good position was stored. */
5275 compile_backtrackpath(common, backtrack->top);
5276 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5277 return NULL;
5278 set_jumps(backtrack->topbacktracks, LABEL());
5280 if (framesize < 0)
5282 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5283 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5284 else
5285 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5287 else
5289 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5291 /* Last alternative. */
5292 if (*cc == OP_KETRPOS)
5293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5296 else
5298 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5299 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5303 if (*cc == OP_KETRPOS)
5304 break;
5305 ccbegin = cc + 1 + LINK_SIZE;
/* When `zero` is FALSE, a flag slot that is still non-zero (no alternative
   ever cleared it) sends execution to this node's backtrack list. */
5308 backtrack->topbacktracks = NULL;
5309 if (!zero)
5311 if (framesize < 0)
5312 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5313 else /* TMP2 is set to [localptr] above. */
5314 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5317 /* None of them matched. */
5318 set_jumps(emptymatch, LABEL());
5319 decrease_call_count(common);
5320 return cc + 1 + LINK_SIZE;
5323 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5325 int class_len;
5327 *opcode = *cc;
5328 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5330 cc++;
5331 *type = OP_CHAR;
5333 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5335 cc++;
5336 *type = OP_CHARI;
5337 *opcode -= OP_STARI - OP_STAR;
5339 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5341 cc++;
5342 *type = OP_NOT;
5343 *opcode -= OP_NOTSTAR - OP_STAR;
5345 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5347 cc++;
5348 *type = OP_NOTI;
5349 *opcode -= OP_NOTSTARI - OP_STAR;
5351 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5353 cc++;
5354 *opcode -= OP_TYPESTAR - OP_STAR;
5355 *type = 0;
5357 else
5359 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5360 *type = *opcode;
5361 cc++;
5362 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5363 *opcode = cc[class_len - 1];
5364 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5366 *opcode -= OP_CRSTAR - OP_STAR;
5367 if (end != NULL)
5368 *end = cc + class_len;
5370 else
5372 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5373 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5374 *arg2 = GET2(cc, class_len);
5376 if (*arg2 == 0)
5378 SLJIT_ASSERT(*arg1 != 0);
5379 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5381 if (*arg1 == *arg2)
5382 *opcode = OP_EXACT;
5384 if (end != NULL)
5385 *end = cc + class_len + 2 * IMM2_SIZE;
5387 return cc;
5390 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5392 *arg1 = GET2(cc, 0);
5393 cc += IMM2_SIZE;
5396 if (*type == 0)
5398 *type = *cc;
5399 if (end != NULL)
5400 *end = next_opcode(common, cc);
5401 cc++;
5402 return cc;
5405 if (end != NULL)
5407 *end = cc + 1;
5408 #ifdef SUPPORT_UTF
5409 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5410 #endif
5412 return cc;
/* Emits the try path of a single-item iterator (a repeated character,
   character type or class).

   common  compiler state.
   cc      address of the iterator opcode; decoded by
           get_iterator_parameters() into opcode / type / arg1 / arg2.
   parent  not referenced directly in this function; presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition.

   Returns `end` as reported by get_iterator_parameters().
   NOTE(review): PUSH_BACKTRACK is passed NULL as its last argument -
   presumably the value returned when allocation fails; confirm against the
   macro. */
5415 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5417 DEFINE_COMPILER;
5418 backtrack_common *backtrack;
5419 pcre_uchar opcode;
5420 pcre_uchar type;
5421 int arg1 = -1, arg2 = -1;
5422 pcre_uchar* end;
5423 jump_list *nomatch = NULL;
5424 struct sljit_jump *jump = NULL;
5425 struct sljit_label *label;
5427 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5429 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5431 switch(opcode)
5433 case OP_STAR:
5434 case OP_PLUS:
5435 case OP_UPTO:
5436 case OP_CRRANGE:
/* OP_ANYNL and OP_EXTUNI: a new stack slot holding STR_PTR is pushed after
   every matched item. The initial slot holds 0, preceded for OP_STAR /
   OP_UPTO by the starting STR_PTR. POSSESSIVE0 is the counter for the
   bounded forms. */
5437 if (type == OP_ANYNL || type == OP_EXTUNI)
5439 if (opcode == OP_STAR || opcode == OP_UPTO)
5441 allocate_stack(common, 2);
5442 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5445 else
5447 allocate_stack(common, 1);
5448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5450 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5453 label = LABEL();
5454 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5455 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5457 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5458 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* Below arg2 items: loop without pushing a position. */
5459 if (opcode == OP_CRRANGE && arg2 > 0)
5460 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* arg1 items reached: leave the loop through `jump`. */
5461 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5462 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5463 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5466 allocate_stack(common, 1);
5467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5468 JUMPTO(SLJIT_JUMP, label);
5469 if (jump != NULL)
5470 JUMPHERE(jump);
5472 else
/* Other types: two slots are used, STACK(0) = current STR_PTR and
   STACK(1) = a counter that starts at 1 and grows by one per matched item.
   OP_PLUS matches its mandatory first item before the slots are set up. */
5474 if (opcode == OP_PLUS)
5475 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5476 allocate_stack(common, 2);
5477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5479 label = LABEL();
5480 compile_char1_trypath(common, type, cc, &nomatch);
5481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* No upper limit to test: just count and loop. */
5482 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5484 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5485 JUMPTO(SLJIT_JUMP, label);
5487 else
5489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5490 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5492 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5494 set_jumps(nomatch, LABEL());
/* A ranged class repeat fails if the counter is still below arg2 + 1. */
5495 if (opcode == OP_CRRANGE)
5496 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1));
5497 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5499 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5500 break;
/* Only the current position is stored here; OP_MINPLUS matches one
   mandatory item first. */
5502 case OP_MINSTAR:
5503 case OP_MINPLUS:
5504 if (opcode == OP_MINPLUS)
5505 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5506 allocate_stack(common, 1);
5507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5508 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5509 break;
/* STACK(0) = STR_PTR, STACK(1) = counter starting at 1. OP_CRMINRANGE jumps
   straight to the backtrack list of this node. */
5511 case OP_MINUPTO:
5512 case OP_CRMINRANGE:
5513 allocate_stack(common, 2);
5514 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5516 if (opcode == OP_CRMINRANGE)
5517 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5518 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5519 break;
/* The starting position is saved; OP_QUERY then tries the item right away,
   OP_MINQUERY does not. */
5521 case OP_QUERY:
5522 case OP_MINQUERY:
5523 allocate_stack(common, 1);
5524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5525 if (opcode == OP_QUERY)
5526 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5527 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5528 break;
/* The counter in POSSESSIVE0 starts at 1; after each item it is incremented
   and the loop repeats while it is below arg1 + 1. No stack slot is used. */
5530 case OP_EXACT:
5531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5532 label = LABEL();
5533 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5534 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5535 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5537 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5538 break;
/* No stack slots: POSSESSIVE1 holds the position after the last matched
   item. POSSESSIVE0 is a counter for OP_POSUPTO and a marker for OP_POSPLUS
   (1 initially, 2 once an item matched; still below 2 means failure). */
5540 case OP_POSSTAR:
5541 case OP_POSPLUS:
5542 case OP_POSUPTO:
5543 if (opcode != OP_POSSTAR)
5544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5546 label = LABEL();
5547 compile_char1_trypath(common, type, cc, &nomatch);
5548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5549 if (opcode != OP_POSUPTO)
5551 if (opcode == OP_POSPLUS)
5552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5553 JUMPTO(SLJIT_JUMP, label);
5555 else
5557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5558 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5560 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5562 set_jumps(nomatch, LABEL());
5563 if (opcode == OP_POSPLUS)
5564 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5565 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5566 break;
/* Try one item; whether or not it matched, STR_PTR is reloaded from
   POSSESSIVE1, which holds the start position on failure and the new
   position on success. */
5568 case OP_POSQUERY:
5569 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5570 compile_char1_trypath(common, type, cc, &nomatch);
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5572 set_jumps(nomatch, LABEL());
5573 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5574 break;
5576 default:
5577 SLJIT_ASSERT_STOP();
5578 break;
5581 decrease_call_count(common);
5582 return end;
5585 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5587 DEFINE_COMPILER;
5588 backtrack_common *backtrack;
5590 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5592 if (*cc == OP_FAIL)
5594 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5595 return cc + 1;
5598 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5600 /* No need to check notempty conditions. */
5601 if (common->acceptlabel == NULL)
5602 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5603 else
5604 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5605 return cc + 1;
5608 if (common->acceptlabel == NULL)
5609 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5610 else
5611 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5612 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5613 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5614 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5615 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5616 if (common->acceptlabel == NULL)
5617 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5618 else
5619 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5620 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5621 if (common->acceptlabel == NULL)
5622 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5623 else
5624 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5625 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5626 return cc + 1;
5629 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
5631 DEFINE_COMPILER;
5632 int offset = GET2(cc, 1);
5634 /* Data will be discarded anyway... */
5635 if (common->currententry != NULL)
5636 return cc + 1 + IMM2_SIZE;
5638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5639 offset <<= 1;
5640 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5642 return cc + 1 + IMM2_SIZE;
/* Walks the compiled pattern from cc up to ccend and calls the try-path
   generator that belongs to each opcode.

   common  compiler state.
   cc      first opcode to compile.
   ccend   end of the range; after the loop cc is asserted to equal ccend.
   parent  backtrack node; generators that only need a failure list receive
           the nextbacktracks list of parent->top, or parent->topbacktracks
           when parent->top is NULL.

   Returns early when a generator hands back NULL or when an unknown opcode
   is met (after SLJIT_ASSERT_STOP). */
5645 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
5647 DEFINE_COMPILER;
5648 backtrack_common *backtrack;
5650 while (cc < ccend)
5652 switch(*cc)
/* Opcodes handled directly by compile_char1_trypath(). */
5654 case OP_SOD:
5655 case OP_SOM:
5656 case OP_NOT_WORD_BOUNDARY:
5657 case OP_WORD_BOUNDARY:
5658 case OP_NOT_DIGIT:
5659 case OP_DIGIT:
5660 case OP_NOT_WHITESPACE:
5661 case OP_WHITESPACE:
5662 case OP_NOT_WORDCHAR:
5663 case OP_WORDCHAR:
5664 case OP_ANY:
5665 case OP_ALLANY:
5666 case OP_ANYBYTE:
5667 case OP_NOTPROP:
5668 case OP_PROP:
5669 case OP_ANYNL:
5670 case OP_NOT_HSPACE:
5671 case OP_HSPACE:
5672 case OP_NOT_VSPACE:
5673 case OP_VSPACE:
5674 case OP_EXTUNI:
5675 case OP_EODN:
5676 case OP_EOD:
5677 case OP_CIRC:
5678 case OP_CIRCM:
5679 case OP_DOLL:
5680 case OP_DOLLM:
5681 case OP_NOT:
5682 case OP_NOTI:
5683 case OP_REVERSE:
5684 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5685 break;
/* The old OVECTOR(0) is pushed on the stack and replaced with STR_PTR. */
5687 case OP_SET_SOM:
5688 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5689 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5690 allocate_stack(common, 1);
5691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5693 cc++;
5694 break;
/* compile_charn_trypath() is used only when common->mode is JIT_COMPILE. */
5696 case OP_CHAR:
5697 case OP_CHARI:
5698 if (common->mode == JIT_COMPILE)
5699 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5700 else
5701 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5702 break;
/* Single-item iterators. */
5704 case OP_STAR:
5705 case OP_MINSTAR:
5706 case OP_PLUS:
5707 case OP_MINPLUS:
5708 case OP_QUERY:
5709 case OP_MINQUERY:
5710 case OP_UPTO:
5711 case OP_MINUPTO:
5712 case OP_EXACT:
5713 case OP_POSSTAR:
5714 case OP_POSPLUS:
5715 case OP_POSQUERY:
5716 case OP_POSUPTO:
5717 case OP_STARI:
5718 case OP_MINSTARI:
5719 case OP_PLUSI:
5720 case OP_MINPLUSI:
5721 case OP_QUERYI:
5722 case OP_MINQUERYI:
5723 case OP_UPTOI:
5724 case OP_MINUPTOI:
5725 case OP_EXACTI:
5726 case OP_POSSTARI:
5727 case OP_POSPLUSI:
5728 case OP_POSQUERYI:
5729 case OP_POSUPTOI:
5730 case OP_NOTSTAR:
5731 case OP_NOTMINSTAR:
5732 case OP_NOTPLUS:
5733 case OP_NOTMINPLUS:
5734 case OP_NOTQUERY:
5735 case OP_NOTMINQUERY:
5736 case OP_NOTUPTO:
5737 case OP_NOTMINUPTO:
5738 case OP_NOTEXACT:
5739 case OP_NOTPOSSTAR:
5740 case OP_NOTPOSPLUS:
5741 case OP_NOTPOSQUERY:
5742 case OP_NOTPOSUPTO:
5743 case OP_NOTSTARI:
5744 case OP_NOTMINSTARI:
5745 case OP_NOTPLUSI:
5746 case OP_NOTMINPLUSI:
5747 case OP_NOTQUERYI:
5748 case OP_NOTMINQUERYI:
5749 case OP_NOTUPTOI:
5750 case OP_NOTMINUPTOI:
5751 case OP_NOTEXACTI:
5752 case OP_NOTPOSSTARI:
5753 case OP_NOTPOSPLUSI:
5754 case OP_NOTPOSQUERYI:
5755 case OP_NOTPOSUPTOI:
5756 case OP_TYPESTAR:
5757 case OP_TYPEMINSTAR:
5758 case OP_TYPEPLUS:
5759 case OP_TYPEMINPLUS:
5760 case OP_TYPEQUERY:
5761 case OP_TYPEMINQUERY:
5762 case OP_TYPEUPTO:
5763 case OP_TYPEMINUPTO:
5764 case OP_TYPEEXACT:
5765 case OP_TYPEPOSSTAR:
5766 case OP_TYPEPOSPLUS:
5767 case OP_TYPEPOSQUERY:
5768 case OP_TYPEPOSUPTO:
5769 cc = compile_iterator_trypath(common, cc, parent);
5770 break;
/* A class whose data is followed by an OP_CR* repeat opcode is compiled as
   an iterator, otherwise as a single item. */
5772 case OP_CLASS:
5773 case OP_NCLASS:
5774 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5775 cc = compile_iterator_trypath(common, cc, parent);
5776 else
5777 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5778 break;
5780 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Same decision for OP_XCLASS; its length is read with GET(cc, 1). */
5781 case OP_XCLASS:
5782 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5783 cc = compile_iterator_trypath(common, cc, parent);
5784 else
5785 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5786 break;
5787 #endif
/* Back references, with or without a following OP_CR* repeat opcode. */
5789 case OP_REF:
5790 case OP_REFI:
5791 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5792 cc = compile_ref_iterator_trypath(common, cc, parent);
5793 else
5794 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
5795 break;
5797 case OP_RECURSE:
5798 cc = compile_recurse_trypath(common, cc, parent);
5799 break;
5801 case OP_ASSERT:
5802 case OP_ASSERT_NOT:
5803 case OP_ASSERTBACK:
5804 case OP_ASSERTBACK_NOT:
5805 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5806 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5807 break;
/* Only the save slots and the trypath label are emitted here; cc is moved
   past the whole bracket with bracketend(). Two slots are used when the
   bracket is closed by OP_KETRMIN, one otherwise.
   NOTE(review): the cc[1] test below runs after cc was advanced by
   bracketend(), so it no longer inspects the bracket's own opcode (unlike
   the cc[1] test in the OP_BRAZERO case) - confirm this is intended. */
5809 case OP_BRAMINZERO:
5810 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
5811 cc = bracketend(cc + 1);
5812 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5814 allocate_stack(common, 1);
5815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5817 else
5819 allocate_stack(common, 2);
5820 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5823 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
5824 if (cc[1] > OP_ASSERTBACK_NOT)
5825 decrease_call_count(common);
5826 break;
5828 case OP_ONCE:
5829 case OP_ONCE_NC:
5830 case OP_BRA:
5831 case OP_CBRA:
5832 case OP_COND:
5833 case OP_SBRA:
5834 case OP_SCBRA:
5835 case OP_SCOND:
5836 cc = compile_bracket_trypath(common, cc, parent);
5837 break;
/* OP_BRAZERO in front of an assertion opcode (cc[1] not above
   OP_ASSERTBACK_NOT) goes to the assertion generator, everything else to
   the bracket generator. */
5839 case OP_BRAZERO:
5840 if (cc[1] > OP_ASSERTBACK_NOT)
5841 cc = compile_bracket_trypath(common, cc, parent);
5842 else
5844 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5845 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5847 break;
5849 case OP_BRAPOS:
5850 case OP_CBRAPOS:
5851 case OP_SBRAPOS:
5852 case OP_SCBRAPOS:
5853 case OP_BRAPOSZERO:
5854 cc = compile_bracketpos_trypath(common, cc, parent);
5855 break;
/* The previous mark pointer is pushed on the stack; the new one (cc + 2) is
   stored both in the local mark slot and in jit_arguments.mark_ptr. The
   opcode is skipped using the length stored in cc[1]. */
5857 case OP_MARK:
5858 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5859 SLJIT_ASSERT(common->mark_ptr != 0);
5860 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5861 allocate_stack(common, 1);
5862 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5863 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5864 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
5865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
5866 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
5867 cc += 1 + 2 + cc[1];
5868 break;
/* No try-path code is emitted; only a backtrack node is registered. */
5870 case OP_COMMIT:
5871 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5872 cc += 1;
5873 break;
5875 case OP_FAIL:
5876 case OP_ACCEPT:
5877 case OP_ASSERT_ACCEPT:
5878 cc = compile_fail_accept_trypath(common, cc, parent);
5879 break;
5881 case OP_CLOSE:
5882 cc = compile_close_trypath(common, cc);
5883 break;
/* The bracket that follows is skipped without emitting any code. */
5885 case OP_SKIPZERO:
5886 cc = bracketend(cc + 1);
5887 break;
5889 default:
5890 SLJIT_ASSERT_STOP();
5891 return;
/* A NULL result from a generator aborts the walk. */
5893 if (cc == NULL)
5894 return;
5896 SLJIT_ASSERT(cc == ccend);
/* The helper macros used by the try-path generators are dropped here; the
   code that follows uses the two macros defined below instead.

   COMPILE_BACKTRACKPATH(current) calls compile_backtrackpath() and returns
   from the enclosing function (without a value) when the sljit compiler
   reports an error. It relies on `common` and `compiler` being in scope.

   CURRENT_AS(type) casts the variable named `current` to a pointer to the
   given type. */
5899 #undef PUSH_BACKTRACK
5900 #undef PUSH_BACKTRACK_NOVALUE
5901 #undef BACKTRACK_AS
5903 #define COMPILE_BACKTRACKPATH(current) \
5904 do \
5906 compile_backtrackpath(common, (current)); \
5907 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5908 return; \
5910 while (0)
5912 #define CURRENT_AS(type) ((type *)current)
/* Emits the backtrack path of a single-item iterator.

   common   compiler state.
   current  backtrack node of the iterator; current->cc is decoded again with
            get_iterator_parameters(), current->topbacktracks is the list of
            pending failure jumps, and the node's trypath label (read through
            CURRENT_AS(iterator_backtrack)) is where matching resumes.

   The stack slots released here with free_stack() mirror the
   allocate_stack() calls in compile_iterator_trypath() for the same
   opcode. */
5914 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
5916 DEFINE_COMPILER;
5917 pcre_uchar *cc = current->cc;
5918 pcre_uchar opcode;
5919 pcre_uchar type;
5920 int arg1 = -1, arg2 = -1;
5921 struct sljit_label *label = NULL;
5922 struct sljit_jump *jump = NULL;
5923 jump_list *jumplist = NULL;
5925 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5927 switch(opcode)
5929 case OP_STAR:
5930 case OP_PLUS:
5931 case OP_UPTO:
5932 case OP_CRRANGE:
/* OP_ANYNL / OP_EXTUNI: pop one saved position; a non-zero value resumes
   matching at the trypath label, a zero value falls through. */
5933 if (type == OP_ANYNL || type == OP_EXTUNI)
5935 set_jumps(current->topbacktracks, LABEL());
5936 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5937 free_stack(common, 1);
5938 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
5940 else
/* Other types: while the counter in STACK(1) is above arg2 + 1 (arg2 is
   forced to 0 for the forms without a lower bound), decrement it, step the
   saved position back by one character and resume. Otherwise release both
   slots. */
5942 if (opcode <= OP_PLUS || opcode == OP_UPTO)
5943 arg2 = 0;
5944 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5945 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
5946 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, SLJIT_IMM, 1);
5947 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5948 skip_char_back(common);
5949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5950 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
/* Pending failures land before the slots are freed for OP_CRRANGE and after
   they are freed for OP_PLUS. */
5951 if (opcode == OP_CRRANGE)
5952 set_jumps(current->topbacktracks, LABEL());
5953 JUMPHERE(jump);
5954 free_stack(common, 2);
5955 if (opcode == OP_PLUS)
5956 set_jumps(current->topbacktracks, LABEL());
5958 break;
/* Try to match one more item from the saved position; on success store the
   new position and resume, on failure release the slot. */
5960 case OP_MINSTAR:
5961 case OP_MINPLUS:
5962 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5963 compile_char1_trypath(common, type, cc, &jumplist);
5964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5965 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5966 set_jumps(jumplist, LABEL());
5967 free_stack(common, 1);
5968 if (opcode == OP_MINPLUS)
5969 set_jumps(current->topbacktracks, LABEL());
5970 break;
/* Match one more item and bump the counter in STACK(1). OP_CRMINRANGE loops
   locally (via `label`) while the counter is below arg2 + 1; matching then
   resumes while the counter is below arg1 + 2, or unconditionally for
   OP_CRMINRANGE with arg1 == 0. */
5972 case OP_MINUPTO:
5973 case OP_CRMINRANGE:
5974 if (opcode == OP_CRMINRANGE)
5976 label = LABEL();
5977 set_jumps(current->topbacktracks, label);
5979 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5980 compile_char1_trypath(common, type, cc, &jumplist);
5982 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5983 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5984 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5987 if (opcode == OP_CRMINRANGE)
5988 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5990 if (opcode == OP_CRMINRANGE && arg1 == 0)
5991 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5992 else
5993 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
5995 set_jumps(jumplist, LABEL());
5996 free_stack(common, 2);
5997 break;
/* The slot is read into STR_PTR and then zeroed, so a second visit finds 0
   and releases it. Failures of the item itself (topbacktracks) take the same
   read-and-zero route and resume at the trypath label. */
5999 case OP_QUERY:
6000 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6001 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6002 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6003 jump = JUMP(SLJIT_JUMP);
6004 set_jumps(current->topbacktracks, LABEL());
6005 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6006 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6007 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6008 JUMPHERE(jump);
6009 free_stack(common, 1);
6010 break;
/* First visit (slot non-zero): zero the slot, try the item and resume.
   A zero slot or a failing item releases the slot. */
6012 case OP_MINQUERY:
6013 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6014 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6015 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6016 compile_char1_trypath(common, type, cc, &jumplist);
6017 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6018 set_jumps(jumplist, LABEL());
6019 JUMPHERE(jump);
6020 free_stack(common, 1);
6021 break;
/* Nothing to restore; only the pending failure jumps are bound here. */
6023 case OP_EXACT:
6024 case OP_POSPLUS:
6025 set_jumps(current->topbacktracks, LABEL());
6026 break;
/* No backtrack code is emitted for these. */
6028 case OP_POSSTAR:
6029 case OP_POSQUERY:
6030 case OP_POSUPTO:
6031 break;
6033 default:
6034 SLJIT_ASSERT_STOP();
6035 break;
6039 static void compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6041 DEFINE_COMPILER;
6042 pcre_uchar *cc = current->cc;
6043 pcre_uchar type;
6045 type = cc[1 + IMM2_SIZE];
6046 if ((type & 0x1) == 0)
6048 set_jumps(current->topbacktracks, LABEL());
6049 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6050 free_stack(common, 1);
6051 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6052 return;
6055 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6056 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6057 set_jumps(current->topbacktracks, LABEL());
6058 free_stack(common, 2);
6061 static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current)
6063 DEFINE_COMPILER;
6065 set_jumps(current->topbacktracks, LABEL());
6067 if (common->has_set_som && common->mark_ptr != 0)
6069 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6071 free_stack(common, 2);
6072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6073 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6075 else if (common->has_set_som || common->mark_ptr != 0)
6077 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6078 free_stack(common, 1);
6079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6083 static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current)
6085 DEFINE_COMPILER;
6086 pcre_uchar *cc = current->cc;
6087 pcre_uchar bra = OP_BRA;
6088 struct sljit_jump *brajump = NULL;
6090 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
6091 if (*cc == OP_BRAZERO)
6093 bra = *cc;
6094 cc++;
6097 if (bra == OP_BRAZERO)
6099 SLJIT_ASSERT(current->topbacktracks == NULL);
6100 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6103 if (CURRENT_AS(assert_backtrack)->framesize < 0)
6105 set_jumps(current->topbacktracks, LABEL());
6107 if (bra == OP_BRAZERO)
6109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6110 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6111 free_stack(common, 1);
6113 return;
6116 if (bra == OP_BRAZERO)
6118 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6121 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6122 free_stack(common, 1);
6123 return;
6125 free_stack(common, 1);
6126 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6129 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6131 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr);
6132 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w));
6135 set_jumps(current->topbacktracks, LABEL());
6137 else
6138 set_jumps(current->topbacktracks, LABEL());
6140 if (bra == OP_BRAZERO)
6142 /* We know there is enough place on the stack. */
6143 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6145 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath);
6146 JUMPHERE(brajump);
6150 static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current)
6152 DEFINE_COMPILER;
6153 int opcode;
6154 int offset = 0;
6155 int localptr = CURRENT_AS(bracket_backtrack)->localptr;
6156 int stacksize;
6157 int count;
6158 pcre_uchar *cc = current->cc;
6159 pcre_uchar *ccbegin;
6160 pcre_uchar *ccprev;
6161 jump_list *jumplist = NULL;
6162 jump_list *jumplistitem = NULL;
6163 pcre_uchar bra = OP_BRA;
6164 pcre_uchar ket;
6165 assert_backtrack *assert;
6166 BOOL has_alternatives;
6167 struct sljit_jump *brazero = NULL;
6168 struct sljit_jump *once = NULL;
6169 struct sljit_jump *cond = NULL;
6170 struct sljit_label *rminlabel = NULL;
6172 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6174 bra = *cc;
6175 cc++;
6178 opcode = *cc;
6179 ccbegin = cc;
6180 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
6181 cc += GET(cc, 1);
6182 has_alternatives = *cc == OP_ALT;
6183 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6184 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
6185 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6186 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
6187 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6188 opcode = OP_SCOND;
6189 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6190 opcode = OP_ONCE;
6192 if (ket == OP_KETRMAX)
6194 if (bra == OP_BRAZERO)
6196 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6197 free_stack(common, 1);
6198 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6201 else if (ket == OP_KETRMIN)
6203 if (bra != OP_BRAMINZERO)
6205 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6206 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6208 /* Checking zero-length iteration. */
6209 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6210 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6211 else
6213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6214 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_backtrack)->recursivetrypath);
6216 if (opcode != OP_ONCE)
6217 free_stack(common, 1);
6219 else
6220 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6222 rminlabel = LABEL();
6224 else if (bra == OP_BRAZERO)
6226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6227 free_stack(common, 1);
6228 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6231 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6233 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6235 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6236 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6238 once = JUMP(SLJIT_JUMP);
6240 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6242 if (has_alternatives)
6244 /* Always exactly one alternative. */
6245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6246 free_stack(common, 1);
6248 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6249 if (SLJIT_UNLIKELY(!jumplistitem))
6250 return;
6251 jumplist = jumplistitem;
6252 jumplistitem->next = NULL;
6253 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6256 else if (*cc == OP_ALT)
6258 /* Build a jump list. Get the last successfully matched branch index. */
6259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6260 free_stack(common, 1);
6261 count = 1;
6264 /* Append as the last item. */
6265 if (jumplist != NULL)
6267 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6268 jumplistitem = jumplistitem->next;
6270 else
6272 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6273 jumplist = jumplistitem;
6276 if (SLJIT_UNLIKELY(!jumplistitem))
6277 return;
6279 jumplistitem->next = NULL;
6280 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6281 cc += GET(cc, 1);
6283 while (*cc == OP_ALT);
6285 cc = ccbegin + GET(ccbegin, 1);
6288 COMPILE_BACKTRACKPATH(current->top);
6289 if (current->topbacktracks)
6290 set_jumps(current->topbacktracks, LABEL());
6292 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6294 /* Conditional block always has at most one alternative. */
6295 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6297 SLJIT_ASSERT(has_alternatives);
6298 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6299 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6301 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6302 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6305 cond = JUMP(SLJIT_JUMP);
6306 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
6308 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
6310 SLJIT_ASSERT(has_alternatives);
6311 cond = JUMP(SLJIT_JUMP);
6312 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
6314 else
6315 SLJIT_ASSERT(!has_alternatives);
6318 if (has_alternatives)
6320 count = 1;
6323 current->top = NULL;
6324 current->topbacktracks = NULL;
6325 current->nextbacktracks = NULL;
6326 if (*cc == OP_ALT)
6328 ccprev = cc + 1 + LINK_SIZE;
6329 cc += GET(cc, 1);
6330 if (opcode != OP_COND && opcode != OP_SCOND)
6332 if (localptr != 0 && opcode != OP_ONCE)
6333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6334 else
6335 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6337 compile_trypath(common, ccprev, cc, current);
6338 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6339 return;
6342 /* Instructions after the current alternative is succesfully matched. */
6343 /* There is a similar code in compile_bracket_trypath. */
6344 if (opcode == OP_ONCE)
6346 if (CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6348 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6349 /* TMP2 which is set here used by OP_KETRMAX below. */
6350 if (ket == OP_KETRMAX)
6351 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6352 else if (ket == OP_KETRMIN)
6354 /* Move the STR_PTR to the localptr. */
6355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6358 else
6360 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_w));
6361 if (ket == OP_KETRMAX)
6363 /* TMP2 which is set here used by OP_KETRMAX below. */
6364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6369 stacksize = 0;
6370 if (opcode != OP_ONCE)
6371 stacksize++;
6372 if (ket != OP_KET || bra != OP_BRA)
6373 stacksize++;
6375 if (stacksize > 0) {
6376 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6377 allocate_stack(common, stacksize);
6378 else
6380 /* We know we have place at least for one item on the top of the stack. */
6381 SLJIT_ASSERT(stacksize == 1);
6382 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6386 stacksize = 0;
6387 if (ket != OP_KET || bra != OP_BRA)
6389 if (ket != OP_KET)
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6391 else
6392 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6393 stacksize++;
6396 if (opcode != OP_ONCE)
6397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6399 if (offset != 0)
6401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6406 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alttrypath);
6408 if (opcode != OP_ONCE)
6410 SLJIT_ASSERT(jumplist);
6411 JUMPHERE(jumplist->jump);
6412 jumplist = jumplist->next;
6415 COMPILE_BACKTRACKPATH(current->top);
6416 if (current->topbacktracks)
6417 set_jumps(current->topbacktracks, LABEL());
6418 SLJIT_ASSERT(!current->nextbacktracks);
6420 while (*cc == OP_ALT);
6421 SLJIT_ASSERT(!jumplist);
6423 if (cond != NULL)
6425 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
6426 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6427 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6430 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6431 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6434 JUMPHERE(cond);
6437 /* Free the STR_PTR. */
6438 if (localptr == 0)
6439 free_stack(common, 1);
6442 if (offset != 0)
6444 /* Using both tmp register is better for instruction scheduling. */
6445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6446 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6447 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6448 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6450 free_stack(common, 3);
6452 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6455 free_stack(common, 1);
6457 else if (opcode == OP_ONCE)
6459 cc = ccbegin + GET(ccbegin, 1);
6460 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6462 /* Reset head and drop saved frame. */
6463 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6464 free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize);
6466 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6468 /* The STR_PTR must be released. */
6469 free_stack(common, 1);
6472 JUMPHERE(once);
6473 /* Restore previous localptr */
6474 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6475 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_w));
6476 else if (ket == OP_KETRMIN)
6478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6479 /* See the comment below. */
6480 free_stack(common, 2);
6481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6485 if (ket == OP_KETRMAX)
6487 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6488 if (bra != OP_BRAZERO)
6489 free_stack(common, 1);
6490 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6491 if (bra == OP_BRAZERO)
6493 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6494 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6495 JUMPHERE(brazero);
6496 free_stack(common, 1);
6499 else if (ket == OP_KETRMIN)
6501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6503 /* OP_ONCE removes everything in case of a backtrack, so we don't
6504 need to explicitly release the STR_PTR. The extra release would
6505 affect badly the free_stack(2) above. */
6506 if (opcode != OP_ONCE)
6507 free_stack(common, 1);
6508 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6509 if (opcode == OP_ONCE)
6510 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6511 else if (bra == OP_BRAMINZERO)
6512 free_stack(common, 1);
6514 else if (bra == OP_BRAZERO)
6516 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6517 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6518 JUMPHERE(brazero);
6522 static void compile_bracketpos_backtrackpath(compiler_common *common, struct backtrack_common *current)
6524 DEFINE_COMPILER;
6525 int offset;
6526 struct sljit_jump *jump;
6528 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
6530 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6532 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6533 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6534 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6538 set_jumps(current->topbacktracks, LABEL());
6539 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6540 return;
6543 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr);
6544 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6546 if (current->topbacktracks)
6548 jump = JUMP(SLJIT_JUMP);
6549 set_jumps(current->topbacktracks, LABEL());
6550 /* Drop the stack frame. */
6551 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6552 JUMPHERE(jump);
6554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_w));
6557 static void compile_braminzero_backtrackpath(compiler_common *common, struct backtrack_common *current)
6559 assert_backtrack backtrack;
6561 current->top = NULL;
6562 current->topbacktracks = NULL;
6563 current->nextbacktracks = NULL;
6564 if (current->cc[1] > OP_ASSERTBACK_NOT)
6566 /* Manual call of compile_bracket_trypath and compile_bracket_backtrackpath. */
6567 compile_bracket_trypath(common, current->cc, current);
6568 compile_bracket_backtrackpath(common, current->top);
6570 else
6572 memset(&backtrack, 0, sizeof(backtrack));
6573 backtrack.common.cc = current->cc;
6574 backtrack.trypath = CURRENT_AS(braminzero_backtrack)->trypath;
6575 /* Manual call of compile_assert_trypath. */
6576 compile_assert_trypath(common, current->cc, &backtrack, FALSE);
6578 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
6581 static void compile_backtrackpath(compiler_common *common, struct backtrack_common *current)
6583 DEFINE_COMPILER;
6585 while (current)
6587 if (current->nextbacktracks != NULL)
6588 set_jumps(current->nextbacktracks, LABEL());
6589 switch(*current->cc)
6591 case OP_SET_SOM:
6592 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6593 free_stack(common, 1);
6594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6595 break;
6597 case OP_STAR:
6598 case OP_MINSTAR:
6599 case OP_PLUS:
6600 case OP_MINPLUS:
6601 case OP_QUERY:
6602 case OP_MINQUERY:
6603 case OP_UPTO:
6604 case OP_MINUPTO:
6605 case OP_EXACT:
6606 case OP_POSSTAR:
6607 case OP_POSPLUS:
6608 case OP_POSQUERY:
6609 case OP_POSUPTO:
6610 case OP_STARI:
6611 case OP_MINSTARI:
6612 case OP_PLUSI:
6613 case OP_MINPLUSI:
6614 case OP_QUERYI:
6615 case OP_MINQUERYI:
6616 case OP_UPTOI:
6617 case OP_MINUPTOI:
6618 case OP_EXACTI:
6619 case OP_POSSTARI:
6620 case OP_POSPLUSI:
6621 case OP_POSQUERYI:
6622 case OP_POSUPTOI:
6623 case OP_NOTSTAR:
6624 case OP_NOTMINSTAR:
6625 case OP_NOTPLUS:
6626 case OP_NOTMINPLUS:
6627 case OP_NOTQUERY:
6628 case OP_NOTMINQUERY:
6629 case OP_NOTUPTO:
6630 case OP_NOTMINUPTO:
6631 case OP_NOTEXACT:
6632 case OP_NOTPOSSTAR:
6633 case OP_NOTPOSPLUS:
6634 case OP_NOTPOSQUERY:
6635 case OP_NOTPOSUPTO:
6636 case OP_NOTSTARI:
6637 case OP_NOTMINSTARI:
6638 case OP_NOTPLUSI:
6639 case OP_NOTMINPLUSI:
6640 case OP_NOTQUERYI:
6641 case OP_NOTMINQUERYI:
6642 case OP_NOTUPTOI:
6643 case OP_NOTMINUPTOI:
6644 case OP_NOTEXACTI:
6645 case OP_NOTPOSSTARI:
6646 case OP_NOTPOSPLUSI:
6647 case OP_NOTPOSQUERYI:
6648 case OP_NOTPOSUPTOI:
6649 case OP_TYPESTAR:
6650 case OP_TYPEMINSTAR:
6651 case OP_TYPEPLUS:
6652 case OP_TYPEMINPLUS:
6653 case OP_TYPEQUERY:
6654 case OP_TYPEMINQUERY:
6655 case OP_TYPEUPTO:
6656 case OP_TYPEMINUPTO:
6657 case OP_TYPEEXACT:
6658 case OP_TYPEPOSSTAR:
6659 case OP_TYPEPOSPLUS:
6660 case OP_TYPEPOSQUERY:
6661 case OP_TYPEPOSUPTO:
6662 case OP_CLASS:
6663 case OP_NCLASS:
6664 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6665 case OP_XCLASS:
6666 #endif
6667 compile_iterator_backtrackpath(common, current);
6668 break;
6670 case OP_REF:
6671 case OP_REFI:
6672 compile_ref_iterator_backtrackpath(common, current);
6673 break;
6675 case OP_RECURSE:
6676 compile_recurse_backtrackpath(common, current);
6677 break;
6679 case OP_ASSERT:
6680 case OP_ASSERT_NOT:
6681 case OP_ASSERTBACK:
6682 case OP_ASSERTBACK_NOT:
6683 compile_assert_backtrackpath(common, current);
6684 break;
6686 case OP_ONCE:
6687 case OP_ONCE_NC:
6688 case OP_BRA:
6689 case OP_CBRA:
6690 case OP_COND:
6691 case OP_SBRA:
6692 case OP_SCBRA:
6693 case OP_SCOND:
6694 compile_bracket_backtrackpath(common, current);
6695 break;
6697 case OP_BRAZERO:
6698 if (current->cc[1] > OP_ASSERTBACK_NOT)
6699 compile_bracket_backtrackpath(common, current);
6700 else
6701 compile_assert_backtrackpath(common, current);
6702 break;
6704 case OP_BRAPOS:
6705 case OP_CBRAPOS:
6706 case OP_SBRAPOS:
6707 case OP_SCBRAPOS:
6708 case OP_BRAPOSZERO:
6709 compile_bracketpos_backtrackpath(common, current);
6710 break;
6712 case OP_BRAMINZERO:
6713 compile_braminzero_backtrackpath(common, current);
6714 break;
6716 case OP_MARK:
6717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6718 free_stack(common, 1);
6719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6720 break;
6722 case OP_COMMIT:
6723 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6724 if (common->leavelabel == NULL)
6725 add_jump(compiler, &common->leave, JUMP(SLJIT_JUMP));
6726 else
6727 JUMPTO(SLJIT_JUMP, common->leavelabel);
6728 break;
6730 case OP_FAIL:
6731 case OP_ACCEPT:
6732 case OP_ASSERT_ACCEPT:
6733 set_jumps(current->topbacktracks, LABEL());
6734 break;
6736 default:
6737 SLJIT_ASSERT_STOP();
6738 break;
6740 current = current->prev;
6744 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6746 DEFINE_COMPILER;
6747 pcre_uchar *cc = common->start + common->currententry->start;
6748 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6749 pcre_uchar *ccend = bracketend(cc);
6750 int localsize = get_localsize(common, ccbegin, ccend);
6751 int framesize = get_framesize(common, cc, TRUE);
6752 int alternativesize;
6753 BOOL needsframe;
6754 backtrack_common altbacktrack;
6755 struct sljit_label *save_leavelabel = common->leavelabel;
6756 jump_list *save_leave = common->leave;
6757 struct sljit_jump *jump;
6759 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6760 needsframe = framesize >= 0;
6761 if (!needsframe)
6762 framesize = 0;
6763 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6765 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
6766 common->currententry->entry = LABEL();
6767 set_jumps(common->currententry->calls, common->currententry->entry);
6769 sljit_emit_fast_enter(compiler, TMP2, 0);
6770 allocate_stack(common, localsize + framesize + alternativesize);
6771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6772 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6773 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
6774 if (needsframe)
6775 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE);
6777 if (alternativesize > 0)
6778 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6780 memset(&altbacktrack, 0, sizeof(backtrack_common));
6781 common->leavelabel = NULL;
6782 common->acceptlabel = NULL;
6783 common->leave = NULL;
6784 common->accept = NULL;
6785 altbacktrack.cc = ccbegin;
6786 cc += GET(cc, 1);
6787 while (1)
6789 altbacktrack.top = NULL;
6790 altbacktrack.topbacktracks = NULL;
6792 if (altbacktrack.cc != ccbegin)
6793 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6795 compile_trypath(common, altbacktrack.cc, cc, &altbacktrack);
6796 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6798 common->leavelabel = save_leavelabel;
6799 common->leave = save_leave;
6800 return;
6803 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6805 compile_backtrackpath(common, altbacktrack.top);
6806 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6808 common->leavelabel = save_leavelabel;
6809 common->leave = save_leave;
6810 return;
6812 set_jumps(altbacktrack.topbacktracks, LABEL());
6814 if (*cc != OP_ALT)
6815 break;
6817 altbacktrack.cc = cc + 1 + LINK_SIZE;
6818 cc += GET(cc, 1);
6820 /* None of them matched. */
6821 if (common->leave != NULL)
6822 set_jumps(common->leave, LABEL());
6824 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6825 jump = JUMP(SLJIT_JUMP);
6827 set_jumps(common->accept, LABEL());
6828 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head);
6829 if (needsframe)
6831 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6832 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6833 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6835 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6837 JUMPHERE(jump);
6838 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6839 free_stack(common, localsize + framesize + alternativesize);
6840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6841 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6842 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0);
6843 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6845 common->leavelabel = save_leavelabel;
6846 common->leave = save_leave;
/* These helper macros are used by the backtrack-path generators above; they
   are undefined here so they are not visible to the rest of the file. */
#undef COMPILE_BACKTRACKPATH
#undef CURRENT_AS
6852 void
6853 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
6855 struct sljit_compiler *compiler;
6856 backtrack_common rootbacktrack;
6857 compiler_common common_data;
6858 compiler_common *common = &common_data;
6859 const pcre_uint8 *tables = re->tables;
6860 pcre_study_data *study;
6861 int localsize;
6862 pcre_uchar *ccend;
6863 executable_functions *functions;
6864 void *executable_func;
6865 sljit_uw executable_size;
6866 struct sljit_label *mainloop = NULL;
6867 struct sljit_label *empty_match_found;
6868 struct sljit_label *empty_match_backtrack;
6869 struct sljit_jump *jump;
6870 struct sljit_jump *reqbyte_notfound = NULL;
6871 struct sljit_jump *empty_match;
6873 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6874 study = extra->study_data;
6876 if (!tables)
6877 tables = PRIV(default_tables);
6879 memset(&rootbacktrack, 0, sizeof(backtrack_common));
6880 memset(common, 0, sizeof(compiler_common));
6881 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6883 common->start = rootbacktrack.cc;
6884 common->fcc = tables + fcc_offset;
6885 common->lcc = (sljit_w)(tables + lcc_offset);
6886 common->mode = mode;
6887 common->nltype = NLTYPE_FIXED;
6888 switch(re->options & PCRE_NEWLINE_BITS)
6890 case 0:
6891 /* Compile-time default */
6892 switch (NEWLINE)
6894 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6895 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6896 default: common->newline = NEWLINE; break;
6898 break;
6899 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6900 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6901 case PCRE_NEWLINE_CR+
6902 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6903 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6904 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6905 default: return;
6907 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6908 common->bsr_nltype = NLTYPE_ANYCRLF;
6909 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6910 common->bsr_nltype = NLTYPE_ANY;
6911 else
6913 #ifdef BSR_ANYCRLF
6914 common->bsr_nltype = NLTYPE_ANYCRLF;
6915 #else
6916 common->bsr_nltype = NLTYPE_ANY;
6917 #endif
6919 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6920 common->ctypes = (sljit_w)(tables + ctypes_offset);
6921 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6922 common->name_count = re->name_count;
6923 common->name_entry_size = re->name_entry_size;
6924 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6925 #ifdef SUPPORT_UTF
6926 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6927 common->utf = (re->options & PCRE_UTF8) != 0;
6928 #ifdef SUPPORT_UCP
6929 common->use_ucp = (re->options & PCRE_UCP) != 0;
6930 #endif
6931 #endif /* SUPPORT_UTF */
6932 ccend = bracketend(rootbacktrack.cc);
6934 /* Calculate the local space size on the stack. */
6935 common->ovector_start = CALL_LIMIT + sizeof(sljit_w);
6937 SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6938 localsize = get_localspace(common, rootbacktrack.cc, ccend);
6939 if (localsize < 0)
6940 return;
6942 /* Checking flags and updating ovector_start. */
6943 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
6945 common->req_char_ptr = common->ovector_start;
6946 common->ovector_start += sizeof(sljit_w);
6948 if (mode != JIT_COMPILE)
6950 common->start_used_ptr = common->ovector_start;
6951 common->ovector_start += sizeof(sljit_w);
6952 if (mode == JIT_PARTIAL_SOFT_COMPILE)
6954 common->hit_start = common->ovector_start;
6955 common->ovector_start += sizeof(sljit_w);
6958 if ((re->options & PCRE_FIRSTLINE) != 0)
6960 common->first_line_end = common->ovector_start;
6961 common->ovector_start += sizeof(sljit_w);
6964 /* Aligning ovector to even number of sljit words. */
6965 if ((common->ovector_start & sizeof(sljit_w)) != 0)
6966 common->ovector_start += sizeof(sljit_w);
6968 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
6969 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6970 localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6971 if (localsize > SLJIT_MAX_LOCAL_SIZE)
6972 return;
6973 common->localptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(int));
6974 if (!common->localptrs)
6975 return;
6976 memset(common->localptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
6977 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6979 compiler = sljit_create_compiler();
6980 if (!compiler)
6982 SLJIT_FREE(common->localptrs);
6983 return;
6985 common->compiler = compiler;
6987 /* Main pcre_jit_exec entry. */
6988 sljit_emit_enter(compiler, 1, 5, 5, localsize);
6990 /* Register init. */
6991 reset_ovector(common, (re->top_bracket + 1) * 2);
6992 if (common->req_char_ptr != 0)
6993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_TEMPORARY_REG1, 0);
6995 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
6996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
6997 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6998 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7000 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
7001 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
7002 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
7003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
7005 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
7008 /* Main part of the matching */
7009 if ((re->options & PCRE_ANCHORED) == 0)
7011 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
7012 /* Forward search if possible. */
7013 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
7015 if ((re->flags & PCRE_FIRSTSET) != 0)
7016 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
7017 else if ((re->flags & PCRE_STARTLINE) != 0)
7018 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
7019 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
7020 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
7023 if (common->req_char_ptr != 0)
7024 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
7026 /* Store the current STR_PTR in OVECTOR(0). */
7027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
7028 /* Copy the limit of allowed recursions. */
7029 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
7030 if (common->mark_ptr != 0)
7031 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
7032 /* Copy the beginning of the string. */
7033 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7035 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
7036 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
7037 JUMPHERE(jump);
7039 else if (mode == JIT_PARTIAL_HARD_COMPILE)
7040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
7042 compile_trypath(common, rootbacktrack.cc, ccend, &rootbacktrack);
7043 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7045 sljit_free_compiler(compiler);
7046 SLJIT_FREE(common->localptrs);
7047 return;
7050 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
7051 empty_match_found = LABEL();
7053 common->acceptlabel = LABEL();
7054 if (common->accept != NULL)
7055 set_jumps(common->accept, common->acceptlabel);
7057 /* This means we have a match. Update the ovector. */
7058 copy_ovector(common, re->top_bracket + 1);
7059 common->leavelabel = LABEL();
7060 if (common->leave != NULL)
7061 set_jumps(common->leave, common->leavelabel);
7062 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
7064 if (mode != JIT_COMPILE)
7066 common->partialmatchlabel = LABEL();
7067 set_jumps(common->partialmatch, common->partialmatchlabel);
7068 return_with_partial_match(common, common->leavelabel);
7071 empty_match_backtrack = LABEL();
7072 compile_backtrackpath(common, rootbacktrack.top);
7073 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7075 sljit_free_compiler(compiler);
7076 SLJIT_FREE(common->localptrs);
7077 return;
7080 SLJIT_ASSERT(rootbacktrack.prev == NULL);
7082 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7084 /* Update hit_start only the first time. */
7085 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
7086 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
7087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
7088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
7089 JUMPHERE(jump);
7092 /* Check we have remaining characters. */
7093 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
7095 if ((re->options & PCRE_ANCHORED) == 0)
7097 if ((re->options & PCRE_FIRSTLINE) == 0)
7099 if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
7101 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
7102 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
7104 else
7105 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
7107 else
7109 SLJIT_ASSERT(common->first_line_end != 0);
7110 if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
7112 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
7113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
7114 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
7115 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
7116 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
7117 JUMPTO(SLJIT_C_ZERO, mainloop);
7119 else
7120 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, mainloop);
7124 /* No more remaining characters. */
7125 if (reqbyte_notfound != NULL)
7126 JUMPHERE(reqbyte_notfound);
7128 if (mode == JIT_PARTIAL_SOFT_COMPILE)
7129 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0, common->partialmatchlabel);
7131 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
7132 JUMPTO(SLJIT_JUMP, common->leavelabel);
7134 flush_stubs(common);
7136 JUMPHERE(empty_match);
7137 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7138 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
7139 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack);
7140 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
7141 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
7142 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7143 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
7144 JUMPTO(SLJIT_JUMP, empty_match_backtrack);
7146 common->currententry = common->entries;
7147 while (common->currententry != NULL)
7149 /* Might add new entries. */
7150 compile_recurse(common);
7151 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7153 sljit_free_compiler(compiler);
7154 SLJIT_FREE(common->localptrs);
7155 return;
7157 flush_stubs(common);
7158 common->currententry = common->currententry->next;
7161 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
7162 /* This is a (really) rare case. */
7163 set_jumps(common->stackalloc, LABEL());
7164 /* RETURN_ADDR is not a saved register. */
7165 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
7166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
7167 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7169 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
7170 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
7172 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
7173 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7174 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
7176 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
7177 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
7178 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
7179 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
7181 /* Allocation failed. */
7182 JUMPHERE(jump);
7183 /* We break the return address cache here, but this is a really rare case. */
7184 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
7185 JUMPTO(SLJIT_JUMP, common->leavelabel);
7187 /* Call limit reached. */
7188 set_jumps(common->calllimit, LABEL());
7189 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
7190 JUMPTO(SLJIT_JUMP, common->leavelabel);
7192 if (common->revertframes != NULL)
7194 set_jumps(common->revertframes, LABEL());
7195 do_revertframes(common);
7197 if (common->wordboundary != NULL)
7199 set_jumps(common->wordboundary, LABEL());
7200 check_wordboundary(common);
7202 if (common->anynewline != NULL)
7204 set_jumps(common->anynewline, LABEL());
7205 check_anynewline(common);
7207 if (common->hspace != NULL)
7209 set_jumps(common->hspace, LABEL());
7210 check_hspace(common);
7212 if (common->vspace != NULL)
7214 set_jumps(common->vspace, LABEL());
7215 check_vspace(common);
7217 if (common->casefulcmp != NULL)
7219 set_jumps(common->casefulcmp, LABEL());
7220 do_casefulcmp(common);
7222 if (common->caselesscmp != NULL)
7224 set_jumps(common->caselesscmp, LABEL());
7225 do_caselesscmp(common);
7227 #ifdef SUPPORT_UTF
7228 if (common->utfreadchar != NULL)
7230 set_jumps(common->utfreadchar, LABEL());
7231 do_utfreadchar(common);
7233 #ifdef COMPILE_PCRE8
7234 if (common->utfreadtype8 != NULL)
7236 set_jumps(common->utfreadtype8, LABEL());
7237 do_utfreadtype8(common);
7239 #endif /* COMPILE_PCRE8 */
7240 #endif /* SUPPORT_UTF */
7241 #ifdef SUPPORT_UCP
7242 if (common->getunichartype != NULL)
7244 set_jumps(common->getunichartype, LABEL());
7245 do_getunichartype(common);
7247 if (common->getunichartype_2 != NULL)
7249 set_jumps(common->getunichartype_2, LABEL());
7250 do_getunichartype_2(common);
7252 if (common->getunicharscript != NULL)
7254 set_jumps(common->getunicharscript, LABEL());
7255 do_getunicharscript(common);
7257 #endif
7259 SLJIT_FREE(common->localptrs);
7260 executable_func = sljit_generate_code(compiler);
7261 executable_size = sljit_get_generated_code_size(compiler);
7262 sljit_free_compiler(compiler);
7263 if (executable_func == NULL)
7264 return;
7266 /* Reuse the function descriptor if possible. */
7267 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
7268 functions = (executable_functions *)extra->executable_jit;
7269 else
7271 functions = SLJIT_MALLOC(sizeof(executable_functions));
7272 if (functions == NULL)
7274 /* This case is highly unlikely, although not impossible, since we
7275 have just freed a lot of memory. */
7276 sljit_free_code(executable_func);
7277 return;
7279 memset(functions, 0, sizeof(executable_functions));
7280 extra->executable_jit = functions;
7281 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
7284 functions->executable_funcs[mode] = executable_func;
7285 functions->executable_sizes[mode] = executable_size;
7288 static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
7290 union {
7291 void* executable_func;
7292 jit_function call_executable_func;
7293 } convert_executable_func;
7294 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
7295 struct sljit_stack local_stack;
7297 local_stack.top = (sljit_w)&local_area;
7298 local_stack.base = local_stack.top;
7299 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
7300 local_stack.max_limit = local_stack.limit;
7301 arguments->stack = &local_stack;
7302 convert_executable_func.executable_func = executable_func;
7303 return convert_executable_func.call_executable_func(arguments);
7307 PRIV(jit_exec)(const REAL_PCRE *re, const PUBL(extra) *extra_data, const pcre_uchar *subject,
7308 int length, int start_offset, int options, int *offsets, int offsetcount)
7310 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
7311 union {
7312 void* executable_func;
7313 jit_function call_executable_func;
7314 } convert_executable_func;
7315 jit_arguments arguments;
7316 int maxoffsetcount;
7317 int retval;
7318 int mode = JIT_COMPILE;
7320 if ((options & PCRE_PARTIAL_HARD) != 0)
7321 mode = JIT_PARTIAL_HARD_COMPILE;
7322 else if ((options & PCRE_PARTIAL_SOFT) != 0)
7323 mode = JIT_PARTIAL_SOFT_COMPILE;
7325 if (functions->executable_funcs[mode] == NULL)
7326 return PCRE_ERROR_NULL;
7328 /* Sanity checks should be handled by pcre_exec. */
7329 arguments.stack = NULL;
7330 arguments.str = subject + start_offset;
7331 arguments.begin = subject;
7332 arguments.end = subject + length;
7333 arguments.mark_ptr = NULL;
7334 /* JIT decreases this value less frequently than the interpreter. */
7335 arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit;
7336 arguments.notbol = (options & PCRE_NOTBOL) != 0;
7337 arguments.noteol = (options & PCRE_NOTEOL) != 0;
7338 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
7339 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
7340 arguments.offsets = offsets;
7342 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
7343 the output vector for storing captured strings, with the remainder used as
7344 workspace. We don't need the workspace here. For compatibility, we limit the
7345 number of captured strings in the same way as pcre_exec(), so that the user
7346 gets the same result with and without JIT. */
7348 if (offsetcount != 2)
7349 offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
7350 maxoffsetcount = (re->top_bracket + 1) * 2;
7351 if (offsetcount > maxoffsetcount)
7352 offsetcount = maxoffsetcount;
7353 arguments.offsetcount = offsetcount;
7355 if (functions->callback)
7356 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
7357 else
7358 arguments.stack = (struct sljit_stack *)functions->userdata;
7360 if (arguments.stack == NULL)
7361 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
7362 else
7364 convert_executable_func.executable_func = functions->executable_funcs[mode];
7365 retval = convert_executable_func.call_executable_func(&arguments);
7368 if (retval * 2 > offsetcount)
7369 retval = 0;
7370 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
7371 *(extra_data->mark) = arguments.mark_ptr;
7373 return retval;
7376 void
7377 PRIV(jit_free)(void *executable_funcs)
7379 int i;
7380 executable_functions *functions = (executable_functions *)executable_funcs;
7381 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
7383 if (functions->executable_funcs[i] != NULL)
7384 sljit_free_code(functions->executable_funcs[i]);
7386 SLJIT_FREE(functions);
7390 PRIV(jit_get_size)(void *executable_funcs)
7392 int i;
7393 sljit_uw size = 0;
7394 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
7395 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
7396 size += executable_sizes[i];
7397 return (int)size;
7400 const char*
7401 PRIV(jit_get_target)(void)
7403 return sljit_get_platform_name();
7406 #ifdef COMPILE_PCRE8
7407 PCRE_EXP_DECL pcre_jit_stack *
7408 pcre_jit_stack_alloc(int startsize, int maxsize)
7409 #else
7410 PCRE_EXP_DECL pcre16_jit_stack *
7411 pcre16_jit_stack_alloc(int startsize, int maxsize)
7412 #endif
7414 if (startsize < 1 || maxsize < 1)
7415 return NULL;
7416 if (startsize > maxsize)
7417 startsize = maxsize;
7418 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
7419 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
7420 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
7423 #ifdef COMPILE_PCRE8
7424 PCRE_EXP_DECL void
7425 pcre_jit_stack_free(pcre_jit_stack *stack)
7426 #else
7427 PCRE_EXP_DECL void
7428 pcre16_jit_stack_free(pcre16_jit_stack *stack)
7429 #endif
7431 sljit_free_stack((struct sljit_stack *)stack);
7434 #ifdef COMPILE_PCRE8
7435 PCRE_EXP_DECL void
7436 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
7437 #else
7438 PCRE_EXP_DECL void
7439 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
7440 #endif
7442 executable_functions *functions;
7443 if (extra != NULL &&
7444 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
7445 extra->executable_jit != NULL)
7447 functions = (executable_functions *)extra->executable_jit;
7448 functions->callback = callback;
7449 functions->userdata = userdata;
7453 #else /* SUPPORT_JIT */
7455 /* These are dummy functions to avoid linking errors when JIT support is not
7456 being compiled. */
7458 #ifdef COMPILE_PCRE8
7459 PCRE_EXP_DECL pcre_jit_stack *
7460 pcre_jit_stack_alloc(int startsize, int maxsize)
7461 #else
7462 PCRE_EXP_DECL pcre16_jit_stack *
7463 pcre16_jit_stack_alloc(int startsize, int maxsize)
7464 #endif
7466 (void)startsize;
7467 (void)maxsize;
7468 return NULL;
7471 #ifdef COMPILE_PCRE8
7472 PCRE_EXP_DECL void
7473 pcre_jit_stack_free(pcre_jit_stack *stack)
7474 #else
7475 PCRE_EXP_DECL void
7476 pcre16_jit_stack_free(pcre16_jit_stack *stack)
7477 #endif
7479 (void)stack;
7482 #ifdef COMPILE_PCRE8
7483 PCRE_EXP_DECL void
7484 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
7485 #else
7486 PCRE_EXP_DECL void
7487 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
7488 #endif
7490 (void)extra;
7491 (void)callback;
7492 (void)userdata;
7495 #endif
7497 /* End of pcre_jit_compile.c */