regex: unicode: Update to Unicode 6.1.0
[glib.git] / glib / pcre / pcre_jit_compile.c
blob97d227c8f06f47c0c90a72253afac54a7c7439d7
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
47 #include "pcre_internal.h"
49 #ifdef SUPPORT_JIT
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
62 #include "sljit/sljitLir.c"
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
68 /* Allocate memory on the stack. Fast, but limited size. */
69 #define LOCAL_SPACE_SIZE 32768
/* NOTE(review): presumably the amount by which the backtracking stack is
   enlarged when it overflows - confirm at the use site. */
71 #define STACK_GROWTH_RATE 8192
73 /* Enable to check that the allocation could destroy temporaries. */
/* When defined, allocate_stack() deliberately overwrites TMP1, TMP3,
   RETURN_ADDR, LOCALS0 and LOCALS1 with a dummy value. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
79 Short summary about the backtracking mechanism employed by the jit code generator:
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
107 A(B|C)D
109 The generated code will be the following:
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
135 Saved stack frames:
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
145 Thus we can restore the locals to a particular point in the stack.
/* Argument block passed to the generated code (the jit_function type takes
   a pointer to this structure). */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
/* reset_ovector() initialises the ovector slots to begin - 1, and
   copy_ovector() subtracts begin from every pointer it exports. */
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
/* Integer output vector filled by copy_ovector(). */
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
/* Number of entries copy_ovector() writes into offsets. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
/* Bundles a generated function with a callback, its user data and the
   size of the generated code.
   NOTE(review): presumably the callback supplies the runtime stack -
   confirm where this structure is consumed. */
165 typedef struct executable_function {
166 void *executable_func;
167 PUBL(jit_callback) callback;
168 void *userdata;
169 sljit_uw executable_size;
170 } executable_function;
/* Singly linked list of emitted jumps whose target is not known yet.
   add_jump() prepends an item, set_jumps() binds every item to a label. */
172 typedef struct jump_list {
173 struct sljit_jump *jump;
174 struct jump_list *next;
175 } jump_list;
/* Kinds of out-of-line stubs; flush_stubs() dispatches on this value. */
177 enum stub_types { stack_alloc };
/* One pending out-of-line stub, created by add_stub() and emitted later by
   flush_stubs(). */
179 typedef struct stub_list {
180 enum stub_types type;
181 int data;
/* Jump that enters the stub; flush_stubs() binds it to the stub code. */
182 struct sljit_jump *start;
/* Label recorded by add_stub(); the stub jumps back to it when done. */
183 struct sljit_label *leave;
184 struct stub_list *next;
185 } stub_list;
/* Function pointer type: takes a jit_arguments block and returns an int.
   NOTE(review): presumably the type of the generated matcher entry point. */
187 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189 /* The following structure is the key data type for the recursive
190 code generator. It is allocated by compile_hotpath, and contains
191 the arguments for compile_fallbackpath. Must be the first member
192 of its descendants. */
193 typedef struct fallback_common {
194 /* Concatenation stack. */
195 struct fallback_common *prev;
196 jump_list *nextfallbacks;
197 /* Internal stack (for component operators). */
198 struct fallback_common *top;
199 jump_list *topfallbacks;
200 /* Opcode pointer. */
201 pcre_uchar *cc;
202 } fallback_common;
/* Fallback record for assertions; begins with fallback_common as that
   structure requires of its descendants. */
204 typedef struct assert_fallback {
205 fallback_common common;
206 jump_list *condfailed;
207 /* Less than 0 (-1) if a frame is not needed. */
208 int framesize;
209 /* Points to our private memory word on the stack. */
210 int localptr;
211 /* For iterators. */
212 struct sljit_label *hotpath;
213 } assert_fallback;
/* Fallback record for brackets; begins with fallback_common as that
   structure requires of its descendants. */
215 typedef struct bracket_fallback {
216 fallback_common common;
217 /* Where to continue if an alternative is successfully matched. */
218 struct sljit_label *althotpath;
219 /* For rmin and rmax iterators. */
220 struct sljit_label *recursivehotpath;
221 /* For greedy ? operator. */
222 struct sljit_label *zerohotpath;
223 /* Contains the branches of a failed condition. */
/* Only one member of the union is meaningful for a given bracket kind. */
224 union {
225 /* Both for OP_COND, OP_SCOND. */
226 jump_list *condfailed;
227 assert_fallback *assert;
228 /* For OP_ONCE. -1 if not needed. */
229 int framesize;
230 } u;
231 /* Points to our private memory word on the stack. */
232 int localptr;
233 } bracket_fallback;
/* Fallback record whose name suggests the possessive (*POS) bracket
   opcodes - NOTE(review): confirm at the use site. Begins with
   fallback_common as that structure requires of its descendants. */
235 typedef struct bracketpos_fallback {
236 fallback_common common;
237 /* Points to our private memory word on the stack. */
238 int localptr;
239 /* Reverting stack is needed. */
240 int framesize;
241 /* Allocated stack size. */
242 int stacksize;
243 } bracketpos_fallback;
/* Fallback record carrying one hot-path label.
   NOTE(review): the name suggests it belongs to OP_BRAMINZERO - confirm
   at the use site. */
245 typedef struct braminzero_fallback {
246 fallback_common common;
247 struct sljit_label *hotpath;
248 } braminzero_fallback;
/* Fallback record for iterators; begins with fallback_common as that
   structure requires of its descendants. */
250 typedef struct iterator_fallback {
251 fallback_common common;
252 /* Next iteration. */
253 struct sljit_label *hotpath;
254 } iterator_fallback;
/* List node describing one function generated for a recursion target. */
256 typedef struct recurse_entry {
257 struct recurse_entry *next;
258 /* Contains the function entry. */
259 struct sljit_label *entry;
260 /* Collects the calls until the function is created. */
261 jump_list *calls;
262 /* Points to the starting opcode. */
263 int start;
264 } recurse_entry;
/* Fallback record with no data beyond the common header.
   NOTE(review): the name suggests it belongs to OP_RECURSE - confirm at
   the use site. */
266 typedef struct recurse_fallback {
267 fallback_common common;
268 } recurse_fallback;
/* Compilation state that is passed (as "common") to the code generator
   helpers in this file. */
270 typedef struct compiler_common {
/* The sljit compiler instance; DEFINE_COMPILER loads it into a local. */
271 struct sljit_compiler *compiler;
/* Start of the compiled pattern; PRIV_DATA() indexes localptrs with
   (cc - start). */
272 pcre_uchar *start;
273 int localsize;
/* Per-opcode private storage offsets, filled in by set_localptrs(). */
274 int *localptrs;
275 const pcre_uint8 *fcc;
276 sljit_w lcc;
/* Base offset used by OVECTOR_PRIV(). */
277 int cbraptr;
278 int nltype;
279 int newline;
280 int bsr_nltype;
281 int endonly;
282 sljit_w ctypes;
283 sljit_uw name_table;
284 sljit_w name_count;
285 sljit_w name_entry_size;
286 struct sljit_label *acceptlabel;
/* Pending out-of-line stubs: add_stub() adds, flush_stubs() emits. */
287 stub_list *stubs;
288 recurse_entry *entries;
289 recurse_entry *currententry;
/* Jump lists below collect jumps that are bound to their labels later.
   calllimit is fed by decrease_call_count(), stackalloc by flush_stubs(). */
290 jump_list *accept;
291 jump_list *calllimit;
292 jump_list *stackalloc;
293 jump_list *revertframes;
294 jump_list *wordboundary;
295 jump_list *anynewline;
296 jump_list *hspace;
297 jump_list *vspace;
298 jump_list *casefulcmp;
299 jump_list *caselesscmp;
300 BOOL jscript_compat;
301 #ifdef SUPPORT_UTF
/* next_opcode() consults this flag to size multi-unit characters. */
302 BOOL utf;
303 #ifdef SUPPORT_UCP
304 BOOL use_ucp;
305 #endif
306 jump_list *utfreadchar;
307 #ifdef COMPILE_PCRE8
308 jump_list *utfreadtype8;
309 #endif
310 #endif /* SUPPORT_UTF */
311 #ifdef SUPPORT_UCP
312 jump_list *getucd;
313 #endif
314 } compiler_common;
316 /* For byte_sequence_compare. */
318 typedef struct compare_context {
319 int length;
320 int sourcereg;
/* The remaining members exist only when SLJIT_UNALIGNED is set. */
321 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
322 int ucharptr;
/* The same storage viewed as an int, a 16-bit value or individual
   characters. NOTE(review): presumably "c" holds the characters to compare
   and "oc" their other-case counterparts - confirm in byte_sequence_compare. */
323 union {
324 sljit_i asint;
325 sljit_uh asushort;
326 #ifdef COMPILE_PCRE8
327 sljit_ub asbyte;
328 sljit_ub asuchars[4];
329 #else
330 #ifdef COMPILE_PCRE16
331 sljit_uh asuchars[2];
332 #endif
333 #endif
334 } c;
335 union {
336 sljit_i asint;
337 sljit_uh asushort;
338 #ifdef COMPILE_PCRE8
339 sljit_ub asbyte;
340 sljit_ub asuchars[4];
341 #else
342 #ifdef COMPILE_PCRE16
343 sljit_uh asuchars[2];
344 #endif
345 #endif
346 } oc;
347 #endif
348 } compare_context;
/* Marker values stored in saved stack frames by init_frame(): frame_end
   terminates a frame, frame_setstrbegin precedes a saved copy of
   OVECTOR(0). */
350 enum {
351 frame_end = 0,
352 frame_setstrbegin = -1
355 /* Undefine sljit macros. */
/* CMP is redefined further down as a shortcut for sljit_emit_cmp(). */
356 #undef CMP
358 /* Used for accessing the elements of the stack. */
/* Byte offset of stack slot i: slot 0 maps to -1 word, slot 1 to -2 words,
   and so on. The callers in this file use it relative to STACK_TOP. */
359 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
/* Symbolic names for the sljit registers used by the generated code. */
361 #define TMP1 SLJIT_TEMPORARY_REG1
362 #define TMP2 SLJIT_TEMPORARY_REG3
363 #define TMP3 SLJIT_TEMPORARY_EREG2
364 #define STR_PTR SLJIT_SAVED_REG1
365 #define STR_END SLJIT_SAVED_REG2
366 #define STACK_TOP SLJIT_TEMPORARY_REG2
367 #define STACK_LIMIT SLJIT_SAVED_REG3
368 #define ARGUMENTS SLJIT_SAVED_EREG1
369 #define CALL_COUNT SLJIT_SAVED_EREG2
370 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
372 /* Locals layout. */
/* Every value below is a byte offset; the code in this file uses them
   relative to SLJIT_LOCALS_REG. */
373 /* These two locals can be used by the current opcode. */
374 #define LOCALS0 (0 * sizeof(sljit_w))
375 #define LOCALS1 (1 * sizeof(sljit_w))
376 /* Two local variables for possessive quantifiers (char1 cannot use them). */
377 #define POSSESSIVE0 (2 * sizeof(sljit_w))
378 #define POSSESSIVE1 (3 * sizeof(sljit_w))
379 /* Head of the last recursion. */
380 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
381 /* Max limit of recursions. */
382 #define CALL_LIMIT (5 * sizeof(sljit_w))
383 /* Last known position of the requested byte. */
384 #define REQ_CHAR_PTR (6 * sizeof(sljit_w))
385 /* End pointer of the first line. */
386 #define FIRSTLINE_END (7 * sizeof(sljit_w))
387 /* The output vector is stored on the stack, and contains pointers
388 to characters. The vector data is divided into two groups: the first
389 group contains the start / end character pointers, and the second is
390 the start pointers when the end of the capturing group has not yet been reached. */
391 #define OVECTOR_START (8 * sizeof(sljit_w))
392 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
/* Both macros below expect a variable named "common" in scope. */
393 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
394 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
/* Select the sljit move opcodes that match the width of pcre_uchar:
   the _UB variants for the 8-bit library, the _UH variants for the
   16-bit library. Any other configuration is rejected at compile time. */
396 #ifdef COMPILE_PCRE8
397 #define MOV_UCHAR SLJIT_MOV_UB
398 #define MOVU_UCHAR SLJIT_MOVU_UB
399 #else
400 #ifdef COMPILE_PCRE16
401 #define MOV_UCHAR SLJIT_MOV_UH
402 #define MOVU_UCHAR SLJIT_MOVU_UH
403 #else
404 #error Unsupported compiling mode
405 #endif
406 #endif
408 /* Shortcuts. */
/* All emitter shortcuts below expect a local named "compiler";
   DEFINE_COMPILER declares it from a variable named "common". */
409 #define DEFINE_COMPILER \
410 struct sljit_compiler *compiler = common->compiler
411 #define OP1(op, dst, dstw, src, srcw) \
412 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
413 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
414 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
415 #define LABEL() \
416 sljit_emit_label(compiler)
417 #define JUMP(type) \
418 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
419 #define JUMPTO(type, label) \
420 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a label at the current position and binds an earlier jump to it. */
421 #define JUMPHERE(jump) \
422 sljit_set_label((jump), sljit_emit_label(compiler))
423 #define CMP(type, src1, src1w, src2, src2w) \
424 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
425 #define CMPTO(type, src1, src1w, src2, src2w, label) \
426 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
427 #define COND_VALUE(op, dst, dstw, type) \
428 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
430 static pcre_uchar* bracketend(pcre_uchar* cc)
432 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
433 do cc += GET(cc, 1); while (*cc == OP_ALT);
434 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
435 cc += 1 + LINK_SIZE;
436 return cc;
439 /* Functions which might need modification for all new supported opcodes:
440 next_opcode
441 get_localspace
442 set_localptrs
443 get_framesize
444 init_frame
445 get_localsize
446 copy_locals
447 compile_hotpath
448 compile_fallbackpath
/* Returns a pointer to the item that follows the opcode at cc, or NULL
   when the opcode is not handled here (the default branch, and OP_ANYBYTE
   while common->utf is set). For bracket-like opcodes only the opcode and
   its fixed operands are skipped, not the bracket body. */
451 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
453 SLJIT_UNUSED_ARG(common);
454 switch(*cc)
/* Opcodes that occupy a single code unit. */
456 case OP_SOD:
457 case OP_SOM:
458 case OP_SET_SOM:
459 case OP_NOT_WORD_BOUNDARY:
460 case OP_WORD_BOUNDARY:
461 case OP_NOT_DIGIT:
462 case OP_DIGIT:
463 case OP_NOT_WHITESPACE:
464 case OP_WHITESPACE:
465 case OP_NOT_WORDCHAR:
466 case OP_WORDCHAR:
467 case OP_ANY:
468 case OP_ALLANY:
469 case OP_ANYNL:
470 case OP_NOT_HSPACE:
471 case OP_HSPACE:
472 case OP_NOT_VSPACE:
473 case OP_VSPACE:
474 case OP_EXTUNI:
475 case OP_EODN:
476 case OP_EOD:
477 case OP_CIRC:
478 case OP_CIRCM:
479 case OP_DOLL:
480 case OP_DOLLM:
481 case OP_TYPESTAR:
482 case OP_TYPEMINSTAR:
483 case OP_TYPEPLUS:
484 case OP_TYPEMINPLUS:
485 case OP_TYPEQUERY:
486 case OP_TYPEMINQUERY:
487 case OP_TYPEPOSSTAR:
488 case OP_TYPEPOSPLUS:
489 case OP_TYPEPOSQUERY:
490 case OP_CRSTAR:
491 case OP_CRMINSTAR:
492 case OP_CRPLUS:
493 case OP_CRMINPLUS:
494 case OP_CRQUERY:
495 case OP_CRMINQUERY:
496 case OP_DEF:
497 case OP_BRAZERO:
498 case OP_BRAMINZERO:
499 case OP_BRAPOSZERO:
500 case OP_FAIL:
501 case OP_ACCEPT:
502 case OP_ASSERT_ACCEPT:
503 case OP_SKIPZERO:
504 return cc + 1;
/* Single code unit as well, but rejected (NULL) in UTF mode. */
506 case OP_ANYBYTE:
507 #ifdef SUPPORT_UTF
508 if (common->utf) return NULL;
509 #endif
510 return cc + 1;
/* Opcode followed by one character. In UTF mode the character may span
   extra code units, which are skipped as well. */
512 case OP_CHAR:
513 case OP_CHARI:
514 case OP_NOT:
515 case OP_NOTI:
516 case OP_STAR:
517 case OP_MINSTAR:
518 case OP_PLUS:
519 case OP_MINPLUS:
520 case OP_QUERY:
521 case OP_MINQUERY:
522 case OP_POSSTAR:
523 case OP_POSPLUS:
524 case OP_POSQUERY:
525 case OP_STARI:
526 case OP_MINSTARI:
527 case OP_PLUSI:
528 case OP_MINPLUSI:
529 case OP_QUERYI:
530 case OP_MINQUERYI:
531 case OP_POSSTARI:
532 case OP_POSPLUSI:
533 case OP_POSQUERYI:
534 case OP_NOTSTAR:
535 case OP_NOTMINSTAR:
536 case OP_NOTPLUS:
537 case OP_NOTMINPLUS:
538 case OP_NOTQUERY:
539 case OP_NOTMINQUERY:
540 case OP_NOTPOSSTAR:
541 case OP_NOTPOSPLUS:
542 case OP_NOTPOSQUERY:
543 case OP_NOTSTARI:
544 case OP_NOTMINSTARI:
545 case OP_NOTPLUSI:
546 case OP_NOTMINPLUSI:
547 case OP_NOTQUERYI:
548 case OP_NOTMINQUERYI:
549 case OP_NOTPOSSTARI:
550 case OP_NOTPOSPLUSI:
551 case OP_NOTPOSQUERYI:
552 cc += 2;
553 #ifdef SUPPORT_UTF
554 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
555 #endif
556 return cc;
/* Opcode, an IMM2_SIZE operand, then one character (possibly multi-unit
   in UTF mode). */
558 case OP_UPTO:
559 case OP_MINUPTO:
560 case OP_EXACT:
561 case OP_POSUPTO:
562 case OP_UPTOI:
563 case OP_MINUPTOI:
564 case OP_EXACTI:
565 case OP_POSUPTOI:
566 case OP_NOTUPTO:
567 case OP_NOTMINUPTO:
568 case OP_NOTEXACT:
569 case OP_NOTPOSUPTO:
570 case OP_NOTUPTOI:
571 case OP_NOTMINUPTOI:
572 case OP_NOTEXACTI:
573 case OP_NOTPOSUPTOI:
574 cc += 2 + IMM2_SIZE;
575 #ifdef SUPPORT_UTF
576 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
577 #endif
578 return cc;
/* Opcode plus two further code units. */
580 case OP_NOTPROP:
581 case OP_PROP:
582 return cc + 1 + 2;
/* Opcode plus one IMM2_SIZE operand. */
584 case OP_TYPEUPTO:
585 case OP_TYPEMINUPTO:
586 case OP_TYPEEXACT:
587 case OP_TYPEPOSUPTO:
588 case OP_REF:
589 case OP_REFI:
590 case OP_CREF:
591 case OP_NCREF:
592 case OP_RREF:
593 case OP_NRREF:
594 case OP_CLOSE:
595 cc += 1 + IMM2_SIZE;
596 return cc;
/* Opcode plus two IMM2_SIZE operands. */
598 case OP_CRRANGE:
599 case OP_CRMINRANGE:
600 return cc + 1 + 2 * IMM2_SIZE;
/* Opcode plus 32 bytes of data, expressed in pcre_uchar units. */
602 case OP_CLASS:
603 case OP_NCLASS:
604 return cc + 1 + 32 / sizeof(pcre_uchar);
/* The total length of the item is stored right after the opcode. */
606 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
607 case OP_XCLASS:
608 return cc + GET(cc, 1);
609 #endif
/* Opcode plus a LINK_SIZE field. */
611 case OP_RECURSE:
612 case OP_ASSERT:
613 case OP_ASSERT_NOT:
614 case OP_ASSERTBACK:
615 case OP_ASSERTBACK_NOT:
616 case OP_REVERSE:
617 case OP_ONCE:
618 case OP_ONCE_NC:
619 case OP_BRA:
620 case OP_BRAPOS:
621 case OP_COND:
622 case OP_SBRA:
623 case OP_SBRAPOS:
624 case OP_SCOND:
625 case OP_ALT:
626 case OP_KET:
627 case OP_KETRMAX:
628 case OP_KETRMIN:
629 case OP_KETRPOS:
630 return cc + 1 + LINK_SIZE;
/* Opcode, a LINK_SIZE field and an IMM2_SIZE operand. */
632 case OP_CBRA:
633 case OP_CBRAPOS:
634 case OP_SCBRA:
635 case OP_SCBRAPOS:
636 return cc + 1 + LINK_SIZE + IMM2_SIZE;
/* Anything else is reported as unsupported. */
638 default:
639 return NULL;
643 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
645 int localspace = 0;
646 pcre_uchar *alternative;
647 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
648 while (cc < ccend)
650 switch(*cc)
652 case OP_ASSERT:
653 case OP_ASSERT_NOT:
654 case OP_ASSERTBACK:
655 case OP_ASSERTBACK_NOT:
656 case OP_ONCE:
657 case OP_ONCE_NC:
658 case OP_BRAPOS:
659 case OP_SBRA:
660 case OP_SBRAPOS:
661 case OP_SCOND:
662 localspace += sizeof(sljit_w);
663 cc += 1 + LINK_SIZE;
664 break;
666 case OP_CBRAPOS:
667 case OP_SCBRAPOS:
668 localspace += sizeof(sljit_w);
669 cc += 1 + LINK_SIZE + IMM2_SIZE;
670 break;
672 case OP_COND:
673 /* Might be a hidden SCOND. */
674 alternative = cc + GET(cc, 1);
675 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
676 localspace += sizeof(sljit_w);
677 cc += 1 + LINK_SIZE;
678 break;
680 default:
681 cc = next_opcode(common, cc);
682 if (cc == NULL)
683 return -1;
684 break;
687 return localspace;
690 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
692 pcre_uchar *cc = common->start;
693 pcre_uchar *alternative;
694 while (cc < ccend)
696 switch(*cc)
698 case OP_ASSERT:
699 case OP_ASSERT_NOT:
700 case OP_ASSERTBACK:
701 case OP_ASSERTBACK_NOT:
702 case OP_ONCE:
703 case OP_ONCE_NC:
704 case OP_BRAPOS:
705 case OP_SBRA:
706 case OP_SBRAPOS:
707 case OP_SCOND:
708 common->localptrs[cc - common->start] = localptr;
709 localptr += sizeof(sljit_w);
710 cc += 1 + LINK_SIZE;
711 break;
713 case OP_CBRAPOS:
714 case OP_SCBRAPOS:
715 common->localptrs[cc - common->start] = localptr;
716 localptr += sizeof(sljit_w);
717 cc += 1 + LINK_SIZE + IMM2_SIZE;
718 break;
720 case OP_COND:
721 /* Might be a hidden SCOND. */
722 alternative = cc + GET(cc, 1);
723 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
725 common->localptrs[cc - common->start] = localptr;
726 localptr += sizeof(sljit_w);
728 cc += 1 + LINK_SIZE;
729 break;
731 default:
732 cc = next_opcode(common, cc);
733 SLJIT_ASSERT(cc != NULL);
734 break;
739 /* Returns with -1 if no need for frame. */
740 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
742 pcre_uchar *ccend = bracketend(cc);
743 int length = 0;
744 BOOL possessive = FALSE;
745 BOOL setsom_found = FALSE;
747 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
749 length = 3;
750 possessive = TRUE;
753 cc = next_opcode(common, cc);
754 SLJIT_ASSERT(cc != NULL);
755 while (cc < ccend)
756 switch(*cc)
758 case OP_SET_SOM:
759 case OP_RECURSE:
760 if (!setsom_found)
762 length += 2;
763 setsom_found = TRUE;
765 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
766 break;
768 case OP_CBRA:
769 case OP_CBRAPOS:
770 case OP_SCBRA:
771 case OP_SCBRAPOS:
772 length += 3;
773 cc += 1 + LINK_SIZE + IMM2_SIZE;
774 break;
776 default:
777 cc = next_opcode(common, cc);
778 SLJIT_ASSERT(cc != NULL);
779 break;
782 /* Possessive quantifiers can use a special case. */
783 if (SLJIT_UNLIKELY(possessive) && length == 3)
784 return -1;
786 if (length > 0)
787 return length + 1;
788 return -1;
791 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
793 DEFINE_COMPILER;
794 pcre_uchar *ccend = bracketend(cc);
795 BOOL setsom_found = FALSE;
796 int offset;
798 /* >= 1 + shortest item size (2) */
799 SLJIT_UNUSED_ARG(stacktop);
800 SLJIT_ASSERT(stackpos >= stacktop + 2);
802 stackpos = STACK(stackpos);
803 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
804 cc = next_opcode(common, cc);
805 SLJIT_ASSERT(cc != NULL);
806 while (cc < ccend)
807 switch(*cc)
809 case OP_SET_SOM:
810 case OP_RECURSE:
811 if (!setsom_found)
813 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
815 stackpos += (int)sizeof(sljit_w);
816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
817 stackpos += (int)sizeof(sljit_w);
818 setsom_found = TRUE;
820 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
821 break;
823 case OP_CBRA:
824 case OP_CBRAPOS:
825 case OP_SCBRA:
826 case OP_SCBRAPOS:
827 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
829 stackpos += (int)sizeof(sljit_w);
830 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
833 stackpos += (int)sizeof(sljit_w);
834 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
835 stackpos += (int)sizeof(sljit_w);
837 cc += 1 + LINK_SIZE + IMM2_SIZE;
838 break;
840 default:
841 cc = next_opcode(common, cc);
842 SLJIT_ASSERT(cc != NULL);
843 break;
846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
847 SLJIT_ASSERT(stackpos == STACK(stacktop));
850 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
852 int localsize = 2;
853 pcre_uchar *alternative;
854 /* Calculate the sum of the local variables. */
855 while (cc < ccend)
857 switch(*cc)
859 case OP_ASSERT:
860 case OP_ASSERT_NOT:
861 case OP_ASSERTBACK:
862 case OP_ASSERTBACK_NOT:
863 case OP_ONCE:
864 case OP_ONCE_NC:
865 case OP_BRAPOS:
866 case OP_SBRA:
867 case OP_SBRAPOS:
868 case OP_SCOND:
869 localsize++;
870 cc += 1 + LINK_SIZE;
871 break;
873 case OP_CBRA:
874 case OP_SCBRA:
875 localsize++;
876 cc += 1 + LINK_SIZE + IMM2_SIZE;
877 break;
879 case OP_CBRAPOS:
880 case OP_SCBRAPOS:
881 localsize += 2;
882 cc += 1 + LINK_SIZE + IMM2_SIZE;
883 break;
885 case OP_COND:
886 /* Might be a hidden SCOND. */
887 alternative = cc + GET(cc, 1);
888 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
889 localsize++;
890 cc += 1 + LINK_SIZE;
891 break;
893 default:
894 cc = next_opcode(common, cc);
895 SLJIT_ASSERT(cc != NULL);
896 break;
899 SLJIT_ASSERT(cc == ccend);
900 return localsize;
/* Emits code that copies the local variables belonging to the opcodes in
   [cc, ccend) between the locals area (SLJIT_LOCALS_REG) and the stack
   (STACK_TOP). With save set, the locals are written to the stack, and
   RECURSIVE_HEAD is saved first; otherwise they are read back from it.
   stackptr and stacktop are slot numbers that are converted to byte
   offsets on entry. TMP1 and TMP2 are used alternately as transfer
   registers, so a value loaded into one register is stored only when that
   register comes up again. */
903 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
904 BOOL save, int stackptr, int stacktop)
906 DEFINE_COMPILER;
/* Offsets (relative to SLJIT_LOCALS_REG) of the locals of the current opcode. */
907 int srcw[2];
/* Number of valid entries in srcw for the current step. */
908 int count;
/* Which transfer register is used next, and whether each one is empty. */
909 BOOL tmp1next = TRUE;
910 BOOL tmp1empty = TRUE;
911 BOOL tmp2empty = TRUE;
912 pcre_uchar *alternative;
/* NOTE(review): the code below also uses an "end" state whose enumerator
   is not visible in this listing. */
913 enum {
914 start,
915 loop,
917 } status;
/* Only the save direction begins with the extra RECURSIVE_HEAD step. */
919 status = save ? start : loop;
920 stackptr = STACK(stackptr - 2);
921 stacktop = STACK(stacktop - 1);
/* When restoring, preload up to two values so both registers are ready. */
923 if (!save)
925 stackptr += sizeof(sljit_w);
926 if (stackptr < stacktop)
928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
929 stackptr += sizeof(sljit_w);
930 tmp1empty = FALSE;
932 if (stackptr < stacktop)
934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
935 stackptr += sizeof(sljit_w);
936 tmp2empty = FALSE;
938 /* The tmp1next must be TRUE in either way. */
/* Each pass selects the locals of one opcode (count entries in srcw) and
   then transfers them. */
941 while (status != end)
943 count = 0;
944 switch(status)
946 case start:
947 SLJIT_ASSERT(save);
948 count = 1;
949 srcw[0] = RECURSIVE_HEAD;
950 status = loop;
951 break;
953 case loop:
954 if (cc >= ccend)
956 status = end;
957 break;
960 switch(*cc)
/* Opcodes that own one private data word. */
962 case OP_ASSERT:
963 case OP_ASSERT_NOT:
964 case OP_ASSERTBACK:
965 case OP_ASSERTBACK_NOT:
966 case OP_ONCE:
967 case OP_ONCE_NC:
968 case OP_BRAPOS:
969 case OP_SBRA:
970 case OP_SBRAPOS:
971 case OP_SCOND:
972 count = 1;
973 srcw[0] = PRIV_DATA(cc);
974 SLJIT_ASSERT(srcw[0] != 0);
975 cc += 1 + LINK_SIZE;
976 break;
/* Capturing brackets: the private ovector word of the group. */
978 case OP_CBRA:
979 case OP_SCBRA:
980 count = 1;
981 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
982 cc += 1 + LINK_SIZE + IMM2_SIZE;
983 break;
/* Possessive capturing brackets: both the private ovector word and the
   private data word. srcw[1] is transferred first because the transfer
   loop counts down. */
985 case OP_CBRAPOS:
986 case OP_SCBRAPOS:
987 count = 2;
988 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
989 srcw[0] = PRIV_DATA(cc);
990 SLJIT_ASSERT(srcw[0] != 0);
991 cc += 1 + LINK_SIZE + IMM2_SIZE;
992 break;
994 case OP_COND:
995 /* Might be a hidden SCOND. */
996 alternative = cc + GET(cc, 1);
997 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
999 count = 1;
1000 srcw[0] = PRIV_DATA(cc);
1001 SLJIT_ASSERT(srcw[0] != 0);
1003 cc += 1 + LINK_SIZE;
1004 break;
1006 default:
1007 cc = next_opcode(common, cc);
1008 SLJIT_ASSERT(cc != NULL);
1009 break;
1011 break;
1013 case end:
1014 SLJIT_ASSERT_STOP();
1015 break;
/* Transfer the selected locals, highest srcw index first. */
1018 while (count > 0)
1020 count--;
1021 if (save)
/* Save: write out the previous content of the chosen register (if any),
   then load the next local into it. */
1023 if (tmp1next)
1025 if (!tmp1empty)
1027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1028 stackptr += sizeof(sljit_w);
1030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1031 tmp1empty = FALSE;
1032 tmp1next = FALSE;
1034 else
1036 if (!tmp2empty)
1038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1039 stackptr += sizeof(sljit_w);
1041 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1042 tmp2empty = FALSE;
1043 tmp1next = TRUE;
1046 else
/* Restore: store the preloaded register into the local, then refill the
   register from the stack while values remain. */
1048 if (tmp1next)
1050 SLJIT_ASSERT(!tmp1empty);
1051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1052 tmp1empty = stackptr >= stacktop;
1053 if (!tmp1empty)
1055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1056 stackptr += sizeof(sljit_w);
1058 tmp1next = FALSE;
1060 else
1062 SLJIT_ASSERT(!tmp2empty);
1063 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1064 tmp2empty = stackptr >= stacktop;
1065 if (!tmp2empty)
1067 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1068 stackptr += sizeof(sljit_w);
1070 tmp1next = TRUE;
/* After a save, write out whatever is still held in the registers, the
   older value first. */
1076 if (save)
1078 if (tmp1next)
1080 if (!tmp1empty)
1082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1083 stackptr += sizeof(sljit_w);
1085 if (!tmp2empty)
1087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1088 stackptr += sizeof(sljit_w);
1091 else
1093 if (!tmp2empty)
1095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1096 stackptr += sizeof(sljit_w);
1098 if (!tmp1empty)
1100 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1101 stackptr += sizeof(sljit_w);
/* Every opcode was visited and exactly the reserved stack range was used. */
1105 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1108 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1110 return (value & (value - 1)) == 0;
1113 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1115 while (list)
1117 /* sljit_set_label is clever enough to do nothing
1118 if either the jump or the label is NULL */
1119 sljit_set_label(list->jump, label);
1120 list = list->next;
1124 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1126 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1127 if (list_item)
1129 list_item->next = *list;
1130 list_item->jump = jump;
1131 *list = list_item;
1135 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1137 DEFINE_COMPILER;
1138 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1140 if (list_item)
1142 list_item->type = type;
1143 list_item->data = data;
1144 list_item->start = start;
1145 list_item->leave = LABEL();
1146 list_item->next = common->stubs;
1147 common->stubs = list_item;
1151 static void flush_stubs(compiler_common *common)
1153 DEFINE_COMPILER;
1154 stub_list* list_item = common->stubs;
1156 while (list_item)
1158 JUMPHERE(list_item->start);
1159 switch(list_item->type)
1161 case stack_alloc:
1162 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1163 break;
1165 JUMPTO(SLJIT_JUMP, list_item->leave);
1166 list_item = list_item->next;
1168 common->stubs = NULL;
1171 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1173 DEFINE_COMPILER;
1175 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1176 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1179 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1181 /* May destroy all locals and registers except TMP2. */
1182 DEFINE_COMPILER;
1184 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1185 #ifdef DESTROY_REGISTERS
1186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1187 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1188 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1191 #endif
1192 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1195 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1197 DEFINE_COMPILER;
1198 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1201 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1203 DEFINE_COMPILER;
1204 struct sljit_label *loop;
1205 int i;
1206 /* At this point we can freely use all temporary registers. */
1207 /* TMP1 returns with begin - 1. */
1208 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1209 if (length < 8)
1211 for (i = 0; i < length; i++)
1212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1214 else
1216 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1217 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1218 loop = LABEL();
1219 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1220 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1221 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1225 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1227 DEFINE_COMPILER;
1228 struct sljit_label *loop;
1229 struct sljit_jump *earlyexit;
1231 /* At this point we can freely use all registers. */
1232 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1235 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1236 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1237 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1238 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1239 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1240 /* Unlikely, but possible */
1241 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1242 loop = LABEL();
1243 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1244 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1245 /* Copy the integer value to the output buffer */
1246 #ifdef COMPILE_PCRE16
1247 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1248 #endif
1249 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1250 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1251 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1252 JUMPHERE(earlyexit);
1254 /* Calculate the return value, which is the maximum ovector value. */
1255 if (topbracket > 1)
1257 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1258 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1260 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1261 loop = LABEL();
1262 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1263 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1264 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1265 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1267 else
1268 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1271 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1273 /* Detects if the character has an othercase. */
1274 unsigned int c;
1276 #ifdef SUPPORT_UTF
1277 if (common->utf)
1279 GETCHAR(c, cc);
1280 if (c > 127)
1282 #ifdef SUPPORT_UCP
1283 return c != UCD_OTHERCASE(c);
1284 #else
1285 return FALSE;
1286 #endif
1288 #ifndef COMPILE_PCRE8
1289 return common->fcc[c] != c;
1290 #endif
1292 else
1293 #endif
1294 c = *cc;
1295 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1298 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1300 /* Returns with the othercase. */
1301 #ifdef SUPPORT_UTF
1302 if (common->utf && c > 127)
1304 #ifdef SUPPORT_UCP
1305 return UCD_OTHERCASE(c);
1306 #else
1307 return c;
1308 #endif
1310 #endif
1311 return TABLE_GET(c, common->fcc, c);
1314 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1316 /* Detects if the character and its othercase has only 1 bit difference. */
1317 unsigned int c, oc, bit;
1318 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1319 int n;
1320 #endif
1322 #ifdef SUPPORT_UTF
1323 if (common->utf)
1325 GETCHAR(c, cc);
1326 if (c <= 127)
1327 oc = common->fcc[c];
1328 else
1330 #ifdef SUPPORT_UCP
1331 oc = UCD_OTHERCASE(c);
1332 #else
1333 oc = c;
1334 #endif
1337 else
1339 c = *cc;
1340 oc = TABLE_GET(c, common->fcc, c);
1342 #else
1343 c = *cc;
1344 oc = TABLE_GET(c, common->fcc, c);
1345 #endif
1347 SLJIT_ASSERT(c != oc);
1349 bit = c ^ oc;
1350 /* Optimized for English alphabet. */
1351 if (c <= 127 && bit == 0x20)
1352 return (0 << 8) | 0x20;
1354 /* Since c != oc, they must have at least 1 bit difference. */
1355 if (!ispowerof2(bit))
1356 return 0;
1358 #ifdef COMPILE_PCRE8
1360 #ifdef SUPPORT_UTF
1361 if (common->utf && c > 127)
1363 n = GET_EXTRALEN(*cc);
1364 while ((bit & 0x3f) == 0)
1366 n--;
1367 bit >>= 6;
1369 return (n << 8) | bit;
1371 #endif /* SUPPORT_UTF */
1372 return (0 << 8) | bit;
1374 #else /* COMPILE_PCRE8 */
1376 #ifdef COMPILE_PCRE16
1377 #ifdef SUPPORT_UTF
1378 if (common->utf && c > 65535)
1380 if (bit >= (1 << 10))
1381 bit >>= 10;
1382 else
1383 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1385 #endif /* SUPPORT_UTF */
1386 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1387 #endif /* COMPILE_PCRE16 */
1389 #endif /* COMPILE_PCRE8 */
1392 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1394 DEFINE_COMPILER;
1395 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1398 static void read_char(compiler_common *common)
1400 /* Reads the character into TMP1, updates STR_PTR.
1401 Does not check STR_END. TMP2 Destroyed. */
1402 DEFINE_COMPILER;
1403 #ifdef SUPPORT_UTF
1404 struct sljit_jump *jump;
1405 #endif
1407 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1408 #ifdef SUPPORT_UTF
1409 if (common->utf)
1411 #ifdef COMPILE_PCRE8
1412 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1413 #else
1414 #ifdef COMPILE_PCRE16
1415 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1416 #endif
1417 #endif /* COMPILE_PCRE8 */
1418 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1419 JUMPHERE(jump);
1421 #endif
1422 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1425 static void peek_char(compiler_common *common)
1427 /* Reads the character into TMP1, keeps STR_PTR.
1428 Does not check STR_END. TMP2 Destroyed. */
1429 DEFINE_COMPILER;
1430 #ifdef SUPPORT_UTF
1431 struct sljit_jump *jump;
1432 #endif
1434 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1435 #ifdef SUPPORT_UTF
1436 if (common->utf)
1438 #ifdef COMPILE_PCRE8
1439 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1440 #else
1441 #ifdef COMPILE_PCRE16
1442 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1443 #endif
1444 #endif /* COMPILE_PCRE8 */
1445 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1446 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1447 JUMPHERE(jump);
1449 #endif
1452 static void read_char8_type(compiler_common *common)
1454 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1455 DEFINE_COMPILER;
1456 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1457 struct sljit_jump *jump;
1458 #endif
1460 #ifdef SUPPORT_UTF
1461 if (common->utf)
1463 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1464 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1465 #ifdef COMPILE_PCRE8
1466 /* This can be an extra read in some situations, but hopefully
1467 it is needed in most cases. */
1468 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1469 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1470 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1471 JUMPHERE(jump);
1472 #else
1473 #ifdef COMPILE_PCRE16
1474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1475 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1476 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1477 JUMPHERE(jump);
1478 /* Skip low surrogate if necessary. */
1479 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1481 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1482 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1483 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1484 #endif
1485 #endif /* COMPILE_PCRE8 */
1486 return;
1488 #endif
1489 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1491 #ifdef COMPILE_PCRE16
1492 /* The ctypes array contains only 256 values. */
1493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1494 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1495 #endif
1496 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1497 #ifdef COMPILE_PCRE16
1498 JUMPHERE(jump);
1499 #endif
1502 static void skip_char_back(compiler_common *common)
1504 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1505 DEFINE_COMPILER;
1506 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1507 struct sljit_label *label;
1509 if (common->utf)
1511 label = LABEL();
1512 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1513 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1514 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1515 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1516 return;
1518 #endif
1519 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1520 if (common->utf)
1522 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1523 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1524 /* Skip low surrogate if necessary. */
1525 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1526 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1527 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1528 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1529 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1530 return;
1532 #endif
1533 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1536 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1538 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1539 DEFINE_COMPILER;
1541 if (nltype == NLTYPE_ANY)
1543 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1544 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1546 else if (nltype == NLTYPE_ANYCRLF)
1548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1549 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1550 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1551 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1552 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1554 else
1556 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1557 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1561 #ifdef SUPPORT_UTF
1563 #ifdef COMPILE_PCRE8
1564 static void do_utfreadchar(compiler_common *common)
1566 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1567 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1568 DEFINE_COMPILER;
1569 struct sljit_jump *jump;
1571 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1572 /* Searching for the first zero. */
1573 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1574 jump = JUMP(SLJIT_C_NOT_ZERO);
1575 /* Two byte sequence. */
1576 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1577 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1578 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1579 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1580 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1581 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1582 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1583 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1584 JUMPHERE(jump);
1586 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1587 jump = JUMP(SLJIT_C_NOT_ZERO);
1588 /* Three byte sequence. */
1589 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1590 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1591 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1592 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1593 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1594 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1595 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1596 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1597 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1598 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1600 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1601 JUMPHERE(jump);
1603 /* Four byte sequence. */
1604 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1605 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1606 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1607 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1608 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1609 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1610 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1611 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1612 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1614 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1615 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1616 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1617 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1619 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1622 static void do_utfreadtype8(compiler_common *common)
1624 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1625 of the character (>= 0xc0). Return value in TMP1. */
1626 DEFINE_COMPILER;
1627 struct sljit_jump *jump;
1628 struct sljit_jump *compare;
1630 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1632 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1633 jump = JUMP(SLJIT_C_NOT_ZERO);
1634 /* Two byte sequence. */
1635 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1636 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1637 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1638 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1639 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1640 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1641 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1642 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1643 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1645 JUMPHERE(compare);
1646 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1648 JUMPHERE(jump);
1650 /* We only have types for characters less than 256. */
1651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1652 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1654 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1657 #else /* COMPILE_PCRE8 */
1659 #ifdef COMPILE_PCRE16
1660 static void do_utfreadchar(compiler_common *common)
1662 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1663 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1664 DEFINE_COMPILER;
1665 struct sljit_jump *jump;
1667 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1668 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1669 /* Do nothing, only return. */
1670 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1672 JUMPHERE(jump);
1673 /* Combine two 16 bit characters. */
1674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1675 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1676 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1680 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1681 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1682 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1684 #endif /* COMPILE_PCRE16 */
1686 #endif /* COMPILE_PCRE8 */
1688 #endif /* SUPPORT_UTF */
1690 #ifdef SUPPORT_UCP
1692 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1693 #define UCD_BLOCK_MASK 127
1694 #define UCD_BLOCK_SHIFT 7
1696 static void do_getucd(compiler_common *common)
1698 /* Search the UCD record for the character comes in TMP1.
1699 Returns chartype in TMP1 and UCD offset in TMP2. */
1700 DEFINE_COMPILER;
1702 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1704 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1705 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1706 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1707 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1708 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1709 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1710 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1711 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1713 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1714 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1716 #endif
1718 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1720 DEFINE_COMPILER;
1721 struct sljit_label *mainloop;
1722 struct sljit_label *newlinelabel = NULL;
1723 struct sljit_jump *start;
1724 struct sljit_jump *end = NULL;
1725 struct sljit_jump *nl = NULL;
1726 #ifdef SUPPORT_UTF
1727 struct sljit_jump *singlechar;
1728 #endif
1729 jump_list *newline = NULL;
1730 BOOL newlinecheck = FALSE;
1731 BOOL readuchar = FALSE;
1733 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1734 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1735 newlinecheck = TRUE;
1737 if (firstline)
1739 /* Search for the end of the first line. */
1740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1743 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1745 mainloop = LABEL();
1746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1747 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1748 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1749 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1750 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1751 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1752 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1754 else
1756 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1757 mainloop = LABEL();
1758 /* Continual stores does not cause data dependency. */
1759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1760 read_char(common);
1761 check_newlinechar(common, common->nltype, &newline, TRUE);
1762 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1764 set_jumps(newline, LABEL());
1767 JUMPHERE(end);
1768 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1771 start = JUMP(SLJIT_JUMP);
1773 if (newlinecheck)
1775 newlinelabel = LABEL();
1776 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1777 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1778 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1779 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1780 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1781 #ifdef COMPILE_PCRE16
1782 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1783 #endif
1784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1785 nl = JUMP(SLJIT_JUMP);
1788 mainloop = LABEL();
1790 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1791 #ifdef SUPPORT_UTF
1792 if (common->utf) readuchar = TRUE;
1793 #endif
1794 if (newlinecheck) readuchar = TRUE;
1796 if (readuchar)
1797 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1799 if (newlinecheck)
1800 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1803 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1804 if (common->utf)
1806 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1807 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1808 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1809 JUMPHERE(singlechar);
1811 #endif
1812 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1813 if (common->utf)
1815 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1816 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1817 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1818 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1819 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1821 JUMPHERE(singlechar);
1823 #endif
1824 JUMPHERE(start);
1826 if (newlinecheck)
1828 JUMPHERE(end);
1829 JUMPHERE(nl);
1832 return mainloop;
1835 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
1837 DEFINE_COMPILER;
1838 struct sljit_label *start;
1839 struct sljit_jump *leave;
1840 struct sljit_jump *found;
1841 pcre_uchar oc, bit;
1843 if (firstline)
1845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1846 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1849 start = LABEL();
1850 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1851 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1853 oc = first_char;
1854 if (caseless)
1856 oc = TABLE_GET(first_char, common->fcc, first_char);
1857 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
1858 if (first_char > 127 && common->utf)
1859 oc = UCD_OTHERCASE(first_char);
1860 #endif
1862 if (first_char == oc)
1863 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
1864 else
1866 bit = first_char ^ oc;
1867 if (ispowerof2(bit))
1869 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1870 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
1872 else
1874 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
1875 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1876 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1877 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1878 found = JUMP(SLJIT_C_NOT_ZERO);
1882 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1883 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1884 if (common->utf)
1886 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1887 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1888 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1890 #endif
1891 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1892 if (common->utf)
1894 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
1895 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1897 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1898 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1901 #endif
1902 JUMPTO(SLJIT_JUMP, start);
1903 JUMPHERE(found);
1904 JUMPHERE(leave);
1906 if (firstline)
1907 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1910 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1912 DEFINE_COMPILER;
1913 struct sljit_label *loop;
1914 struct sljit_jump *lastchar;
1915 struct sljit_jump *firstchar;
1916 struct sljit_jump *leave;
1917 struct sljit_jump *foundcr = NULL;
1918 struct sljit_jump *notfoundnl;
1919 jump_list *newline = NULL;
1921 if (firstline)
1923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1924 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1927 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1929 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1930 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1933 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1935 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
1936 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1937 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1938 #ifdef COMPILE_PCRE16
1939 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1940 #endif
1941 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1943 loop = LABEL();
1944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1945 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1946 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
1947 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1948 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1949 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1951 JUMPHERE(leave);
1952 JUMPHERE(firstchar);
1953 JUMPHERE(lastchar);
1955 if (firstline)
1956 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1957 return;
1960 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1962 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1963 skip_char_back(common);
1965 loop = LABEL();
1966 read_char(common);
1967 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1968 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1969 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1970 check_newlinechar(common, common->nltype, &newline, FALSE);
1971 set_jumps(newline, loop);
1973 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1975 leave = JUMP(SLJIT_JUMP);
1976 JUMPHERE(foundcr);
1977 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1979 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1980 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1981 #ifdef COMPILE_PCRE16
1982 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1983 #endif
1984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1985 JUMPHERE(notfoundnl);
1986 JUMPHERE(leave);
1988 JUMPHERE(lastchar);
1989 JUMPHERE(firstchar);
1991 if (firstline)
1992 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1995 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1997 DEFINE_COMPILER;
1998 struct sljit_label *start;
1999 struct sljit_jump *leave;
2000 struct sljit_jump *found;
2001 #ifndef COMPILE_PCRE8
2002 struct sljit_jump *jump;
2003 #endif
2005 if (firstline)
2007 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2008 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2011 start = LABEL();
2012 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2013 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2014 #ifdef SUPPORT_UTF
2015 if (common->utf)
2016 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2017 #endif
2018 #ifndef COMPILE_PCRE8
2019 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2021 JUMPHERE(jump);
2022 #endif
2023 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2024 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2025 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2026 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2027 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2028 found = JUMP(SLJIT_C_NOT_ZERO);
2030 #ifdef SUPPORT_UTF
2031 if (common->utf)
2032 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2033 #endif
2034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2035 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2036 if (common->utf)
2038 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2039 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2040 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2042 #endif
2043 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2044 if (common->utf)
2046 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2047 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2049 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2050 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2051 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2053 #endif
2054 JUMPTO(SLJIT_JUMP, start);
2055 JUMPHERE(found);
2056 JUMPHERE(leave);
2058 if (firstline)
2059 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2062 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2064 DEFINE_COMPILER;
2065 struct sljit_label *loop;
2066 struct sljit_jump *toolong;
2067 struct sljit_jump *alreadyfound;
2068 struct sljit_jump *found;
2069 struct sljit_jump *foundoc = NULL;
2070 struct sljit_jump *notfound;
2071 pcre_uchar oc, bit;
2073 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
2074 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2075 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2076 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2078 if (has_firstchar)
2079 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2080 else
2081 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2083 loop = LABEL();
2084 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2086 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2087 oc = req_char;
2088 if (caseless)
2090 oc = TABLE_GET(req_char, common->fcc, req_char);
2091 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2092 if (req_char > 127 && common->utf)
2093 oc = UCD_OTHERCASE(req_char);
2094 #endif
2096 if (req_char == oc)
2097 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2098 else
2100 bit = req_char ^ oc;
2101 if (ispowerof2(bit))
2103 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2104 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2106 else
2108 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2109 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2112 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2113 JUMPTO(SLJIT_JUMP, loop);
2115 JUMPHERE(found);
2116 if (foundoc)
2117 JUMPHERE(foundoc);
2118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
2119 JUMPHERE(alreadyfound);
2120 JUMPHERE(toolong);
2121 return notfound;
2124 static void do_revertframes(compiler_common *common)
2126 DEFINE_COMPILER;
2127 struct sljit_jump *jump;
2128 struct sljit_label *mainloop;
2130 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2131 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2133 /* Drop frames until we reach STACK_TOP. */
2134 mainloop = LABEL();
2135 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2136 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2137 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2138 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2139 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2140 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2141 JUMPTO(SLJIT_JUMP, mainloop);
2143 JUMPHERE(jump);
2144 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2145 /* End of dropping frames. */
2146 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2148 JUMPHERE(jump);
2149 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2150 /* Set string begin. */
2151 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2152 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2154 JUMPTO(SLJIT_JUMP, mainloop);
2156 JUMPHERE(jump);
2157 /* Unknown command. */
2158 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2159 JUMPTO(SLJIT_JUMP, mainloop);
2162 static void check_wordboundary(compiler_common *common)
2164 DEFINE_COMPILER;
2165 struct sljit_jump *beginend;
2166 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2167 struct sljit_jump *jump;
2168 #endif
2170 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2172 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2173 /* Get type of the previous char, and put it to LOCALS1. */
2174 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2177 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2178 skip_char_back(common);
2179 read_char(common);
2181 /* Testing char type. */
2182 #ifdef SUPPORT_UCP
2183 if (common->use_ucp)
2185 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2186 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2187 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2188 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2189 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2190 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2191 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2193 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2194 JUMPHERE(jump);
2195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2197 else
2198 #endif
2200 #ifndef COMPILE_PCRE8
2201 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2202 #elif defined SUPPORT_UTF
2203 /* Here LOCALS1 has already been zeroed. */
2204 jump = NULL;
2205 if (common->utf)
2206 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2207 #endif /* COMPILE_PCRE8 */
2208 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2209 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2210 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2212 #ifndef COMPILE_PCRE8
2213 JUMPHERE(jump);
2214 #elif defined SUPPORT_UTF
2215 if (jump != NULL)
2216 JUMPHERE(jump);
2217 #endif /* COMPILE_PCRE8 */
2219 JUMPHERE(beginend);
2221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2222 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2223 peek_char(common);
2225 /* Testing char type. This is a code duplication. */
2226 #ifdef SUPPORT_UCP
2227 if (common->use_ucp)
2229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2230 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2231 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2232 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2234 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2235 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2236 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2237 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2238 JUMPHERE(jump);
2240 else
2241 #endif
2243 #ifndef COMPILE_PCRE8
2244 /* TMP2 may be destroyed by peek_char. */
2245 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2246 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2247 #elif defined SUPPORT_UTF
2248 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2249 jump = NULL;
2250 if (common->utf)
2251 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2252 #endif
2253 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2254 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2255 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2256 #ifndef COMPILE_PCRE8
2257 JUMPHERE(jump);
2258 #elif defined SUPPORT_UTF
2259 if (jump != NULL)
2260 JUMPHERE(jump);
2261 #endif /* COMPILE_PCRE8 */
2263 JUMPHERE(beginend);
2265 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2266 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2269 static void check_anynewline(compiler_common *common)
2271 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2272 DEFINE_COMPILER;
2274 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2276 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2277 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2278 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2279 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2280 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2281 #ifdef COMPILE_PCRE8
2282 if (common->utf)
2284 #endif
2285 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2286 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2287 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2288 #ifdef COMPILE_PCRE8
2290 #endif
2291 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2292 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2293 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2296 static void check_hspace(compiler_common *common)
2298 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2299 DEFINE_COMPILER;
2301 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2303 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2304 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2305 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2306 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2307 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2308 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2309 #ifdef COMPILE_PCRE8
2310 if (common->utf)
2312 #endif
2313 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2314 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2315 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2317 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2318 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2319 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2320 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2322 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2324 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2325 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2326 #ifdef COMPILE_PCRE8
2328 #endif
2329 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2330 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2332 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2335 static void check_vspace(compiler_common *common)
2337 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2338 DEFINE_COMPILER;
2340 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2342 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2343 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2344 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2345 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2346 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2347 #ifdef COMPILE_PCRE8
2348 if (common->utf)
2350 #endif
2351 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2352 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2354 #ifdef COMPILE_PCRE8
2356 #endif
2357 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2358 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2360 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2363 #define CHAR1 STR_END
2364 #define CHAR2 STACK_TOP
2366 static void do_casefulcmp(compiler_common *common)
2368 DEFINE_COMPILER;
2369 struct sljit_jump *jump;
2370 struct sljit_label *label;
2372 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2373 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2374 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2376 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2377 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2379 label = LABEL();
2380 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2381 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2382 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2383 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2384 JUMPTO(SLJIT_C_NOT_ZERO, label);
2386 JUMPHERE(jump);
2387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2388 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2389 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2390 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2393 #define LCC_TABLE STACK_LIMIT
2395 static void do_caselesscmp(compiler_common *common)
2397 DEFINE_COMPILER;
2398 struct sljit_jump *jump;
2399 struct sljit_label *label;
2401 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2402 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2404 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2407 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2408 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2409 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2411 label = LABEL();
2412 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2413 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2414 #ifndef COMPILE_PCRE8
2415 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2416 #endif
2417 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2418 #ifndef COMPILE_PCRE8
2419 JUMPHERE(jump);
2420 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2421 #endif
2422 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2423 #ifndef COMPILE_PCRE8
2424 JUMPHERE(jump);
2425 #endif
2426 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2427 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2428 JUMPTO(SLJIT_C_NOT_ZERO, label);
2430 JUMPHERE(jump);
2431 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2432 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2433 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2434 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2435 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2438 #undef LCC_TABLE
2439 #undef CHAR1
2440 #undef CHAR2
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, done in C because it would be
inefficient to do at JIT level.

Arguments:
  src1   start of the first string
  args   match arguments; args->ptr is the start of the second string and
         args->end is its limit
  end1   end of the first string

Returns: the position in the second string just after the compared part when
every character of [src1, end1) equals the corresponding character or is
UCD_OTHERCASE() of it; NULL on a mismatch or when the second string runs out
first. */
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
int c1, c2;
const pcre_uchar *src2 = args->ptr;
const pcre_uchar *end2 = args->end;

while (src1 < end1)
  {
  if (src2 >= end2)
    return NULL;
  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
2464 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2465 compare_context* context, jump_list **fallbacks)
2467 DEFINE_COMPILER;
2468 unsigned int othercasebit = 0;
2469 pcre_uchar *othercasechar = NULL;
2470 #ifdef SUPPORT_UTF
2471 int utflength;
2472 #endif
2474 if (caseless && char_has_othercase(common, cc))
2476 othercasebit = char_get_othercase_bit(common, cc);
2477 SLJIT_ASSERT(othercasebit);
2478 /* Extracting bit difference info. */
2479 #ifdef COMPILE_PCRE8
2480 othercasechar = cc + (othercasebit >> 8);
2481 othercasebit &= 0xff;
2482 #else
2483 #ifdef COMPILE_PCRE16
2484 othercasechar = cc + (othercasebit >> 9);
2485 if ((othercasebit & 0x100) != 0)
2486 othercasebit = (othercasebit & 0xff) << 8;
2487 else
2488 othercasebit &= 0xff;
2489 #endif
2490 #endif
2493 if (context->sourcereg == -1)
2495 #ifdef COMPILE_PCRE8
2496 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2497 if (context->length >= 4)
2498 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2499 else if (context->length >= 2)
2500 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2501 else
2502 #endif
2503 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2504 #else
2505 #ifdef COMPILE_PCRE16
2506 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2507 if (context->length >= 4)
2508 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2509 else
2510 #endif
2511 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2512 #endif
2513 #endif /* COMPILE_PCRE8 */
2514 context->sourcereg = TMP2;
2517 #ifdef SUPPORT_UTF
2518 utflength = 1;
2519 if (common->utf && HAS_EXTRALEN(*cc))
2520 utflength += GET_EXTRALEN(*cc);
2524 #endif
2526 context->length -= IN_UCHARS(1);
2527 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2529 /* Unaligned read is supported. */
2530 if (othercasebit != 0 && othercasechar == cc)
2532 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2533 context->oc.asuchars[context->ucharptr] = othercasebit;
2535 else
2537 context->c.asuchars[context->ucharptr] = *cc;
2538 context->oc.asuchars[context->ucharptr] = 0;
2540 context->ucharptr++;
2542 #ifdef COMPILE_PCRE8
2543 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2544 #else
2545 if (context->ucharptr >= 2 || context->length == 0)
2546 #endif
2548 if (context->length >= 4)
2549 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2550 #ifdef COMPILE_PCRE8
2551 else if (context->length >= 2)
2552 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2553 else if (context->length >= 1)
2554 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2555 #else
2556 else if (context->length >= 2)
2557 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2558 #endif
2559 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2561 switch(context->ucharptr)
2563 case 4 / sizeof(pcre_uchar):
2564 if (context->oc.asint != 0)
2565 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2566 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2567 break;
2569 case 2 / sizeof(pcre_uchar):
2570 if (context->oc.asushort != 0)
2571 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2572 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2573 break;
2575 #ifdef COMPILE_PCRE8
2576 case 1:
2577 if (context->oc.asbyte != 0)
2578 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2579 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2580 break;
2581 #endif
2583 default:
2584 SLJIT_ASSERT_STOP();
2585 break;
2587 context->ucharptr = 0;
2590 #else
2592 /* Unaligned read is unsupported. */
2593 #ifdef COMPILE_PCRE8
2594 if (context->length > 0)
2595 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2596 #else
2597 if (context->length > 0)
2598 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2599 #endif
2600 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2602 if (othercasebit != 0 && othercasechar == cc)
2604 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2605 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2607 else
2608 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2610 #endif
2612 cc++;
2613 #ifdef SUPPORT_UTF
2614 utflength--;
2616 while (utflength > 0);
2617 #endif
2619 return cc;
2622 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2624 #define SET_TYPE_OFFSET(value) \
2625 if ((value) != typeoffset) \
2627 if ((value) > typeoffset) \
2628 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2629 else \
2630 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2632 typeoffset = (value);
2634 #define SET_CHAR_OFFSET(value) \
2635 if ((value) != charoffset) \
2637 if ((value) > charoffset) \
2638 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2639 else \
2640 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2642 charoffset = (value);
2644 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2646 DEFINE_COMPILER;
2647 jump_list *found = NULL;
2648 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2649 unsigned int c;
2650 int compares;
2651 struct sljit_jump *jump = NULL;
2652 pcre_uchar *ccbegin;
2653 #ifdef SUPPORT_UCP
2654 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2655 BOOL charsaved = FALSE;
2656 int typereg = TMP1, scriptreg = TMP1;
2657 unsigned int typeoffset;
2658 #endif
2659 int invertcmp, numberofcmps;
2660 unsigned int charoffset;
2662 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2663 check_input_end(common, fallbacks);
2664 read_char(common);
2666 if ((*cc++ & XCL_MAP) != 0)
2668 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2669 #ifndef COMPILE_PCRE8
2670 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2671 #elif defined SUPPORT_UTF
2672 if (common->utf)
2673 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2674 #endif
2676 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2677 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2678 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2679 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2680 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2681 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2683 #ifndef COMPILE_PCRE8
2684 JUMPHERE(jump);
2685 #elif defined SUPPORT_UTF
2686 if (common->utf)
2687 JUMPHERE(jump);
2688 #endif
2689 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2690 #ifdef SUPPORT_UCP
2691 charsaved = TRUE;
2692 #endif
2693 cc += 32 / sizeof(pcre_uchar);
2696 /* Scanning the necessary info. */
2697 ccbegin = cc;
2698 compares = 0;
2699 while (*cc != XCL_END)
2701 compares++;
2702 if (*cc == XCL_SINGLE)
2704 cc += 2;
2705 #ifdef SUPPORT_UTF
2706 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2707 #endif
2708 #ifdef SUPPORT_UCP
2709 needschar = TRUE;
2710 #endif
2712 else if (*cc == XCL_RANGE)
2714 cc += 2;
2715 #ifdef SUPPORT_UTF
2716 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2717 #endif
2718 cc++;
2719 #ifdef SUPPORT_UTF
2720 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2721 #endif
2722 #ifdef SUPPORT_UCP
2723 needschar = TRUE;
2724 #endif
2726 #ifdef SUPPORT_UCP
2727 else
2729 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2730 cc++;
2731 switch(*cc)
2733 case PT_ANY:
2734 break;
2736 case PT_LAMP:
2737 case PT_GC:
2738 case PT_PC:
2739 case PT_ALNUM:
2740 needstype = TRUE;
2741 break;
2743 case PT_SC:
2744 needsscript = TRUE;
2745 break;
2747 case PT_SPACE:
2748 case PT_PXSPACE:
2749 case PT_WORD:
2750 needstype = TRUE;
2751 needschar = TRUE;
2752 break;
2754 default:
2755 SLJIT_ASSERT_STOP();
2756 break;
2758 cc += 2;
2760 #endif
2763 #ifdef SUPPORT_UCP
2764 /* Simple register allocation. TMP1 is preferred if possible. */
2765 if (needstype || needsscript)
2767 if (needschar && !charsaved)
2768 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2769 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2770 if (needschar)
2772 if (needstype)
2774 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2775 typereg = RETURN_ADDR;
2778 if (needsscript)
2779 scriptreg = TMP3;
2780 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2782 else if (needstype && needsscript)
2783 scriptreg = TMP3;
2784 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2786 if (needsscript)
2788 if (scriptreg == TMP1)
2790 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2791 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2793 else
2795 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2796 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2797 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2801 #endif
2803 /* Generating code. */
2804 cc = ccbegin;
2805 charoffset = 0;
2806 numberofcmps = 0;
2807 #ifdef SUPPORT_UCP
2808 typeoffset = 0;
2809 #endif
2811 while (*cc != XCL_END)
2813 compares--;
2814 invertcmp = (compares == 0 && list != fallbacks);
2815 jump = NULL;
2817 if (*cc == XCL_SINGLE)
2819 cc ++;
2820 #ifdef SUPPORT_UTF
2821 if (common->utf)
2823 GETCHARINC(c, cc);
2825 else
2826 #endif
2827 c = *cc++;
2829 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2832 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2833 numberofcmps++;
2835 else if (numberofcmps > 0)
2837 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2838 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2839 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2840 numberofcmps = 0;
2842 else
2844 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2845 numberofcmps = 0;
2848 else if (*cc == XCL_RANGE)
2850 cc ++;
2851 #ifdef SUPPORT_UTF
2852 if (common->utf)
2854 GETCHARINC(c, cc);
2856 else
2857 #endif
2858 c = *cc++;
2859 SET_CHAR_OFFSET(c);
2860 #ifdef SUPPORT_UTF
2861 if (common->utf)
2863 GETCHARINC(c, cc);
2865 else
2866 #endif
2867 c = *cc++;
2868 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2870 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2871 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2872 numberofcmps++;
2874 else if (numberofcmps > 0)
2876 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2877 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2878 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2879 numberofcmps = 0;
2881 else
2883 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2884 numberofcmps = 0;
2887 #ifdef SUPPORT_UCP
2888 else
2890 if (*cc == XCL_NOTPROP)
2891 invertcmp ^= 0x1;
2892 cc++;
2893 switch(*cc)
2895 case PT_ANY:
2896 if (list != fallbacks)
2898 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2899 continue;
2901 else if (cc[-1] == XCL_NOTPROP)
2902 continue;
2903 jump = JUMP(SLJIT_JUMP);
2904 break;
2906 case PT_LAMP:
2907 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2908 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2909 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2910 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2911 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2912 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2913 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2914 break;
2916 case PT_GC:
2917 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2918 SET_TYPE_OFFSET(c);
2919 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2920 break;
2922 case PT_PC:
2923 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2924 break;
2926 case PT_SC:
2927 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2928 break;
2930 case PT_SPACE:
2931 case PT_PXSPACE:
2932 if (*cc == PT_SPACE)
2934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2935 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2937 SET_CHAR_OFFSET(9);
2938 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2939 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2940 if (*cc == PT_SPACE)
2941 JUMPHERE(jump);
2943 SET_TYPE_OFFSET(ucp_Zl);
2944 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2945 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2946 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2947 break;
2949 case PT_WORD:
2950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2951 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2952 /* ... fall through */
2954 case PT_ALNUM:
2955 SET_TYPE_OFFSET(ucp_Ll);
2956 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2957 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2958 SET_TYPE_OFFSET(ucp_Nd);
2959 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2960 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2961 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2962 break;
2964 cc += 2;
2966 #endif
2968 if (jump != NULL)
2969 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2972 if (found != NULL)
2973 set_jumps(found, LABEL());
2976 #undef SET_TYPE_OFFSET
2977 #undef SET_CHAR_OFFSET
2979 #endif
2981 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2983 DEFINE_COMPILER;
2984 int length;
2985 unsigned int c, oc, bit;
2986 compare_context context;
2987 struct sljit_jump *jump[4];
2988 #ifdef SUPPORT_UTF
2989 struct sljit_label *label;
2990 #ifdef SUPPORT_UCP
2991 pcre_uchar propdata[5];
2992 #endif
2993 #endif
2995 switch(type)
2997 case OP_SOD:
2998 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3000 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3001 return cc;
3003 case OP_SOM:
3004 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3006 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3007 return cc;
3009 case OP_NOT_WORD_BOUNDARY:
3010 case OP_WORD_BOUNDARY:
3011 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3012 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3013 return cc;
3015 case OP_NOT_DIGIT:
3016 case OP_DIGIT:
3017 check_input_end(common, fallbacks);
3018 read_char8_type(common);
3019 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3020 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3021 return cc;
3023 case OP_NOT_WHITESPACE:
3024 case OP_WHITESPACE:
3025 check_input_end(common, fallbacks);
3026 read_char8_type(common);
3027 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3028 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3029 return cc;
3031 case OP_NOT_WORDCHAR:
3032 case OP_WORDCHAR:
3033 check_input_end(common, fallbacks);
3034 read_char8_type(common);
3035 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3036 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3037 return cc;
3039 case OP_ANY:
3040 check_input_end(common, fallbacks);
3041 read_char(common);
3042 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3044 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3045 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3046 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3047 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3048 JUMPHERE(jump[1]);
3049 JUMPHERE(jump[0]);
3051 else
3052 check_newlinechar(common, common->nltype, fallbacks, TRUE);
3053 return cc;
3055 case OP_ALLANY:
3056 check_input_end(common, fallbacks);
3057 #ifdef SUPPORT_UTF
3058 if (common->utf)
3060 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062 #ifdef COMPILE_PCRE8
3063 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3064 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3065 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3066 #else /* COMPILE_PCRE8 */
3067 #ifdef COMPILE_PCRE16
3068 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3069 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3071 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3072 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3073 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3074 #endif /* COMPILE_PCRE16 */
3075 #endif /* COMPILE_PCRE8 */
3076 JUMPHERE(jump[0]);
3077 return cc;
3079 #endif
3080 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3081 return cc;
3083 case OP_ANYBYTE:
3084 check_input_end(common, fallbacks);
3085 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3086 return cc;
3088 #ifdef SUPPORT_UTF
3089 #ifdef SUPPORT_UCP
3090 case OP_NOTPROP:
3091 case OP_PROP:
3092 propdata[0] = 0;
3093 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3094 propdata[2] = cc[0];
3095 propdata[3] = cc[1];
3096 propdata[4] = XCL_END;
3097 compile_xclass_hotpath(common, propdata, fallbacks);
3098 return cc + 2;
3099 #endif
3100 #endif
3102 case OP_ANYNL:
3103 check_input_end(common, fallbacks);
3104 read_char(common);
3105 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3106 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3107 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3108 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3109 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3110 jump[3] = JUMP(SLJIT_JUMP);
3111 JUMPHERE(jump[0]);
3112 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3113 JUMPHERE(jump[1]);
3114 JUMPHERE(jump[2]);
3115 JUMPHERE(jump[3]);
3116 return cc;
3118 case OP_NOT_HSPACE:
3119 case OP_HSPACE:
3120 check_input_end(common, fallbacks);
3121 read_char(common);
3122 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3123 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3124 return cc;
3126 case OP_NOT_VSPACE:
3127 case OP_VSPACE:
3128 check_input_end(common, fallbacks);
3129 read_char(common);
3130 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3131 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3132 return cc;
3134 #ifdef SUPPORT_UCP
3135 case OP_EXTUNI:
3136 check_input_end(common, fallbacks);
3137 read_char(common);
3138 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3139 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3140 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3142 label = LABEL();
3143 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3145 read_char(common);
3146 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3147 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3148 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3150 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3151 JUMPHERE(jump[0]);
3152 return cc;
3153 #endif
3155 case OP_EODN:
3156 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3157 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3159 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3160 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3161 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3162 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3163 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3164 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3166 else if (common->nltype == NLTYPE_FIXED)
3168 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3169 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3170 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3171 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3173 else
3175 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3176 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3177 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3178 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3179 jump[2] = JUMP(SLJIT_C_GREATER);
3180 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3181 /* Equal. */
3182 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3183 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3184 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3186 JUMPHERE(jump[1]);
3187 if (common->nltype == NLTYPE_ANYCRLF)
3189 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3190 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3191 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3193 else
3195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3196 read_char(common);
3197 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3198 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3199 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3200 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3202 JUMPHERE(jump[2]);
3203 JUMPHERE(jump[3]);
3205 JUMPHERE(jump[0]);
3206 return cc;
3208 case OP_EOD:
3209 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3210 return cc;
3212 case OP_CIRC:
3213 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3215 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3216 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3217 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3218 return cc;
3220 case OP_CIRCM:
3221 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3223 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3224 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3225 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3226 jump[0] = JUMP(SLJIT_JUMP);
3227 JUMPHERE(jump[1]);
3229 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, STR_PTR, 0, STR_END, 0));
3230 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3232 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3233 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3234 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3235 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3236 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3237 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3239 else
3241 skip_char_back(common);
3242 read_char(common);
3243 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3245 JUMPHERE(jump[0]);
3246 return cc;
3248 case OP_DOLL:
3249 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3250 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3251 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3253 if (!common->endonly)
3254 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3255 else
3256 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3257 return cc;
3259 case OP_DOLLM:
3260 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3261 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3262 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3263 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3264 jump[0] = JUMP(SLJIT_JUMP);
3265 JUMPHERE(jump[1]);
3267 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3269 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3270 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3271 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3272 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3273 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3274 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3276 else
3278 peek_char(common);
3279 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3281 JUMPHERE(jump[0]);
3282 return cc;
3284 case OP_CHAR:
3285 case OP_CHARI:
3286 length = 1;
3287 #ifdef SUPPORT_UTF
3288 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3289 #endif
3290 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3292 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3293 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3295 context.length = IN_UCHARS(length);
3296 context.sourcereg = -1;
3297 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3298 context.ucharptr = 0;
3299 #endif
3300 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3302 check_input_end(common, fallbacks);
3303 read_char(common);
3304 #ifdef SUPPORT_UTF
3305 if (common->utf)
3307 GETCHAR(c, cc);
3309 else
3310 #endif
3311 c = *cc;
3312 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3313 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3314 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3315 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3316 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3317 return cc + length;
3319 case OP_NOT:
3320 case OP_NOTI:
3321 check_input_end(common, fallbacks);
3322 length = 1;
3323 #ifdef SUPPORT_UTF
3324 if (common->utf)
3326 #ifdef COMPILE_PCRE8
3327 c = *cc;
3328 if (c < 128)
3330 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3331 if (type == OP_NOT || !char_has_othercase(common, cc))
3332 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3333 else
3335 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3336 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3337 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3339 /* Skip the variable-length character. */
3340 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3341 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3342 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3343 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3344 JUMPHERE(jump[0]);
3345 return cc + 1;
3347 else
3348 #endif /* COMPILE_PCRE8 */
3350 GETCHARLEN(c, cc, length);
3351 read_char(common);
3354 else
3355 #endif /* SUPPORT_UTF */
3357 read_char(common);
3358 c = *cc;
3361 if (type == OP_NOT || !char_has_othercase(common, cc))
3362 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3363 else
3365 oc = char_othercase(common, c);
3366 bit = c ^ oc;
3367 if (ispowerof2(bit))
3369 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3370 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3372 else
3374 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3375 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3378 return cc + 1;
3380 case OP_CLASS:
3381 case OP_NCLASS:
3382 check_input_end(common, fallbacks);
3383 read_char(common);
3384 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3385 jump[0] = NULL;
3386 #ifdef COMPILE_PCRE8
3387 /* This check only affects 8 bit mode. In other modes, we
3388 always need to compare the value with 255. */
3389 if (common->utf)
3390 #endif /* COMPILE_PCRE8 */
3392 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3393 if (type == OP_CLASS)
3395 add_jump(compiler, fallbacks, jump[0]);
3396 jump[0] = NULL;
3399 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3400 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3401 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3402 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3403 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3404 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3405 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3406 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3407 if (jump[0] != NULL)
3408 JUMPHERE(jump[0]);
3409 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3410 return cc + 32 / sizeof(pcre_uchar);
3412 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3413 case OP_XCLASS:
3414 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3415 return cc + GET(cc, 0) - 1;
3416 #endif
3418 case OP_REVERSE:
3419 length = GET(cc, 0);
3420 SLJIT_ASSERT(length > 0);
3421 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3422 #ifdef SUPPORT_UTF
3423 if (common->utf)
3425 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3426 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3427 label = LABEL();
3428 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3429 skip_char_back(common);
3430 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3431 JUMPTO(SLJIT_C_NOT_ZERO, label);
3432 return cc + LINK_SIZE;
3434 #endif
3435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3436 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3437 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3438 return cc + LINK_SIZE;
3440 SLJIT_ASSERT_STOP();
3441 return cc;
3444 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3446 /* This function consumes at least one input character. */
3447 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3448 DEFINE_COMPILER;
3449 pcre_uchar *ccbegin = cc;
3450 compare_context context;
3451 int size;
3453 context.length = 0;
3456 if (cc >= ccend)
3457 break;
3459 if (*cc == OP_CHAR)
3461 size = 1;
3462 #ifdef SUPPORT_UTF
3463 if (common->utf && HAS_EXTRALEN(cc[1]))
3464 size += GET_EXTRALEN(cc[1]);
3465 #endif
3467 else if (*cc == OP_CHARI)
3469 size = 1;
3470 #ifdef SUPPORT_UTF
3471 if (common->utf)
3473 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3474 size = 0;
3475 else if (HAS_EXTRALEN(cc[1]))
3476 size += GET_EXTRALEN(cc[1]);
3478 else
3479 #endif
3480 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3481 size = 0;
3483 else
3484 size = 0;
3486 cc += 1 + size;
3487 context.length += IN_UCHARS(size);
3489 while (size > 0 && context.length <= 128);
3491 cc = ccbegin;
3492 if (context.length > 0)
3494 /* We have a fixed-length byte sequence. */
3495 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3496 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3498 context.sourcereg = -1;
3499 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3500 context.ucharptr = 0;
3501 #endif
3502 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3503 return cc;
3506 /* A non-fixed length character will be checked if length == 0. */
3507 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3510 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3512 DEFINE_COMPILER;
3513 int offset = GET2(cc, 1) << 1;
3515 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3516 if (!common->jscript_compat)
3518 if (fallbacks == NULL)
3520 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3521 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3522 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3523 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3524 return JUMP(SLJIT_C_NOT_ZERO);
3526 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3528 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3531 /* Forward definitions. */
3532 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3533 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
/* Allocates a zero-filled fallback record of "size" bytes from the
compiler's memory pool and links it on top of parent's fallback list.

The macro expects these names in the expanding function's scope:
  compiler   the sljit compiler
  parent     the enclosing fallback_common
  fallback   the variable that receives the new record

If the allocation leaves the compiler in an error state, the enclosing
function returns "error" immediately. */

#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)

/* Same as PUSH_FALLBACK, for functions returning void. */

#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)

/* Views the current "fallback" record as a more specific fallback type. */

#define FALLBACK_AS(type) ((type*)fallback)
3563 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3565 DEFINE_COMPILER;
3566 int offset = GET2(cc, 1) << 1;
3567 struct sljit_jump *jump = NULL;
3569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3570 if (withchecks && !common->jscript_compat)
3571 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3573 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3574 if (common->utf && *cc == OP_REFI)
3576 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3577 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3578 if (withchecks)
3579 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3581 /* Needed to save important temporary registers. */
3582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3583 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3584 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3585 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3586 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3587 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3588 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3590 else
3591 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3593 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3594 if (withchecks)
3595 jump = JUMP(SLJIT_C_ZERO);
3596 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3598 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3599 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3600 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3603 if (jump != NULL)
3605 if (emptyfail)
3606 add_jump(compiler, fallbacks, jump);
3607 else
3608 JUMPHERE(jump);
3610 return cc + 1 + IMM2_SIZE;
3613 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3615 DEFINE_COMPILER;
3616 fallback_common *fallback;
3617 pcre_uchar type;
3618 struct sljit_label *label;
3619 struct sljit_jump *zerolength;
3620 struct sljit_jump *jump = NULL;
3621 pcre_uchar *ccbegin = cc;
3622 int min = 0, max = 0;
3623 BOOL minimize;
3625 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3627 type = cc[1 + IMM2_SIZE];
3628 minimize = (type & 0x1) != 0;
3629 switch(type)
3631 case OP_CRSTAR:
3632 case OP_CRMINSTAR:
3633 min = 0;
3634 max = 0;
3635 cc += 1 + IMM2_SIZE + 1;
3636 break;
3637 case OP_CRPLUS:
3638 case OP_CRMINPLUS:
3639 min = 1;
3640 max = 0;
3641 cc += 1 + IMM2_SIZE + 1;
3642 break;
3643 case OP_CRQUERY:
3644 case OP_CRMINQUERY:
3645 min = 0;
3646 max = 1;
3647 cc += 1 + IMM2_SIZE + 1;
3648 break;
3649 case OP_CRRANGE:
3650 case OP_CRMINRANGE:
3651 min = GET2(cc, 1 + IMM2_SIZE + 1);
3652 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3653 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3654 break;
3655 default:
3656 SLJIT_ASSERT_STOP();
3657 break;
3660 if (!minimize)
3662 if (min == 0)
3664 allocate_stack(common, 2);
3665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3667 /* Temporary release of STR_PTR. */
3668 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3669 zerolength = compile_ref_checks(common, ccbegin, NULL);
3670 /* Restore if not zero length. */
3671 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3673 else
3675 allocate_stack(common, 1);
3676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3677 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3680 if (min > 1 || max > 1)
3681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3683 label = LABEL();
3684 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3686 if (min > 1 || max > 1)
3688 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3689 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
3691 if (min > 1)
3692 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3693 if (max > 1)
3695 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3696 allocate_stack(common, 1);
3697 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3698 JUMPTO(SLJIT_JUMP, label);
3699 JUMPHERE(jump);
3703 if (max == 0)
3705 /* Includes min > 1 case as well. */
3706 allocate_stack(common, 1);
3707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3708 JUMPTO(SLJIT_JUMP, label);
3711 JUMPHERE(zerolength);
3712 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3714 decrease_call_count(common);
3715 return cc;
3718 allocate_stack(common, 2);
3719 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3720 if (type != OP_CRMINSTAR)
3721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3723 if (min == 0)
3725 zerolength = compile_ref_checks(common, ccbegin, NULL);
3726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3727 jump = JUMP(SLJIT_JUMP);
3729 else
3730 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3732 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3733 if (max > 0)
3734 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3736 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3739 if (min > 1)
3741 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3742 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3744 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3746 else if (max > 0)
3747 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3749 if (jump != NULL)
3750 JUMPHERE(jump);
3751 JUMPHERE(zerolength);
3753 decrease_call_count(common);
3754 return cc;
3757 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3759 DEFINE_COMPILER;
3760 fallback_common *fallback;
3761 recurse_entry *entry = common->entries;
3762 recurse_entry *prev = NULL;
3763 int start = GET(cc, 1);
3765 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3766 while (entry != NULL)
3768 if (entry->start == start)
3769 break;
3770 prev = entry;
3771 entry = entry->next;
3774 if (entry == NULL)
3776 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3777 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3778 return NULL;
3779 entry->next = NULL;
3780 entry->entry = NULL;
3781 entry->calls = NULL;
3782 entry->start = start;
3784 if (prev != NULL)
3785 prev->next = entry;
3786 else
3787 common->entries = entry;
3790 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3791 allocate_stack(common, 1);
3792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3794 if (entry->entry == NULL)
3795 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3796 else
3797 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3798 /* Leave if the match is failed. */
3799 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3800 return cc + 1 + LINK_SIZE;
3803 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3805 DEFINE_COMPILER;
3806 int framesize;
3807 int localptr;
3808 fallback_common altfallback;
3809 pcre_uchar *ccbegin;
3810 pcre_uchar opcode;
3811 pcre_uchar bra = OP_BRA;
3812 jump_list *tmp = NULL;
3813 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3814 jump_list **found;
3815 /* Saving previous accept variables. */
3816 struct sljit_label *save_acceptlabel = common->acceptlabel;
3817 struct sljit_jump *jump;
3818 struct sljit_jump *brajump = NULL;
3819 jump_list *save_accept = common->accept;
3821 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3823 SLJIT_ASSERT(!conditional);
3824 bra = *cc;
3825 cc++;
3827 localptr = PRIV_DATA(cc);
3828 SLJIT_ASSERT(localptr != 0);
3829 framesize = get_framesize(common, cc, FALSE);
3830 fallback->framesize = framesize;
3831 fallback->localptr = localptr;
3832 opcode = *cc;
3833 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
3834 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
3835 ccbegin = cc;
3836 cc += GET(cc, 1);
3838 if (bra == OP_BRAMINZERO)
3840 /* This is a braminzero fallback path. */
3841 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3842 free_stack(common, 1);
3843 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3846 if (framesize < 0)
3848 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3849 allocate_stack(common, 1);
3850 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3852 else
3854 allocate_stack(common, framesize + 2);
3855 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3856 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3857 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3860 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3863 memset(&altfallback, 0, sizeof(fallback_common));
3864 while (1)
3866 common->acceptlabel = NULL;
3867 common->accept = NULL;
3868 altfallback.top = NULL;
3869 altfallback.topfallbacks = NULL;
3871 if (*ccbegin == OP_ALT)
3872 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3874 altfallback.cc = ccbegin;
3875 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3876 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3878 common->acceptlabel = save_acceptlabel;
3879 common->accept = save_accept;
3880 return NULL;
3882 common->acceptlabel = LABEL();
3883 if (common->accept != NULL)
3884 set_jumps(common->accept, common->acceptlabel);
3886 /* Reset stack. */
3887 if (framesize < 0)
3888 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3889 else {
3890 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3892 /* We don't need to keep the STR_PTR, only the previous localptr. */
3893 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3895 else
3897 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3898 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3902 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3904 /* We know that STR_PTR was stored on the top of the stack. */
3905 if (conditional)
3906 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3907 else if (bra == OP_BRAZERO)
3909 if (framesize < 0)
3910 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3911 else
3913 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3914 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3917 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3920 else if (framesize >= 0)
3922 /* For OP_BRA and OP_BRAMINZERO. */
3923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3926 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3928 compile_fallbackpath(common, altfallback.top);
3929 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3931 common->acceptlabel = save_acceptlabel;
3932 common->accept = save_accept;
3933 return NULL;
3935 set_jumps(altfallback.topfallbacks, LABEL());
3937 if (*cc != OP_ALT)
3938 break;
3940 ccbegin = cc;
3941 cc += GET(cc, 1);
3943 /* None of them matched. */
3945 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3947 /* Assert is failed. */
3948 if (conditional || bra == OP_BRAZERO)
3949 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3951 if (framesize < 0)
3953 /* The topmost item should be 0. */
3954 if (bra == OP_BRAZERO)
3955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3956 else
3957 free_stack(common, 1);
3959 else
3961 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3962 /* The topmost item should be 0. */
3963 if (bra == OP_BRAZERO)
3965 free_stack(common, framesize + 1);
3966 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3968 else
3969 free_stack(common, framesize + 2);
3970 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3972 jump = JUMP(SLJIT_JUMP);
3973 if (bra != OP_BRAZERO)
3974 add_jump(compiler, target, jump);
3976 /* Assert is successful. */
3977 set_jumps(tmp, LABEL());
3978 if (framesize < 0)
3980 /* We know that STR_PTR was stored on the top of the stack. */
3981 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3982 /* Keep the STR_PTR on the top of the stack. */
3983 if (bra == OP_BRAZERO)
3984 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3985 else if (bra == OP_BRAMINZERO)
3987 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3988 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3991 else
3993 if (bra == OP_BRA)
3995 /* We don't need to keep the STR_PTR, only the previous localptr. */
3996 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3997 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3999 else
4001 /* We don't need to keep the STR_PTR, only the previous localptr. */
4002 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4003 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4004 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4008 if (bra == OP_BRAZERO)
4010 fallback->hotpath = LABEL();
4011 sljit_set_label(jump, fallback->hotpath);
4013 else if (bra == OP_BRAMINZERO)
4015 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4016 JUMPHERE(brajump);
4017 if (framesize >= 0)
4019 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4020 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4023 set_jumps(fallback->common.topfallbacks, LABEL());
4026 else
4028 /* AssertNot is successful. */
4029 if (framesize < 0)
4031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4032 if (bra != OP_BRA)
4033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4034 else
4035 free_stack(common, 1);
4037 else
4039 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4041 /* The topmost item should be 0. */
4042 if (bra != OP_BRA)
4044 free_stack(common, framesize + 1);
4045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4047 else
4048 free_stack(common, framesize + 2);
4049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4052 if (bra == OP_BRAZERO)
4053 fallback->hotpath = LABEL();
4054 else if (bra == OP_BRAMINZERO)
4056 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4057 JUMPHERE(brajump);
4060 if (bra != OP_BRA)
4062 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4063 set_jumps(fallback->common.topfallbacks, LABEL());
4064 fallback->common.topfallbacks = NULL;
4068 common->acceptlabel = save_acceptlabel;
4069 common->accept = save_accept;
4070 return cc + 1 + LINK_SIZE;
4073 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4075 int condition = FALSE;
4076 pcre_uchar *slotA = name_table;
4077 pcre_uchar *slotB;
4078 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4079 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4080 sljit_w no_capture;
4081 int i;
4083 locals += OVECTOR_START / sizeof(sljit_w);
4084 no_capture = locals[1];
4086 for (i = 0; i < name_count; i++)
4088 if (GET2(slotA, 0) == refno) break;
4089 slotA += name_entry_size;
4092 if (i < name_count)
4094 /* Found a name for the number - there can be only one; duplicate names
4095 for different numbers are allowed, but not vice versa. First scan down
4096 for duplicates. */
4098 slotB = slotA;
4099 while (slotB > name_table)
4101 slotB -= name_entry_size;
4102 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4104 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4105 if (condition) break;
4107 else break;
4110 /* Scan up for duplicates */
4111 if (!condition)
4113 slotB = slotA;
4114 for (i++; i < name_count; i++)
4116 slotB += name_entry_size;
4117 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4119 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4120 if (condition) break;
4122 else break;
4126 return condition;
4129 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4131 int condition = FALSE;
4132 pcre_uchar *slotA = name_table;
4133 pcre_uchar *slotB;
4134 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4135 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4136 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4137 int i;
4139 for (i = 0; i < name_count; i++)
4141 if (GET2(slotA, 0) == recno) break;
4142 slotA += name_entry_size;
4145 if (i < name_count)
4147 /* Found a name for the number - there can be only one; duplicate
4148 names for different numbers are allowed, but not vice versa. First
4149 scan down for duplicates. */
4151 slotB = slotA;
4152 while (slotB > name_table)
4154 slotB -= name_entry_size;
4155 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4157 condition = GET2(slotB, 0) == group_num;
4158 if (condition) break;
4160 else break;
4163 /* Scan up for duplicates */
4164 if (!condition)
4166 slotB = slotA;
4167 for (i++; i < name_count; i++)
4169 slotB += name_entry_size;
4170 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4172 condition = GET2(slotB, 0) == group_num;
4173 if (condition) break;
4175 else break;
4179 return condition;
4183 Handling bracketed expressions is probably the most complex part.
4185 Stack layout naming characters:
4186 S - Push the current STR_PTR
4187 0 - Push a 0 (NULL)
4188 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4189 before the next alternative. Not pushed if there are no alternatives.
4190 M - Any values pushed by the current alternative. Can be empty, or anything.
4191 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4192 L - Push the previous local (pointed by localptr) to the stack
4193 () - optional values stored on the stack
4194 ()* - optional, can be stored multiple times
4196 The following list shows the regular expression templates, their PCRE byte codes
4197 and stack layout supported by pcre-sljit.
4199 (?:) OP_BRA | OP_KET A M
4200 () OP_CBRA | OP_KET C M
4201 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4202 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4203 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4204 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4205 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4206 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4207 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4208 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4209 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4210 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4211 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4212 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4213 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4214 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4215 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4216 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4217 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4218 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4219 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4220 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4223 Stack layout naming characters:
4224 A - Push the alternative index (starting from 0) on the stack.
4225 Not pushed if there are no alternatives.
4226 M - Any values pushed by the current alternative. Can be empty, or anything.
4228 The next list shows the possible content of a bracket:
4229 (|) OP_*BRA | OP_ALT ... M A
4230 (?()|) OP_*COND | OP_ALT M A
4231 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4232 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4233 Or nothing, if trace is unnecessary
/* Emits the hot-path code for a bracketed group and for its first
alternative.

common - shared compiler state
cc     - points at the bracket opcode, or at the OP_BRAZERO / OP_BRAMINZERO
         that precedes it
parent - fallback record of the enclosing construct; a bracket_fallback is
         pushed for this bracket, and when the bracket is prefixed by
         OP_BRAMINZERO, parent is accessed as a braminzero_fallback

Returns the address just past the closing ket of the bracket, or NULL when
the sljit compiler reports an error. A conditional group whose condition is
OP_DEF emits no code: its fallback record is dropped and the end of the
bracket is returned immediately. */
4236 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4238 DEFINE_COMPILER;
4239 fallback_common *fallback;
4240 pcre_uchar opcode;
4241 int localptr = 0;
4242 int offset = 0;
4243 int stacksize;
4244 pcre_uchar *ccbegin;
4245 pcre_uchar *hotpath;
4246 pcre_uchar bra = OP_BRA;
4247 pcre_uchar ket;
4248 assert_fallback *assert;
4249 BOOL has_alternatives;
4250 struct sljit_jump *jump;
4251 struct sljit_jump *skip;
4252 struct sljit_label *rmaxlabel = NULL;
4253 struct sljit_jump *braminzerojump = NULL;
4255 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped; bra stays OP_BRA when there is no prefix. */
4257 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4259 bra = *cc;
4260 cc++;
4261 opcode = *cc;
4264 opcode = *cc;
4265 ccbegin = cc;
4266 hotpath = ccbegin + 1 + LINK_SIZE;
4268 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4270 /* Drop this bracket_fallback. */
4271 parent->top = fallback->prev;
4272 return bracketend(cc);
/* ket is the opcode found 1 + LINK_SIZE units before the end of the bracket;
the assertions below restrict it to the three ket variants. */
4275 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4276 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4277 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the bracket; cc is then tested for OP_ALT (and for the
ket opcodes) below. */
4278 cc += GET(cc, 1);
4280 has_alternatives = *cc == OP_ALT;
4281 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4283 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4284 if (*hotpath == OP_NRREF)
/* stacksize is used as a scratch variable for the 2-byte operand of
OP_NRREF here. */
4286 stacksize = GET2(hotpath, 1);
4287 if (common->currententry == NULL || stacksize == RREF_ANY)
4288 has_alternatives = FALSE;
4289 else if (common->currententry->start == 0)
4290 has_alternatives = stacksize != 0;
4291 else
4292 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
/* From here on a repeated OP_COND is handled as OP_SCOND and OP_ONCE_NC as
OP_ONCE. */
4296 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4297 opcode = OP_SCOND;
4298 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4299 opcode = OP_ONCE;
4301 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4303 /* Capturing brackets has a pre-allocated space. */
4304 offset = GET2(ccbegin, 1 + LINK_SIZE);
4305 localptr = OVECTOR_PRIV(offset);
4306 offset <<= 1;
4307 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4308 hotpath += IMM2_SIZE;
4310 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4312 /* Other brackets simply allocate the next entry. */
4313 localptr = PRIV_DATA(ccbegin);
4314 SLJIT_ASSERT(localptr != 0);
4315 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4316 if (opcode == OP_ONCE)
4317 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4320 /* Instructions before the first alternative. */
4321 stacksize = 0;
4322 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4323 stacksize++;
4324 if (bra == OP_BRAZERO)
4325 stacksize++;
4327 if (stacksize > 0)
4328 allocate_stack(common, stacksize);
/* Second pass over the same conditions: fill the slots reserved above. */
4330 stacksize = 0;
4331 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4334 stacksize++;
4337 if (bra == OP_BRAZERO)
4338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4340 if (bra == OP_BRAMINZERO)
4342 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4343 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4344 if (ket != OP_KETRMIN)
4346 free_stack(common, 1);
4347 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4349 else
4351 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4353 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4354 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4355 /* Nothing stored during the first run. */
4356 skip = JUMP(SLJIT_JUMP);
4357 JUMPHERE(jump);
4358 /* Checking zero-length iteration. */
4359 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4361 /* When we come from outside, localptr contains the previous STR_PTR. */
4362 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4364 else
4366 /* Except when the whole stack frame must be saved. */
4367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4368 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4370 JUMPHERE(skip);
4372 else
4374 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4375 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4376 JUMPHERE(jump);
4381 if (ket == OP_KETRMIN)
4382 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4384 if (ket == OP_KETRMAX)
/* rmaxlabel is the target of the jump emitted in the OP_KETRMAX handling
near the end of this function. */
4386 rmaxlabel = LABEL();
4387 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4388 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4391 /* Handling capturing brackets and alternatives. */
4392 if (opcode == OP_ONCE)
4394 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4396 /* Neither capturing brackets nor recursions are found in the block. */
4397 if (ket == OP_KETRMIN)
4399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4400 allocate_stack(common, 2);
4401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4403 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4405 else if (ket == OP_KETRMAX || has_alternatives)
4407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4408 allocate_stack(common, 1);
4409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4411 else
4412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4414 else
4416 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4418 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4419 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4420 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4424 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4426 else
4428 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4430 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4433 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4437 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4439 /* Saving the previous values. */
4440 allocate_stack(common, 3);
4441 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4442 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4449 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4451 /* Saving the previous value. */
4452 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4453 allocate_stack(common, 1);
4454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4457 else if (has_alternatives)
4459 /* Pushing the starting string pointer. */
4460 allocate_stack(common, 1);
4461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4464 /* Generating code for the first alternative. */
4465 if (opcode == OP_COND || opcode == OP_SCOND)
4467 if (*hotpath == OP_CREF)
4469 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the referenced group's ovector slot equals
OVECTOR(1). */
4470 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4471 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4472 hotpath += 1 + IMM2_SIZE;
4474 else if (*hotpath == OP_NCREF)
4476 SLJIT_ASSERT(has_alternatives);
4477 stacksize = GET2(hotpath, 1);
/* The call to do_searchovector is skipped when the referenced group's slot
already differs from OVECTOR(1). */
4478 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
/* STACK_TOP is saved in POSSESSIVE1 around the call and restored after it;
LOCALS0 / LOCALS1 carry the name table dimensions read by the helper. */
4480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4483 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4484 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4485 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4486 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4487 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4488 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4490 JUMPHERE(jump);
4491 hotpath += 1 + IMM2_SIZE;
4493 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4495 /* Never has other case. */
4496 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
/* stacksize is reduced to a compile-time truth value of the recursion
condition here. */
4498 stacksize = GET2(hotpath, 1);
4499 if (common->currententry == NULL)
4500 stacksize = 0;
4501 else if (stacksize == RREF_ANY)
4502 stacksize = 1;
4503 else if (common->currententry->start == 0)
4504 stacksize = stacksize == 0;
4505 else
4506 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4508 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4510 SLJIT_ASSERT(!has_alternatives);
4511 if (stacksize != 0)
4512 hotpath += 1 + IMM2_SIZE;
4513 else
4515 if (*cc == OP_ALT)
4517 hotpath = cc + 1 + LINK_SIZE;
4518 cc += GET(cc, 1);
4520 else
4521 hotpath = cc;
4524 else
4526 SLJIT_ASSERT(has_alternatives);
4528 stacksize = GET2(hotpath, 1);
/* Same calling sequence as for do_searchovector above; POSSESSIVE0
additionally carries the group number the helper compares against. */
4529 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4533 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4534 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4535 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4536 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4537 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4538 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4539 hotpath += 1 + IMM2_SIZE;
4542 else
4544 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4545 /* Similar code as PUSH_FALLBACK macro. */
4546 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4547 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4548 return NULL;
4549 memset(assert, 0, sizeof(assert_fallback));
4550 assert->common.cc = hotpath;
4551 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4552 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
/* Compile the body of the alternative selected above, from hotpath up to cc. */
4556 compile_hotpath(common, hotpath, cc, fallback);
4557 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4558 return NULL;
4560 if (opcode == OP_ONCE)
4562 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4564 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4565 /* TMP2 which is set here used by OP_KETRMAX below. */
4566 if (ket == OP_KETRMAX)
4567 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4568 else if (ket == OP_KETRMIN)
4570 /* Move the STR_PTR to the localptr. */
4571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4574 else
4576 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4577 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4578 if (ket == OP_KETRMAX)
4580 /* TMP2 which is set here used by OP_KETRMAX below. */
4581 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Reserve the slots written after the alternative: one when the bracket is
repeated or prefixed (ket != OP_KET or bra != OP_BRA), and one for the
alternative index when there are alternatives outside OP_ONCE. */
4586 stacksize = 0;
4587 if (ket != OP_KET || bra != OP_BRA)
4588 stacksize++;
4589 if (has_alternatives && opcode != OP_ONCE)
4590 stacksize++;
4592 if (stacksize > 0)
4593 allocate_stack(common, stacksize);
/* Fill the reserved slots: STR_PTR for a repeated bracket, otherwise 0 for a
prefixed one; then 0 as the index of the first alternative. */
4595 stacksize = 0;
4596 if (ket != OP_KET)
4598 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4599 stacksize++;
4601 else if (bra != OP_BRA)
4603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4604 stacksize++;
4607 if (has_alternatives)
4609 if (opcode != OP_ONCE)
4610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4611 if (ket != OP_KETRMAX)
4612 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4615 /* Must be after the hotpath label. */
/* offset is non-zero only for capturing brackets: store the start (saved in
localptr) and the end (STR_PTR) of the capture into the ovector. */
4616 if (offset != 0)
4618 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4620 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4623 if (ket == OP_KETRMAX)
4625 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4627 if (has_alternatives)
4628 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4629 /* Checking zero-length iteration. */
4630 if (opcode != OP_ONCE)
4631 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4632 else
4633 /* TMP2 must contain the starting STR_PTR. */
4634 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4636 else
4637 JUMPTO(SLJIT_JUMP, rmaxlabel);
4638 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4641 if (bra == OP_BRAZERO)
4642 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4644 if (bra == OP_BRAMINZERO)
4646 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4647 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4648 if (braminzerojump != NULL)
4650 JUMPHERE(braminzerojump);
4651 /* We need to release the end pointer to perform the
4652 fallback for the zero-length iteration. When
4653 framesize is < 0, OP_ONCE will do the release itself. */
4654 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4656 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4657 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4659 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4660 free_stack(common, 1);
4662 /* Continue to the normal fallback. */
4665 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4666 decrease_call_count(common);
4668 /* Skip the other alternatives. */
4669 while (*cc == OP_ALT)
4670 cc += GET(cc, 1);
4671 cc += 1 + LINK_SIZE;
4672 return cc;
/* Emits the code for the *POS bracket opcodes (OP_BRAPOS, OP_SBRAPOS,
OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by OP_BRAPOSZERO.

common - shared compiler state
cc     - points at the bracket opcode, or at the OP_BRAPOSZERO before it
parent - fallback record of the enclosing construct; a bracketpos_fallback
         is pushed for this bracket

Every alternative is compiled in turn up to OP_KETRPOS: its hot path is
followed by a jump back to the loop label, and its fallback path is emitted
right after it by compile_fallbackpath.

Returns the address just past OP_KETRPOS, or NULL when the sljit compiler
reports an error. */
4675 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4677 DEFINE_COMPILER;
4678 fallback_common *fallback;
4679 pcre_uchar opcode;
4680 int localptr;
4681 int cbraprivptr = 0;
4682 int framesize;
4683 int stacksize;
4684 int offset = 0;
4685 BOOL zero = FALSE;
4686 pcre_uchar *ccbegin = NULL;
4687 int stack;
4688 struct sljit_label *loop = NULL;
4689 struct jump_list *emptymatch = NULL;
4691 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
/* zero records the presence of an OP_BRAPOSZERO prefix; when it is absent,
an extra stack slot is used as a flag (initialised to 1, cleared to 0 after
an alternative has matched, and tested after the loop). */
4692 if (*cc == OP_BRAPOSZERO)
4694 zero = TRUE;
4695 cc++;
4698 opcode = *cc;
4699 localptr = PRIV_DATA(cc);
4700 SLJIT_ASSERT(localptr != 0);
4701 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
4702 switch(opcode)
4704 case OP_BRAPOS:
4705 case OP_SBRAPOS:
4706 ccbegin = cc + 1 + LINK_SIZE;
4707 break;
4709 case OP_CBRAPOS:
4710 case OP_SCBRAPOS:
/* Capturing variants carry a 2-byte group number after the link; offset
becomes the index of the group's first ovector slot. */
4711 offset = GET2(cc, 1 + LINK_SIZE);
4712 cbraprivptr = OVECTOR_PRIV(offset);
4713 offset <<= 1;
4714 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4715 break;
4717 default:
4718 SLJIT_ASSERT_STOP();
4719 break;
4722 framesize = get_framesize(common, cc, FALSE);
4723 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
/* framesize < 0: no frame is initialised; only the previous ovector pair (or
STR_PTR for the non-capturing variants) and the optional flag are stored. */
4724 if (framesize < 0)
4726 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4727 if (!zero)
4728 stacksize++;
4729 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4730 allocate_stack(common, stacksize);
4731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4733 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4736 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4740 else
4741 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4743 if (!zero)
4744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4746 else
/* framesize >= 0: the optional flag, the optional STR_PTR and the previous
localptr value are stored, followed by the frame written by init_frame. */
4748 stacksize = framesize + 1;
4749 if (!zero)
4750 stacksize++;
4751 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4752 stacksize++;
4753 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4754 allocate_stack(common, stacksize);
4756 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4757 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4758 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4759 stack = 0;
4760 if (!zero)
4762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4763 stack++;
4765 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4768 stack++;
4770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4771 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4774 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
/* loop is the label every successfully matched alternative jumps back to. */
4777 loop = LABEL();
/* One pass per alternative; the pass for the last alternative leaves through
the OP_KETRPOS check near the end of the body. */
4778 while (*cc != OP_KETRPOS)
4780 fallback->top = NULL;
4781 fallback->topfallbacks = NULL;
4782 cc += GET(cc, 1);
4784 compile_hotpath(common, ccbegin, cc, fallback);
4785 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4786 return NULL;
4788 if (framesize < 0)
4790 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4792 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4794 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4795 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4799 else
4801 if (opcode == OP_SBRAPOS)
4802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4803 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* For the S* variants TMP1 holds the position where this iteration started;
an iteration that did not advance STR_PTR leaves the loop via emptymatch. */
4806 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4807 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4809 if (!zero)
4810 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4812 else
4814 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4816 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4822 else
4824 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4825 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4826 if (opcode == OP_SBRAPOS)
4827 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4828 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4831 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4832 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4834 if (!zero)
4836 if (framesize < 0)
4837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4838 else
4839 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4842 JUMPTO(SLJIT_JUMP, loop);
4843 flush_stubs(common);
/* The fallback path of this alternative is emitted inline here; all of its
pending top-level fallback jumps land on the label that follows. */
4845 compile_fallbackpath(common, fallback->top);
4846 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4847 return NULL;
4848 set_jumps(fallback->topfallbacks, LABEL());
/* Reload STR_PTR from where it was saved before trying the next alternative
(or before leaving the loop). */
4850 if (framesize < 0)
4852 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4853 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4854 else
4855 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4857 else
4859 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4861 /* Last alternative. */
4862 if (*cc == OP_KETRPOS)
4863 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4864 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4866 else
4868 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4869 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4873 if (*cc == OP_KETRPOS)
4874 break;
4875 ccbegin = cc + 1 + LINK_SIZE;
4878 fallback->topfallbacks = NULL;
/* Without OP_BRAPOSZERO the flag slot is tested: a non-zero flag (still 1, no
alternative ever matched) sends execution to the fallback. */
4879 if (!zero)
4881 if (framesize < 0)
4882 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4883 else /* TMP2 is set to [localptr] above. */
4884 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4887 /* None of them matched. */
4888 set_jumps(emptymatch, LABEL());
4889 decrease_call_count(common);
4890 return cc + 1 + LINK_SIZE;
4893 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4895 int class_len;
4897 *opcode = *cc;
4898 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4900 cc++;
4901 *type = OP_CHAR;
4903 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4905 cc++;
4906 *type = OP_CHARI;
4907 *opcode -= OP_STARI - OP_STAR;
4909 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4911 cc++;
4912 *type = OP_NOT;
4913 *opcode -= OP_NOTSTAR - OP_STAR;
4915 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4917 cc++;
4918 *type = OP_NOTI;
4919 *opcode -= OP_NOTSTARI - OP_STAR;
4921 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4923 cc++;
4924 *opcode -= OP_TYPESTAR - OP_STAR;
4925 *type = 0;
4927 else
4929 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
4930 *type = *opcode;
4931 cc++;
4932 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
4933 *opcode = cc[class_len - 1];
4934 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4936 *opcode -= OP_CRSTAR - OP_STAR;
4937 if (end != NULL)
4938 *end = cc + class_len;
4940 else
4942 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4943 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
4944 *arg2 = GET2(cc, class_len);
4946 if (*arg2 == 0)
4948 SLJIT_ASSERT(*arg1 != 0);
4949 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4951 if (*arg1 == *arg2)
4952 *opcode = OP_EXACT;
4954 if (end != NULL)
4955 *end = cc + class_len + 2 * IMM2_SIZE;
4957 return cc;
4960 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4962 *arg1 = GET2(cc, 0);
4963 cc += IMM2_SIZE;
4966 if (*type == 0)
4968 *type = *cc;
4969 if (end != NULL)
4970 *end = next_opcode(common, cc);
4971 cc++;
4972 return cc;
4975 if (end != NULL)
4977 *end = cc + 1;
4978 #ifdef SUPPORT_UTF
4979 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
4980 #endif
4982 return cc;
4985 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4987 DEFINE_COMPILER;
4988 fallback_common *fallback;
4989 pcre_uchar opcode;
4990 pcre_uchar type;
4991 int arg1 = -1, arg2 = -1;
4992 pcre_uchar* end;
4993 jump_list *nomatch = NULL;
4994 struct sljit_jump *jump = NULL;
4995 struct sljit_label *label;
4997 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4999 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5001 switch(opcode)
5003 case OP_STAR:
5004 case OP_PLUS:
5005 case OP_UPTO:
5006 case OP_CRRANGE:
5007 if (type == OP_ANYNL || type == OP_EXTUNI)
5009 if (opcode == OP_STAR || opcode == OP_UPTO)
5011 allocate_stack(common, 2);
5012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5013 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5015 else
5017 allocate_stack(common, 1);
5018 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5020 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5023 label = LABEL();
5024 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5025 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5028 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5029 if (opcode == OP_CRRANGE && arg2 > 0)
5030 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
5031 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5032 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5036 allocate_stack(common, 1);
5037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5038 JUMPTO(SLJIT_JUMP, label);
5039 if (jump != NULL)
5040 JUMPHERE(jump);
5042 else
5044 allocate_stack(common, 2);
5045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5046 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5047 label = LABEL();
5048 compile_char1_hotpath(common, type, cc, &nomatch);
5049 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5050 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5052 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5053 JUMPTO(SLJIT_JUMP, label);
5055 else
5057 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5058 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5059 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5060 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5062 set_jumps(nomatch, LABEL());
5063 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5064 add_jump(compiler, &fallback->topfallbacks,
5065 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5066 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5068 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5069 break;
5071 case OP_MINSTAR:
5072 case OP_MINPLUS:
5073 allocate_stack(common, 1);
5074 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5075 if (opcode == OP_MINPLUS)
5076 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5077 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5078 break;
5080 case OP_MINUPTO:
5081 case OP_CRMINRANGE:
5082 allocate_stack(common, 2);
5083 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5085 if (opcode == OP_CRMINRANGE)
5086 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5087 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5088 break;
5090 case OP_QUERY:
5091 case OP_MINQUERY:
5092 allocate_stack(common, 1);
5093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5094 if (opcode == OP_QUERY)
5095 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5096 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5097 break;
5099 case OP_EXACT:
5100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5101 label = LABEL();
5102 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5104 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5106 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5107 break;
5109 case OP_POSSTAR:
5110 case OP_POSPLUS:
5111 case OP_POSUPTO:
5112 if (opcode != OP_POSSTAR)
5113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5115 label = LABEL();
5116 compile_char1_hotpath(common, type, cc, &nomatch);
5117 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5118 if (opcode != OP_POSUPTO)
5120 if (opcode == OP_POSPLUS)
5121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5122 JUMPTO(SLJIT_JUMP, label);
5124 else
5126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5127 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5128 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5129 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5131 set_jumps(nomatch, LABEL());
5132 if (opcode == OP_POSPLUS)
5133 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5134 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5135 break;
5137 case OP_POSQUERY:
5138 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5139 compile_char1_hotpath(common, type, cc, &nomatch);
5140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5141 set_jumps(nomatch, LABEL());
5142 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5143 break;
5145 default:
5146 SLJIT_ASSERT_STOP();
5147 break;
5150 decrease_call_count(common);
5151 return end;
5154 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5156 DEFINE_COMPILER;
5157 fallback_common *fallback;
5159 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5161 if (*cc == OP_FAIL)
5163 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5164 return cc + 1;
5167 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5169 /* No need to check notempty conditions. */
5170 if (common->acceptlabel == NULL)
5171 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5172 else
5173 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5174 return cc + 1;
5177 if (common->acceptlabel == NULL)
5178 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5179 else
5180 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5181 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5182 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5183 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5184 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5185 if (common->acceptlabel == NULL)
5186 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5187 else
5188 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5189 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5190 if (common->acceptlabel == NULL)
5191 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5192 else
5193 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5194 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5195 return cc + 1;
5198 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5200 DEFINE_COMPILER;
5201 int offset = GET2(cc, 1);
5203 /* Data will be discarded anyway... */
5204 if (common->currententry != NULL)
5205 return cc + 1 + IMM2_SIZE;
5207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5208 offset <<= 1;
5209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5211 return cc + 1 + IMM2_SIZE;
5214 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5216 DEFINE_COMPILER;
5217 fallback_common *fallback;
5219 while (cc < ccend)
5221 switch(*cc)
5223 case OP_SOD:
5224 case OP_SOM:
5225 case OP_NOT_WORD_BOUNDARY:
5226 case OP_WORD_BOUNDARY:
5227 case OP_NOT_DIGIT:
5228 case OP_DIGIT:
5229 case OP_NOT_WHITESPACE:
5230 case OP_WHITESPACE:
5231 case OP_NOT_WORDCHAR:
5232 case OP_WORDCHAR:
5233 case OP_ANY:
5234 case OP_ALLANY:
5235 case OP_ANYBYTE:
5236 case OP_NOTPROP:
5237 case OP_PROP:
5238 case OP_ANYNL:
5239 case OP_NOT_HSPACE:
5240 case OP_HSPACE:
5241 case OP_NOT_VSPACE:
5242 case OP_VSPACE:
5243 case OP_EXTUNI:
5244 case OP_EODN:
5245 case OP_EOD:
5246 case OP_CIRC:
5247 case OP_CIRCM:
5248 case OP_DOLL:
5249 case OP_DOLLM:
5250 case OP_NOT:
5251 case OP_NOTI:
5252 case OP_REVERSE:
5253 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5254 break;
5256 case OP_SET_SOM:
5257 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5258 allocate_stack(common, 1);
5259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5262 cc++;
5263 break;
5265 case OP_CHAR:
5266 case OP_CHARI:
5267 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5268 break;
5270 case OP_STAR:
5271 case OP_MINSTAR:
5272 case OP_PLUS:
5273 case OP_MINPLUS:
5274 case OP_QUERY:
5275 case OP_MINQUERY:
5276 case OP_UPTO:
5277 case OP_MINUPTO:
5278 case OP_EXACT:
5279 case OP_POSSTAR:
5280 case OP_POSPLUS:
5281 case OP_POSQUERY:
5282 case OP_POSUPTO:
5283 case OP_STARI:
5284 case OP_MINSTARI:
5285 case OP_PLUSI:
5286 case OP_MINPLUSI:
5287 case OP_QUERYI:
5288 case OP_MINQUERYI:
5289 case OP_UPTOI:
5290 case OP_MINUPTOI:
5291 case OP_EXACTI:
5292 case OP_POSSTARI:
5293 case OP_POSPLUSI:
5294 case OP_POSQUERYI:
5295 case OP_POSUPTOI:
5296 case OP_NOTSTAR:
5297 case OP_NOTMINSTAR:
5298 case OP_NOTPLUS:
5299 case OP_NOTMINPLUS:
5300 case OP_NOTQUERY:
5301 case OP_NOTMINQUERY:
5302 case OP_NOTUPTO:
5303 case OP_NOTMINUPTO:
5304 case OP_NOTEXACT:
5305 case OP_NOTPOSSTAR:
5306 case OP_NOTPOSPLUS:
5307 case OP_NOTPOSQUERY:
5308 case OP_NOTPOSUPTO:
5309 case OP_NOTSTARI:
5310 case OP_NOTMINSTARI:
5311 case OP_NOTPLUSI:
5312 case OP_NOTMINPLUSI:
5313 case OP_NOTQUERYI:
5314 case OP_NOTMINQUERYI:
5315 case OP_NOTUPTOI:
5316 case OP_NOTMINUPTOI:
5317 case OP_NOTEXACTI:
5318 case OP_NOTPOSSTARI:
5319 case OP_NOTPOSPLUSI:
5320 case OP_NOTPOSQUERYI:
5321 case OP_NOTPOSUPTOI:
5322 case OP_TYPESTAR:
5323 case OP_TYPEMINSTAR:
5324 case OP_TYPEPLUS:
5325 case OP_TYPEMINPLUS:
5326 case OP_TYPEQUERY:
5327 case OP_TYPEMINQUERY:
5328 case OP_TYPEUPTO:
5329 case OP_TYPEMINUPTO:
5330 case OP_TYPEEXACT:
5331 case OP_TYPEPOSSTAR:
5332 case OP_TYPEPOSPLUS:
5333 case OP_TYPEPOSQUERY:
5334 case OP_TYPEPOSUPTO:
5335 cc = compile_iterator_hotpath(common, cc, parent);
5336 break;
5338 case OP_CLASS:
5339 case OP_NCLASS:
5340 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5341 cc = compile_iterator_hotpath(common, cc, parent);
5342 else
5343 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5344 break;
5346 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5347 case OP_XCLASS:
5348 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5349 cc = compile_iterator_hotpath(common, cc, parent);
5350 else
5351 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5352 break;
5353 #endif
5355 case OP_REF:
5356 case OP_REFI:
5357 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5358 cc = compile_ref_iterator_hotpath(common, cc, parent);
5359 else
5360 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5361 break;
5363 case OP_RECURSE:
5364 cc = compile_recurse_hotpath(common, cc, parent);
5365 break;
5367 case OP_ASSERT:
5368 case OP_ASSERT_NOT:
5369 case OP_ASSERTBACK:
5370 case OP_ASSERTBACK_NOT:
5371 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5372 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5373 break;
5375 case OP_BRAMINZERO:
5376 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5377 cc = bracketend(cc + 1);
5378 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5380 allocate_stack(common, 1);
5381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5383 else
5385 allocate_stack(common, 2);
5386 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5389 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5390 if (cc[1] > OP_ASSERTBACK_NOT)
5391 decrease_call_count(common);
5392 break;
5394 case OP_ONCE:
5395 case OP_ONCE_NC:
5396 case OP_BRA:
5397 case OP_CBRA:
5398 case OP_COND:
5399 case OP_SBRA:
5400 case OP_SCBRA:
5401 case OP_SCOND:
5402 cc = compile_bracket_hotpath(common, cc, parent);
5403 break;
5405 case OP_BRAZERO:
5406 if (cc[1] > OP_ASSERTBACK_NOT)
5407 cc = compile_bracket_hotpath(common, cc, parent);
5408 else
5410 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5411 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5413 break;
5415 case OP_BRAPOS:
5416 case OP_CBRAPOS:
5417 case OP_SBRAPOS:
5418 case OP_SCBRAPOS:
5419 case OP_BRAPOSZERO:
5420 cc = compile_bracketpos_hotpath(common, cc, parent);
5421 break;
5423 case OP_FAIL:
5424 case OP_ACCEPT:
5425 case OP_ASSERT_ACCEPT:
5426 cc = compile_fail_accept_hotpath(common, cc, parent);
5427 break;
5429 case OP_CLOSE:
5430 cc = compile_close_hotpath(common, cc);
5431 break;
5433 case OP_SKIPZERO:
5434 cc = bracketend(cc + 1);
5435 break;
5437 default:
5438 SLJIT_ASSERT_STOP();
5439 return;
5441 if (cc == NULL)
5442 return;
5444 SLJIT_ASSERT(cc == ccend);
/* The helper macros used while emitting hot paths are dropped here. */
5447 #undef PUSH_FALLBACK
5448 #undef PUSH_FALLBACK_NOVALUE
5449 #undef FALLBACK_AS
/* Calls compile_fallbackpath() on the given record and returns from the
   enclosing function (which must return void) when the compiler reports an
   error. Expects `common` and `compiler` to be in scope at the point of use. */
5451 #define COMPILE_FALLBACKPATH(current) \
5452 do \
5454 compile_fallbackpath(common, (current)); \
5455 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5456 return; \
5458 while (0)
/* Casts the variable named `current` in the enclosing scope to a pointer to
   the given fallback record type. */
5460 #define CURRENT_AS(type) ((type*)current)
/* Emits the fallback code for a single-item iterator.

   The opcode, item type and repeat arguments are decoded again from
   current->cc with get_iterator_parameters(). Depending on the opcode the
   generated code either jumps back to the hotpath label stored in the
   iterator_fallback record or releases the iterator's stack slots with
   free_stack().

   common   compiler state
   current  fallback record of the iterator (used as iterator_fallback) */
5462 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5464 DEFINE_COMPILER;
5465 pcre_uchar *cc = current->cc;
5466 pcre_uchar opcode;
5467 pcre_uchar type;
5468 int arg1 = -1, arg2 = -1;
5469 struct sljit_label *label = NULL;
5470 struct sljit_jump *jump = NULL;
5472 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5474 switch(opcode)
5476 case OP_STAR:
5477 case OP_PLUS:
5478 case OP_UPTO:
5479 case OP_CRRANGE:
/* OP_ANYNL and OP_EXTUNI items use a single stack slot: it is popped into
   STR_PTR and the hot path is resumed unless the popped value is zero. */
5480 if (type == OP_ANYNL || type == OP_EXTUNI)
5482 set_jumps(current->topfallbacks, LABEL());
5483 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5484 free_stack(common, 1);
5485 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5487 else
/* Other item types keep a counter in STACK(1) next to the saved STR_PTR in
   STACK(0). arg2 is forced to 0 for OP_STAR / OP_UPTO and to 1 for OP_PLUS.
   While the counter is above arg2 + 1 it is decremented, skip_char_back()
   is applied to the reloaded STR_PTR (presumably stepping back one
   character - confirm in the helper) and the hot path is retried;
   otherwise both slots are released. */
5489 if (opcode == OP_STAR || opcode == OP_UPTO)
5490 arg2 = 0;
5491 else if (opcode == OP_PLUS)
5492 arg2 = 1;
5493 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5494 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5495 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5496 skip_char_back(common);
5497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5498 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5499 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5500 set_jumps(current->topfallbacks, LABEL());
5501 JUMPHERE(jump);
5502 free_stack(common, 2);
5504 break;
/* Lazy star / plus: reload the saved STR_PTR, try to match one more item,
   store the new position and resume the hot path. When the item does not
   match, the slot is released. */
5506 case OP_MINSTAR:
5507 case OP_MINPLUS:
5508 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Jumps already collected in topfallbacks land here; the list is then
   emptied so it can collect the failures of the item match below. */
5509 if (opcode == OP_MINPLUS)
5511 set_jumps(current->topfallbacks, LABEL());
5512 current->topfallbacks = NULL;
5514 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5516 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5517 set_jumps(current->topfallbacks, LABEL());
5518 free_stack(common, 1);
5519 break;
/* Lazy bounded repeat: STACK(0) holds the saved STR_PTR and STACK(1) a
   counter that is incremented after every additional item matched here. */
5521 case OP_MINUPTO:
5522 case OP_CRMINRANGE:
5523 if (opcode == OP_CRMINRANGE)
5525 set_jumps(current->topfallbacks, LABEL());
5526 current->topfallbacks = NULL;
5527 label = LABEL();
5529 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5530 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5534 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5535 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
/* For OP_CRMINRANGE keep matching at `label` while the counter is below
   arg2 + 1. */
5537 if (opcode == OP_CRMINRANGE)
5538 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
/* Resume the hot path: always for OP_CRMINRANGE with arg1 == 0, otherwise
   only while the counter is below arg1 + 2. */
5540 if (opcode == OP_CRMINRANGE && arg1 == 0)
5541 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5542 else
5543 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5545 set_jumps(current->topfallbacks, LABEL());
5546 free_stack(common, 2);
5547 break;
/* Greedy optional item: the slot is read into STR_PTR and overwritten with
   zero. A non-zero value resumes the hot path, a zero value releases the
   slot. Failures recorded in topfallbacks take the same read-and-clear
   route and always resume the hot path. */
5549 case OP_QUERY:
5550 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5552 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5553 jump = JUMP(SLJIT_JUMP);
5554 set_jumps(current->topfallbacks, LABEL());
5555 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5557 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5558 JUMPHERE(jump);
5559 free_stack(common, 1);
5560 break;
/* Lazy optional item: the slot is read into STR_PTR and overwritten with
   zero. A zero value skips straight to releasing the slot; otherwise one
   item is matched and the hot path is resumed. */
5562 case OP_MINQUERY:
5563 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5565 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5566 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5567 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5568 set_jumps(current->topfallbacks, LABEL());
5569 JUMPHERE(jump);
5570 free_stack(common, 1);
5571 break;
/* Only the pending failure jumps are bound; no other code is emitted. */
5573 case OP_EXACT:
5574 case OP_POSPLUS:
5575 set_jumps(current->topfallbacks, LABEL());
5576 break;
/* No fallback code is generated for these opcodes. */
5578 case OP_POSSTAR:
5579 case OP_POSQUERY:
5580 case OP_POSUPTO:
5581 break;
5583 default:
5584 SLJIT_ASSERT_STOP();
5585 break;
5589 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5591 DEFINE_COMPILER;
5592 pcre_uchar *cc = current->cc;
5593 pcre_uchar type;
5595 type = cc[1 + IMM2_SIZE];
5596 if ((type & 0x1) == 0)
5598 set_jumps(current->topfallbacks, LABEL());
5599 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5600 free_stack(common, 1);
5601 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5602 return;
5605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5606 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5607 set_jumps(current->topfallbacks, LABEL());
5608 free_stack(common, 2);
5611 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5613 DEFINE_COMPILER;
5615 set_jumps(current->topfallbacks, LABEL());
5616 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5617 free_stack(common, 1);
5618 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5621 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5623 DEFINE_COMPILER;
5624 pcre_uchar *cc = current->cc;
5625 pcre_uchar bra = OP_BRA;
5626 struct sljit_jump *brajump = NULL;
5628 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5629 if (*cc == OP_BRAZERO)
5631 bra = *cc;
5632 cc++;
5635 if (bra == OP_BRAZERO)
5637 SLJIT_ASSERT(current->topfallbacks == NULL);
5638 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5641 if (CURRENT_AS(assert_fallback)->framesize < 0)
5643 set_jumps(current->topfallbacks, LABEL());
5645 if (bra == OP_BRAZERO)
5647 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5648 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5649 free_stack(common, 1);
5651 return;
5654 if (bra == OP_BRAZERO)
5656 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5659 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5660 free_stack(common, 1);
5661 return;
5663 free_stack(common, 1);
5664 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5667 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5669 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5670 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5673 set_jumps(current->topfallbacks, LABEL());
5675 else
5676 set_jumps(current->topfallbacks, LABEL());
5678 if (bra == OP_BRAZERO)
5680 /* We know there is enough place on the stack. */
5681 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5683 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5684 JUMPHERE(brajump);
/* Emits the fallback code for a bracket group.

   Handles an optional OP_BRAZERO / OP_BRAMINZERO prefix, the closing ket
   type read from the end of the group (OP_KET, OP_KETRMAX, OP_KETRMIN),
   OP_ONCE / OP_ONCE_NC groups, OP_COND / OP_SCOND groups, capturing groups
   (offset != 0) and groups with OP_ALT alternatives. For groups with
   further alternatives, the hot path of each remaining alternative is
   compiled here as well, followed by its own fallback code.

   common   compiler state
   current  fallback record of the group (used as bracket_fallback) */
5688 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5690 DEFINE_COMPILER;
5691 int opcode;
5692 int offset = 0;
5693 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5694 int stacksize;
5695 int count;
5696 pcre_uchar *cc = current->cc;
5697 pcre_uchar *ccbegin;
5698 pcre_uchar *ccprev;
5699 jump_list *jumplist = NULL;
5700 jump_list *jumplistitem = NULL;
5701 pcre_uchar bra = OP_BRA;
5702 pcre_uchar ket;
5703 assert_fallback *assert;
5704 BOOL has_alternatives;
5705 struct sljit_jump *brazero = NULL;
5706 struct sljit_jump *once = NULL;
5707 struct sljit_jump *cond = NULL;
5708 struct sljit_label *rminlabel = NULL;
/* Step over an optional zero-repeat prefix and remember it in bra. */
5710 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5712 bra = *cc;
5713 cc++;
/* Classify the group: opening opcode, closing ket, capture offset and
   whether code for further alternatives is needed. */
5716 opcode = *cc;
5717 ccbegin = cc;
5718 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5719 cc += GET(cc, 1);
5720 has_alternatives = *cc == OP_ALT;
5721 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5722 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5723 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5724 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* From here on OP_COND closed by a repeating ket is handled as OP_SCOND,
   and OP_ONCE_NC as OP_ONCE. */
5725 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5726 opcode = OP_SCOND;
5727 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5728 opcode = OP_ONCE;
/* Ket-specific prologue. */
5730 if (ket == OP_KETRMAX)
5732 if (bra != OP_BRAZERO)
5733 free_stack(common, 1);
5734 else
5736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5737 free_stack(common, 1);
5738 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5741 else if (ket == OP_KETRMIN)
5743 if (bra != OP_BRAMINZERO)
5745 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5746 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5748 /* Checking zero-length iteration. */
5749 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5750 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5751 else
5753 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5754 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5756 if (opcode != OP_ONCE)
5757 free_stack(common, 1);
5759 else
5760 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5762 rminlabel = LABEL();
5764 else if (bra == OP_BRAZERO)
5766 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5767 free_stack(common, 1);
5768 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
/* Opcode-specific prologue. OP_ONCE emits a jump that is bound at
   JUMPHERE(once) further down. Conditionals and alternations pop the saved
   branch index into TMP1 and emit one compare-and-jump per branch; these
   jumps are collected in jumplist. */
5771 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5773 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5775 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5776 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5778 once = JUMP(SLJIT_JUMP);
5780 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5782 if (has_alternatives)
5784 /* Always exactly one alternative. */
5785 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5786 free_stack(common, 1);
5788 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5789 if (SLJIT_UNLIKELY(!jumplistitem))
5790 return;
5791 jumplist = jumplistitem;
5792 jumplistitem->next = NULL;
5793 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5796 else if (*cc == OP_ALT)
5798 /* Build a jump list. Get the last successfully matched branch index. */
5799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5800 free_stack(common, 1);
5801 count = 1;
5804 /* Append as the last item. */
5805 if (jumplist != NULL)
5807 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5808 jumplistitem = jumplistitem->next;
5810 else
5812 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5813 jumplist = jumplistitem;
5816 if (SLJIT_UNLIKELY(!jumplistitem))
5817 return;
5819 jumplistitem->next = NULL;
5820 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5821 cc += GET(cc, 1);
5823 while (*cc == OP_ALT);
5825 cc = ccbegin + GET(ccbegin, 1);
/* Generate the fallback code for the records chained from current->top,
   then bind this record's pending failure jumps. */
5828 COMPILE_FALLBACKPATH(current->top);
5829 if (current->topfallbacks)
5830 set_jumps(current->topfallbacks, LABEL());
5832 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5834 /* Conditional block always has at most one alternative. */
5835 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5837 SLJIT_ASSERT(has_alternatives);
5838 assert = CURRENT_AS(bracket_fallback)->u.assert;
5839 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5841 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5842 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5845 cond = JUMP(SLJIT_JUMP);
5846 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5848 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5850 SLJIT_ASSERT(has_alternatives);
5851 cond = JUMP(SLJIT_JUMP);
5852 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5854 else
5855 SLJIT_ASSERT(!has_alternatives);
/* Compile the hot path of each remaining alternative in turn; every
   alternative is followed by its own fallback code. The record's lists are
   reset before each alternative is compiled. */
5858 if (has_alternatives)
5860 count = 1;
5863 current->top = NULL;
5864 current->topfallbacks = NULL;
5865 current->nextfallbacks = NULL;
5866 if (*cc == OP_ALT)
5868 ccprev = cc + 1 + LINK_SIZE;
5869 cc += GET(cc, 1);
5870 if (opcode != OP_COND && opcode != OP_SCOND)
5872 if (localptr != 0 && opcode != OP_ONCE)
5873 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5874 else
5875 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5877 compile_hotpath(common, ccprev, cc, current);
5878 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5879 return;
5882 /* Instructions after the current alternative is succesfully matched. */
5883 /* There is a similar code in compile_bracket_hotpath. */
5884 if (opcode == OP_ONCE)
5886 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5888 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5889 /* TMP2 which is set here used by OP_KETRMAX below. */
5890 if (ket == OP_KETRMAX)
5891 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5892 else if (ket == OP_KETRMIN)
5894 /* Move the STR_PTR to the localptr. */
5895 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5898 else
5900 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5901 if (ket == OP_KETRMAX)
5903 /* TMP2 which is set here used by OP_KETRMAX below. */
5904 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* One slot for the branch index (not for OP_ONCE) and one more when the
   group repeats or carries a zero-repeat prefix. */
5909 stacksize = 0;
5910 if (opcode != OP_ONCE)
5911 stacksize++;
5912 if (ket != OP_KET || bra != OP_BRA)
5913 stacksize++;
5915 if (stacksize > 0) {
5916 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5917 allocate_stack(common, stacksize);
5918 else
5920 /* We know we have place at least for one item on the top of the stack. */
5921 SLJIT_ASSERT(stacksize == 1);
5922 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5926 stacksize = 0;
5927 if (ket != OP_KET || bra != OP_BRA)
5929 if (ket != OP_KET)
5930 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5931 else
5932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5933 stacksize++;
5936 if (opcode != OP_ONCE)
5937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
/* Capturing group: store the pair (value at localptr, STR_PTR) into the
   group's two OVECTOR slots. */
5939 if (offset != 0)
5941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5943 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5946 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
/* Bind the compare-and-jump emitted for this branch index in the prologue. */
5948 if (opcode != OP_ONCE)
5950 SLJIT_ASSERT(jumplist);
5951 JUMPHERE(jumplist->jump);
5952 jumplist = jumplist->next;
5955 COMPILE_FALLBACKPATH(current->top);
5956 if (current->topfallbacks)
5957 set_jumps(current->topfallbacks, LABEL());
5958 SLJIT_ASSERT(!current->nextfallbacks);
5960 while (*cc == OP_ALT);
5961 SLJIT_ASSERT(!jumplist);
5963 if (cond != NULL)
5965 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5966 assert = CURRENT_AS(bracket_fallback)->u.assert;
5967 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
5970 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5971 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5972 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5974 JUMPHERE(cond);
5977 /* Free the STR_PTR. */
5978 if (localptr == 0)
5979 free_stack(common, 1);
/* Reload the values kept on the stack for this group type and release
   their slots. */
5982 if (offset != 0)
5984 /* Using both tmp register is better for instruction scheduling. */
5985 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5988 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5990 free_stack(common, 3);
5992 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5995 free_stack(common, 1);
5997 else if (opcode == OP_ONCE)
5999 cc = ccbegin + GET(ccbegin, 1);
6000 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6002 /* Reset head and drop saved frame. */
6003 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6004 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
6006 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6008 /* The STR_PTR must be released. */
6009 free_stack(common, 1);
6012 JUMPHERE(once);
6013 /* Restore previous localptr */
6014 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
6016 else if (ket == OP_KETRMIN)
6018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6019 /* See the comment below. */
6020 free_stack(common, 2);
6021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
/* Ket-specific epilogue: repeating groups may resume at recursivehotpath
   or rminlabel, and an OP_BRAZERO prefix may resume at zerohotpath. */
6025 if (ket == OP_KETRMAX)
6027 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6028 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
6029 if (bra == OP_BRAZERO)
6031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6032 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6033 JUMPHERE(brazero);
6035 free_stack(common, 1);
6037 else if (ket == OP_KETRMIN)
6039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6041 /* OP_ONCE removes everything in case of a fallback, so we don't
6042 need to explicitly release the STR_PTR. The extra release would
6043 affect badly the free_stack(2) above. */
6044 if (opcode != OP_ONCE)
6045 free_stack(common, 1);
6046 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6047 if (opcode == OP_ONCE)
6048 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6049 else if (bra == OP_BRAMINZERO)
6050 free_stack(common, 1);
6052 else if (bra == OP_BRAZERO)
6054 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6055 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6056 JUMPHERE(brazero);
6060 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
6062 DEFINE_COMPILER;
6063 int offset;
6064 struct sljit_jump *jump;
6066 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6068 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6070 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6071 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6072 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6073 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6076 set_jumps(current->topfallbacks, LABEL());
6077 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6078 return;
6081 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6082 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6084 if (current->topfallbacks)
6086 jump = JUMP(SLJIT_JUMP);
6087 set_jumps(current->topfallbacks, LABEL());
6088 /* Drop the stack frame. */
6089 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6090 JUMPHERE(jump);
6092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6095 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6097 assert_fallback fallback;
6099 current->top = NULL;
6100 current->topfallbacks = NULL;
6101 current->nextfallbacks = NULL;
6102 if (current->cc[1] > OP_ASSERTBACK_NOT)
6104 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6105 compile_bracket_hotpath(common, current->cc, current);
6106 compile_bracket_fallbackpath(common, current->top);
6108 else
6110 memset(&fallback, 0, sizeof(fallback));
6111 fallback.common.cc = current->cc;
6112 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6113 /* Manual call of compile_assert_hotpath. */
6114 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6116 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6119 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6121 DEFINE_COMPILER;
6123 while (current)
6125 if (current->nextfallbacks != NULL)
6126 set_jumps(current->nextfallbacks, LABEL());
6127 switch(*current->cc)
6129 case OP_SET_SOM:
6130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6131 free_stack(common, 1);
6132 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6133 break;
6135 case OP_STAR:
6136 case OP_MINSTAR:
6137 case OP_PLUS:
6138 case OP_MINPLUS:
6139 case OP_QUERY:
6140 case OP_MINQUERY:
6141 case OP_UPTO:
6142 case OP_MINUPTO:
6143 case OP_EXACT:
6144 case OP_POSSTAR:
6145 case OP_POSPLUS:
6146 case OP_POSQUERY:
6147 case OP_POSUPTO:
6148 case OP_STARI:
6149 case OP_MINSTARI:
6150 case OP_PLUSI:
6151 case OP_MINPLUSI:
6152 case OP_QUERYI:
6153 case OP_MINQUERYI:
6154 case OP_UPTOI:
6155 case OP_MINUPTOI:
6156 case OP_EXACTI:
6157 case OP_POSSTARI:
6158 case OP_POSPLUSI:
6159 case OP_POSQUERYI:
6160 case OP_POSUPTOI:
6161 case OP_NOTSTAR:
6162 case OP_NOTMINSTAR:
6163 case OP_NOTPLUS:
6164 case OP_NOTMINPLUS:
6165 case OP_NOTQUERY:
6166 case OP_NOTMINQUERY:
6167 case OP_NOTUPTO:
6168 case OP_NOTMINUPTO:
6169 case OP_NOTEXACT:
6170 case OP_NOTPOSSTAR:
6171 case OP_NOTPOSPLUS:
6172 case OP_NOTPOSQUERY:
6173 case OP_NOTPOSUPTO:
6174 case OP_NOTSTARI:
6175 case OP_NOTMINSTARI:
6176 case OP_NOTPLUSI:
6177 case OP_NOTMINPLUSI:
6178 case OP_NOTQUERYI:
6179 case OP_NOTMINQUERYI:
6180 case OP_NOTUPTOI:
6181 case OP_NOTMINUPTOI:
6182 case OP_NOTEXACTI:
6183 case OP_NOTPOSSTARI:
6184 case OP_NOTPOSPLUSI:
6185 case OP_NOTPOSQUERYI:
6186 case OP_NOTPOSUPTOI:
6187 case OP_TYPESTAR:
6188 case OP_TYPEMINSTAR:
6189 case OP_TYPEPLUS:
6190 case OP_TYPEMINPLUS:
6191 case OP_TYPEQUERY:
6192 case OP_TYPEMINQUERY:
6193 case OP_TYPEUPTO:
6194 case OP_TYPEMINUPTO:
6195 case OP_TYPEEXACT:
6196 case OP_TYPEPOSSTAR:
6197 case OP_TYPEPOSPLUS:
6198 case OP_TYPEPOSQUERY:
6199 case OP_TYPEPOSUPTO:
6200 case OP_CLASS:
6201 case OP_NCLASS:
6202 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6203 case OP_XCLASS:
6204 #endif
6205 compile_iterator_fallbackpath(common, current);
6206 break;
6208 case OP_REF:
6209 case OP_REFI:
6210 compile_ref_iterator_fallbackpath(common, current);
6211 break;
6213 case OP_RECURSE:
6214 compile_recurse_fallbackpath(common, current);
6215 break;
6217 case OP_ASSERT:
6218 case OP_ASSERT_NOT:
6219 case OP_ASSERTBACK:
6220 case OP_ASSERTBACK_NOT:
6221 compile_assert_fallbackpath(common, current);
6222 break;
6224 case OP_ONCE:
6225 case OP_ONCE_NC:
6226 case OP_BRA:
6227 case OP_CBRA:
6228 case OP_COND:
6229 case OP_SBRA:
6230 case OP_SCBRA:
6231 case OP_SCOND:
6232 compile_bracket_fallbackpath(common, current);
6233 break;
6235 case OP_BRAZERO:
6236 if (current->cc[1] > OP_ASSERTBACK_NOT)
6237 compile_bracket_fallbackpath(common, current);
6238 else
6239 compile_assert_fallbackpath(common, current);
6240 break;
6242 case OP_BRAPOS:
6243 case OP_CBRAPOS:
6244 case OP_SBRAPOS:
6245 case OP_SCBRAPOS:
6246 case OP_BRAPOSZERO:
6247 compile_bracketpos_fallbackpath(common, current);
6248 break;
6250 case OP_BRAMINZERO:
6251 compile_braminzero_fallbackpath(common, current);
6252 break;
6254 case OP_FAIL:
6255 case OP_ACCEPT:
6256 case OP_ASSERT_ACCEPT:
6257 set_jumps(current->topfallbacks, LABEL());
6258 break;
6260 default:
6261 SLJIT_ASSERT_STOP();
6262 break;
6264 current = current->prev;
6268 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6270 DEFINE_COMPILER;
6271 pcre_uchar *cc = common->start + common->currententry->start;
6272 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6273 pcre_uchar *ccend = bracketend(cc);
6274 int localsize = get_localsize(common, ccbegin, ccend);
6275 int framesize = get_framesize(common, cc, TRUE);
6276 int alternativesize;
6277 BOOL needsframe;
6278 fallback_common altfallback;
6279 struct sljit_jump *jump;
6281 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6282 needsframe = framesize >= 0;
6283 if (!needsframe)
6284 framesize = 0;
6285 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6287 SLJIT_ASSERT(common->currententry->entry == NULL);
6288 common->currententry->entry = LABEL();
6289 set_jumps(common->currententry->calls, common->currententry->entry);
6291 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6292 allocate_stack(common, localsize + framesize + alternativesize);
6293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6294 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6296 if (needsframe)
6297 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6299 if (alternativesize > 0)
6300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6302 memset(&altfallback, 0, sizeof(fallback_common));
6303 common->acceptlabel = NULL;
6304 common->accept = NULL;
6305 altfallback.cc = ccbegin;
6306 cc += GET(cc, 1);
6307 while (1)
6309 altfallback.top = NULL;
6310 altfallback.topfallbacks = NULL;
6312 if (altfallback.cc != ccbegin)
6313 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6315 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6316 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6317 return;
6319 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6321 compile_fallbackpath(common, altfallback.top);
6322 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6323 return;
6324 set_jumps(altfallback.topfallbacks, LABEL());
6326 if (*cc != OP_ALT)
6327 break;
6329 altfallback.cc = cc + 1 + LINK_SIZE;
6330 cc += GET(cc, 1);
6332 /* None of them matched. */
6333 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6334 jump = JUMP(SLJIT_JUMP);
6336 set_jumps(common->accept, LABEL());
6337 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6338 if (needsframe)
6340 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6341 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6342 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6343 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6346 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6348 JUMPHERE(jump);
6349 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6350 free_stack(common, localsize + framesize + alternativesize);
6351 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6352 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6354 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6357 #undef COMPILE_FALLBACKPATH
6358 #undef CURRENT_AS
6360 void
6361 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra)
6363 struct sljit_compiler *compiler;
6364 fallback_common rootfallback;
6365 compiler_common common_data;
6366 compiler_common *common = &common_data;
6367 const pcre_uint8 *tables = re->tables;
6368 pcre_study_data *study;
6369 pcre_uchar *ccend;
6370 executable_function *function;
6371 void *executable_func;
6372 sljit_uw executable_size;
6373 struct sljit_label *leave;
6374 struct sljit_label *mainloop = NULL;
6375 struct sljit_label *empty_match_found;
6376 struct sljit_label *empty_match_fallback;
6377 struct sljit_jump *alloc_error;
6378 struct sljit_jump *reqbyte_notfound = NULL;
6379 struct sljit_jump *empty_match;
6381 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6382 study = extra->study_data;
6384 if (!tables)
6385 tables = PRIV(default_tables);
6387 memset(&rootfallback, 0, sizeof(fallback_common));
6388 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6390 common->compiler = NULL;
6391 common->start = rootfallback.cc;
6392 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6393 common->fcc = tables + fcc_offset;
6394 common->lcc = (sljit_w)(tables + lcc_offset);
6395 common->nltype = NLTYPE_FIXED;
6396 switch(re->options & PCRE_NEWLINE_BITS)
6398 case 0:
6399 /* Compile-time default */
6400 switch (NEWLINE)
6402 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6403 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6404 default: common->newline = NEWLINE; break;
6406 break;
6407 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6408 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6409 case PCRE_NEWLINE_CR+
6410 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6411 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6412 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6413 default: return;
6415 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6416 common->bsr_nltype = NLTYPE_ANYCRLF;
6417 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6418 common->bsr_nltype = NLTYPE_ANY;
6419 else
6421 #ifdef BSR_ANYCRLF
6422 common->bsr_nltype = NLTYPE_ANYCRLF;
6423 #else
6424 common->bsr_nltype = NLTYPE_ANY;
6425 #endif
6427 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6428 common->ctypes = (sljit_w)(tables + ctypes_offset);
6429 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6430 common->name_count = re->name_count;
6431 common->name_entry_size = re->name_entry_size;
6432 common->acceptlabel = NULL;
6433 common->stubs = NULL;
6434 common->entries = NULL;
6435 common->currententry = NULL;
6436 common->accept = NULL;
6437 common->calllimit = NULL;
6438 common->stackalloc = NULL;
6439 common->revertframes = NULL;
6440 common->wordboundary = NULL;
6441 common->anynewline = NULL;
6442 common->hspace = NULL;
6443 common->vspace = NULL;
6444 common->casefulcmp = NULL;
6445 common->caselesscmp = NULL;
6446 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6447 #ifdef SUPPORT_UTF
6448 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6449 common->utf = (re->options & PCRE_UTF8) != 0;
6450 #ifdef SUPPORT_UCP
6451 common->use_ucp = (re->options & PCRE_UCP) != 0;
6452 #endif
6453 common->utfreadchar = NULL;
6454 #ifdef COMPILE_PCRE8
6455 common->utfreadtype8 = NULL;
6456 #endif
6457 #endif /* SUPPORT_UTF */
6458 #ifdef SUPPORT_UCP
6459 common->getucd = NULL;
6460 #endif
6461 ccend = bracketend(rootfallback.cc);
6462 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6463 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6464 if (common->localsize < 0)
6465 return;
6466 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6467 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6468 return;
6469 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6470 if (!common->localptrs)
6471 return;
6472 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6473 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6475 compiler = sljit_create_compiler();
6476 if (!compiler)
6478 SLJIT_FREE(common->localptrs);
6479 return;
6481 common->compiler = compiler;
6483 /* Main pcre_jit_exec entry. */
6484 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6486 /* Register init. */
6487 reset_ovector(common, (re->top_bracket + 1) * 2);
6488 if ((re->flags & PCRE_REQCHSET) != 0)
6489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);
6491 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
6492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
6493 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6494 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6496 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6497 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6498 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6499 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6501 /* Main part of the matching */
6502 if ((re->options & PCRE_ANCHORED) == 0)
6504 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6505 /* Forward search if possible. */
6506 if ((re->flags & PCRE_FIRSTSET) != 0)
6507 fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6508 else if ((re->flags & PCRE_STARTLINE) != 0)
6509 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6510 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6511 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6513 if ((re->flags & PCRE_REQCHSET) != 0)
6514 reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
6516 /* Store the current STR_PTR in OVECTOR(0). */
6517 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6518 /* Copy the limit of allowed recursions. */
6519 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6521 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6522 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6524 sljit_free_compiler(compiler);
6525 SLJIT_FREE(common->localptrs);
6526 return;
6529 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6530 empty_match_found = LABEL();
6532 common->acceptlabel = LABEL();
6533 if (common->accept != NULL)
6534 set_jumps(common->accept, common->acceptlabel);
6536 /* This means we have a match. Update the ovector. */
6537 copy_ovector(common, re->top_bracket + 1);
6538 leave = LABEL();
6539 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
6541 empty_match_fallback = LABEL();
6542 compile_fallbackpath(common, rootfallback.top);
6543 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6545 sljit_free_compiler(compiler);
6546 SLJIT_FREE(common->localptrs);
6547 return;
6550 SLJIT_ASSERT(rootfallback.prev == NULL);
6552 /* Check we have remaining characters. */
6553 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6555 if ((re->options & PCRE_ANCHORED) == 0)
6557 if ((re->options & PCRE_FIRSTLINE) == 0)
6559 if (study != NULL && study->minlength > 1)
6561 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6562 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6564 else
6565 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6567 else
6569 if (study != NULL && study->minlength > 1)
6571 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6572 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6573 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6574 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6575 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6576 JUMPTO(SLJIT_C_ZERO, mainloop);
6578 else
6579 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6583 if (reqbyte_notfound != NULL)
6584 JUMPHERE(reqbyte_notfound);
6585 /* Copy OVECTOR(1) to OVECTOR(0) */
6586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6587 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6588 JUMPTO(SLJIT_JUMP, leave);
6590 flush_stubs(common);
6592 JUMPHERE(empty_match);
6593 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6594 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6595 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6596 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6597 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6598 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6599 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6600 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6602 common->currententry = common->entries;
6603 while (common->currententry != NULL)
6605 /* Might add new entries. */
6606 compile_recurse(common);
6607 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6609 sljit_free_compiler(compiler);
6610 SLJIT_FREE(common->localptrs);
6611 return;
6613 flush_stubs(common);
6614 common->currententry = common->currententry->next;
6617 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6618 /* This is a (really) rare case. */
6619 set_jumps(common->stackalloc, LABEL());
6620 /* RETURN_ADDR is not a saved register. */
6621 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6623 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6624 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6625 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6626 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6628 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6629 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6630 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6632 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6633 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6634 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6635 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6637 /* Allocation failed. */
6638 JUMPHERE(alloc_error);
6639 /* We break the return address cache here, but this is a really rare case. */
6640 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6641 JUMPTO(SLJIT_JUMP, leave);
6643 /* Call limit reached. */
6644 set_jumps(common->calllimit, LABEL());
6645 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6646 JUMPTO(SLJIT_JUMP, leave);
6648 if (common->revertframes != NULL)
6650 set_jumps(common->revertframes, LABEL());
6651 do_revertframes(common);
6653 if (common->wordboundary != NULL)
6655 set_jumps(common->wordboundary, LABEL());
6656 check_wordboundary(common);
6658 if (common->anynewline != NULL)
6660 set_jumps(common->anynewline, LABEL());
6661 check_anynewline(common);
6663 if (common->hspace != NULL)
6665 set_jumps(common->hspace, LABEL());
6666 check_hspace(common);
6668 if (common->vspace != NULL)
6670 set_jumps(common->vspace, LABEL());
6671 check_vspace(common);
6673 if (common->casefulcmp != NULL)
6675 set_jumps(common->casefulcmp, LABEL());
6676 do_casefulcmp(common);
6678 if (common->caselesscmp != NULL)
6680 set_jumps(common->caselesscmp, LABEL());
6681 do_caselesscmp(common);
6683 #ifdef SUPPORT_UTF
6684 if (common->utfreadchar != NULL)
6686 set_jumps(common->utfreadchar, LABEL());
6687 do_utfreadchar(common);
6689 #ifdef COMPILE_PCRE8
6690 if (common->utfreadtype8 != NULL)
6692 set_jumps(common->utfreadtype8, LABEL());
6693 do_utfreadtype8(common);
6695 #endif
6696 #endif /* COMPILE_PCRE8 */
6697 #ifdef SUPPORT_UCP
6698 if (common->getucd != NULL)
6700 set_jumps(common->getucd, LABEL());
6701 do_getucd(common);
6703 #endif
6705 SLJIT_FREE(common->localptrs);
6706 executable_func = sljit_generate_code(compiler);
6707 executable_size = sljit_get_generated_code_size(compiler);
6708 sljit_free_compiler(compiler);
6709 if (executable_func == NULL)
6710 return;
6712 function = SLJIT_MALLOC(sizeof(executable_function));
6713 if (function == NULL)
6715 /* This case is highly unlikely since we just recently
6716 freed a lot of memory. Although not impossible. */
6717 sljit_free_code(executable_func);
6718 return;
6721 function->executable_func = executable_func;
6722 function->executable_size = executable_size;
6723 function->callback = NULL;
6724 function->userdata = NULL;
6725 extra->executable_jit = function;
6726 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6729 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6731 union {
6732 void* executable_func;
6733 jit_function call_executable_func;
6734 } convert_executable_func;
6735 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6736 struct sljit_stack local_stack;
6738 local_stack.top = (sljit_w)&local_area;
6739 local_stack.base = local_stack.top;
6740 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6741 local_stack.max_limit = local_stack.limit;
6742 arguments->stack = &local_stack;
6743 convert_executable_func.executable_func = function->executable_func;
6744 return convert_executable_func.call_executable_func(arguments);
6748 PRIV(jit_exec)(const REAL_PCRE *re, void *executable_func,
6749 const pcre_uchar *subject, int length, int start_offset, int options,
6750 int match_limit, int *offsets, int offsetcount)
6752 executable_function *function = (executable_function*)executable_func;
6753 union {
6754 void* executable_func;
6755 jit_function call_executable_func;
6756 } convert_executable_func;
6757 jit_arguments arguments;
6758 int maxoffsetcount;
6759 int retval;
6761 /* Sanity checks should be handled by pcre_exec. */
6762 arguments.stack = NULL;
6763 arguments.str = subject + start_offset;
6764 arguments.begin = subject;
6765 arguments.end = subject + length;
6766 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6767 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6768 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6769 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6770 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6771 arguments.offsets = offsets;
6773 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6774 the output vector for storing captured strings, with the remainder used as
6775 workspace. We don't need the workspace here. For compatibility, we limit the
6776 number of captured strings in the same way as pcre_exec(), so that the user
6777 gets the same result with and without JIT. */
6779 if (offsetcount != 2)
6780 offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
6781 maxoffsetcount = (re->top_bracket + 1) * 2;
6782 if (offsetcount > maxoffsetcount)
6783 offsetcount = maxoffsetcount;
6784 arguments.offsetcount = offsetcount;
6786 if (function->callback)
6787 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6788 else
6789 arguments.stack = (struct sljit_stack*)function->userdata;
6791 if (arguments.stack == NULL)
6792 retval = jit_machine_stack_exec(&arguments, function);
6793 else
6795 convert_executable_func.executable_func = function->executable_func;
6796 retval = convert_executable_func.call_executable_func(&arguments);
6799 if (retval * 2 > offsetcount)
6800 retval = 0;
6801 return retval;
6804 void
6805 PRIV(jit_free)(void *executable_func)
6807 executable_function *function = (executable_function*)executable_func;
6808 sljit_free_code(function->executable_func);
6809 SLJIT_FREE(function);
6813 PRIV(jit_get_size)(void *executable_func)
6815 return ((executable_function*)executable_func)->executable_size;
6818 const char*
6819 PRIV(jit_get_target)(void)
6821 return sljit_get_platform_name();
6824 #ifdef COMPILE_PCRE8
6825 PCRE_EXP_DECL pcre_jit_stack *
6826 pcre_jit_stack_alloc(int startsize, int maxsize)
6827 #else
6828 PCRE_EXP_DECL pcre16_jit_stack *
6829 pcre16_jit_stack_alloc(int startsize, int maxsize)
6830 #endif
6832 if (startsize < 1 || maxsize < 1)
6833 return NULL;
6834 if (startsize > maxsize)
6835 startsize = maxsize;
6836 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6837 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6838 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
6841 #ifdef COMPILE_PCRE8
6842 PCRE_EXP_DECL void
6843 pcre_jit_stack_free(pcre_jit_stack *stack)
6844 #else
6845 PCRE_EXP_DECL void
6846 pcre16_jit_stack_free(pcre16_jit_stack *stack)
6847 #endif
6849 sljit_free_stack((struct sljit_stack*)stack);
6852 #ifdef COMPILE_PCRE8
6853 PCRE_EXP_DECL void
6854 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6855 #else
6856 PCRE_EXP_DECL void
6857 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
6858 #endif
6860 executable_function *function;
6861 if (extra != NULL &&
6862 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
6863 extra->executable_jit != NULL)
6865 function = (executable_function*)extra->executable_jit;
6866 function->callback = callback;
6867 function->userdata = userdata;
6871 #else /* SUPPORT_JIT */
6873 /* These are dummy functions to avoid linking errors when JIT support is not
6874 being compiled. */
6876 #ifdef COMPILE_PCRE8
6877 PCRE_EXP_DECL pcre_jit_stack *
6878 pcre_jit_stack_alloc(int startsize, int maxsize)
6879 #else
6880 PCRE_EXP_DECL pcre16_jit_stack *
6881 pcre16_jit_stack_alloc(int startsize, int maxsize)
6882 #endif
6884 (void)startsize;
6885 (void)maxsize;
6886 return NULL;
6889 #ifdef COMPILE_PCRE8
6890 PCRE_EXP_DECL void
6891 pcre_jit_stack_free(pcre_jit_stack *stack)
6892 #else
6893 PCRE_EXP_DECL void
6894 pcre16_jit_stack_free(pcre16_jit_stack *stack)
6895 #endif
6897 (void)stack;
6900 #ifdef COMPILE_PCRE8
6901 PCRE_EXP_DECL void
6902 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6903 #else
6904 PCRE_EXP_DECL void
6905 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
6906 #endif
6908 (void)extra;
6909 (void)callback;
6910 (void)userdata;
6913 #endif
6915 /* End of pcre_jit_compile.c */