1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2010 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
53 #include "pcre_internal.h"
55 /* Undefine some potentially clashing cpp symbols */
60 /* Flag bits for the match() function */
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
69 #define MATCH_NOMATCH 0
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998)
76 #define MATCH_PRUNE (-997)
77 #define MATCH_SKIP (-996)
78 #define MATCH_SKIP_ARG (-995)
79 #define MATCH_THEN (-994)
81 /* This is a convenience macro for code that occurs many times. */
83 #define MRRETURN(ra) \
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
93 #define REC_STACK_SAVE_MAX 30
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
/* rep_min[k] and rep_max[k] are parallel tables describing the same repeat k.
Read together, the six (min, max) pairs are (0,inf), (0,inf), (1,inf), (1,inf),
(0,1), (0,1), a max of 0 standing for "no upper limit".
NOTE(review): presumably k is the offset of a repeat opcode from the first
opcode of its group (greedy/minimizing *, +, ?) - confirm against the opcode
definitions before relying on this ordering. */
97 static const char rep_min
[] = { 0, 0, 1, 1, 0, 0 };
98 static const char rep_max
[] = { 0, 0, 0, 0, 1, 1 };
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if requested.
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
120 pchars(const uschar
*p
, int length
, BOOL is_subject
, match_data
*md
)
123 if (is_subject
&& length
> md
->end_subject
- p
) length
= md
->end_subject
- p
;
125 if (isprint(c
= *(p
++))) printf("%c", c
); else printf("\\x%02x", c
);
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
135 /* If a back reference hasn't been set, the length that is passed is greater
136 than the number of characters left in the string, so the match fails.
139 offset index into the offset vector
140 eptr points into the subject
141 length length to be matched
142 md points to match data block
145 Returns: TRUE if matched
149 match_ref(int offset
, register USPTR eptr
, int length
, match_data
*md
,
150 unsigned long int ims
)
152 USPTR p
= md
->start_subject
+ md
->offset_vector
[offset
];
155 if (eptr
>= md
->end_subject
)
156 printf("matching subject <null>");
159 printf("matching subject ");
160 pchars(eptr
, length
, TRUE
, md
);
162 printf(" against backref ");
163 pchars(p
, length
, FALSE
, md
);
167 /* Always fail if not enough characters left */
169 if (length
> md
->end_subject
- eptr
) return FALSE
;
171 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172 properly if Unicode properties are supported. Otherwise, we can check only
175 if ((ims
& PCRE_CASELESS
) != 0)
181 USPTR endptr
= eptr
+ length
;
182 while (eptr
< endptr
)
187 if (c
!= d
&& c
!= UCD_OTHERCASE(d
)) return FALSE
;
194 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195 is no UCP support. */
198 { if (md
->lcc
[*p
++] != md
->lcc
[*eptr
++]) return FALSE
; }
201 /* In the caseful case, we can just compare the bytes, whether or not we
202 are in UTF-8 mode. */
205 { while (length
-- > 0) if (*p
++ != *eptr
++) return FALSE
; }
212 /***************************************************************************
213 ****************************************************************************
214 RECURSION IN THE match() FUNCTION
216 The match() function is highly recursive, though not every recursive call
217 increases the recursive depth. Nevertheless, some regular expressions can cause
218 it to recurse to a great depth. I was writing for Unix, so I just let it call
219 itself recursively. This uses the stack for saving everything that has to be
220 saved for a recursive call. On Unix, the stack can be large, and this works
223 It turns out that on some non-Unix-like systems there are problems with
224 programs that use a lot of stack. (This despite the fact that every last chip
225 has oodles of memory these days, and techniques for extending the stack have
226 been known for decades.) So....
228 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229 calls by keeping local variables that need to be preserved in blocks of memory
230 obtained from malloc() instead of on the stack. Macros are used to
231 achieve this so that the actual code doesn't look very different to what it
234 The original heap-recursive code used longjmp(). However, it seems that this
235 can be very slow on some operating systems. Following a suggestion from Stan
236 Switzer, the use of longjmp() has been abolished, at the cost of having to
237 provide a unique number for each call to RMATCH. There is no way of generating
238 a sequence of numbers at compile time in C. I have given them names, to make
239 them stand out more clearly.
241 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 tests. Furthermore, not using longjmp() means that local dynamic variables
244 don't have indeterminate values; this has meant that the frame size can be
245 reduced because the result can be "passed back" by straight setting of the
246 variable instead of being passed in the frame.
247 ****************************************************************************
248 ***************************************************************************/
250 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251 below must be updated in sync. */
253 enum { RM1
=1, RM2
, RM3
, RM4
, RM5
, RM6
, RM7
, RM8
, RM9
, RM10
,
254 RM11
, RM12
, RM13
, RM14
, RM15
, RM16
, RM17
, RM18
, RM19
, RM20
,
255 RM21
, RM22
, RM23
, RM24
, RM25
, RM26
, RM27
, RM28
, RM29
, RM30
,
256 RM31
, RM32
, RM33
, RM34
, RM35
, RM36
, RM37
, RM38
, RM39
, RM40
,
257 RM41
, RM42
, RM43
, RM44
, RM45
, RM46
, RM47
, RM48
, RM49
, RM50
,
258 RM51
, RM52
, RM53
, RM54
, RM55
, RM56
, RM57
, RM58
, RM59
, RM60
,
261 /* These versions of the macros use the stack, as normal. There are debugging
262 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 actually used in this definition. */
266 #define REGISTER register
269 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 printf("match() called in line %d\n", __LINE__); \
272 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273 printf("to line %d\n", __LINE__); \
275 #define RRETURN(ra) \
277 printf("match() returned %d from line %d ", ra, __LINE__); \
281 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283 #define RRETURN(ra) return ra
289 /* These versions of the macros manage a private stack on the heap. Note that
290 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291 argument of match(), which never changes. */
295 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
297 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299 frame->Xwhere = rw; \
300 newframe->Xeptr = ra;\
301 newframe->Xecode = rb;\
302 newframe->Xmstart = mstart;\
303 newframe->Xmarkptr = markptr;\
304 newframe->Xoffset_top = rc;\
305 newframe->Xims = re;\
306 newframe->Xeptrb = rf;\
307 newframe->Xflags = rg;\
308 newframe->Xrdepth = frame->Xrdepth + 1;\
309 newframe->Xprevframe = frame;\
311 DPRINTF(("restarting from line %d\n", __LINE__));\
314 DPRINTF(("jumped back to line %d\n", __LINE__));\
319 heapframe *oldframe = frame;\
320 frame = oldframe->Xprevframe;\
321 (pcre_stack_free)(oldframe);\
331 /* Structure for remembering the local variables in a private frame */
333 typedef struct heapframe
{
334 struct heapframe
*Xprevframe
;
336 /* Function arguments that may change */
339 const uschar
*Xecode
;
346 unsigned int Xrdepth
;
348 /* Function local variables */
360 recursion_info Xnew_recursive
;
366 unsigned long int Xoriginal_ims
;
371 int Xprop_fail_result
;
389 int Xsave_capture_last
;
390 int Xsave_offset1
, Xsave_offset2
, Xsave_offset3
;
391 int Xstacksave
[REC_STACK_SAVE_MAX
];
395 /* Where to jump back to */
404 /***************************************************************************
405 ***************************************************************************/
409 /*************************************************
410 * Match from current position *
411 *************************************************/
413 /* This function is called recursively in many circumstances. Whenever it
414 returns a negative (error) response, the outer incarnation must also return the
417 /* These macros pack up tests that are used for partial matching, and which
418 appear several times in the code. We set the "hit end" flag if the pointer is
419 at the end of the subject and also past the start of the subject (i.e.
420 something has been matched). For hard partial matching, we then return
421 immediately. The second one is used when we already know we are past the end of
424 #define CHECK_PARTIAL()\
425 if (md->partial != 0 && eptr >= md->end_subject && \
426 eptr > md->start_used_ptr) \
429 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
432 #define SCHECK_PARTIAL()\
433 if (md->partial != 0 && eptr > md->start_used_ptr) \
436 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
440 /* Performance note: It might be tempting to extract commonly used fields from
441 the md structure (e.g. utf8, end_subject) into individual variables to improve
442 performance. Tests using gcc on a SPARC disproved this; in the first case, it
443 made performance worse.
446 eptr pointer to current character in subject
447 ecode pointer to current position in compiled code
448 mstart pointer to the current match start position (can be modified
450 markptr pointer to the most recent MARK name, or NULL
451 offset_top current top pointer
452 md pointer to "static" info for the match
453 ims current /i, /m, and /s options
454 eptrb pointer to chain of blocks containing eptr at start of
455 brackets - for testing for empty matches
457 match_condassert - this is an assertion condition
458 match_cbegroup - this is the start of an unlimited repeat
459 group that can match an empty string
460 rdepth the recursion depth
462 Returns: MATCH_MATCH if matched ) these values are >= 0
463 MATCH_NOMATCH if failed to match )
464 a negative MATCH_xxx value for PRUNE, SKIP, etc
465 a negative PCRE_ERROR_xxx value if aborted by an error condition
466 (e.g. stopped by repeated call or recursion limit)
470 match(REGISTER USPTR eptr
, REGISTER
const uschar
*ecode
, USPTR mstart
,
471 const uschar
*markptr
, int offset_top
, match_data
*md
, unsigned long int ims
,
472 eptrblock
*eptrb
, int flags
, unsigned int rdepth
)
474 /* These variables do not need to be preserved over recursion in this function,
475 so they can be ordinary variables in all cases. Mark some of them with
476 "register" because they are used a lot in loops. */
478 register int rrc
; /* Returns from recursive calls */
479 register int i
; /* Used for loops not involving calls to RMATCH() */
480 register unsigned int c
; /* Character values not kept over RMATCH() calls */
481 register BOOL utf8
; /* Local copy of UTF-8 flag for speed */
483 BOOL minimize
, possessive
; /* Quantifier options */
486 /* When recursion is not being used, all "local" variables that have to be
487 preserved over calls to RMATCH() are part of a "frame" which is obtained from
488 heap storage. Set up the top-level frame here; others are obtained from the
489 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
492 heapframe
*frame
= (heapframe
*)(pcre_stack_malloc
)(sizeof(heapframe
));
493 if (frame
== NULL
) RRETURN(PCRE_ERROR_NOMEMORY
);
494 frame
->Xprevframe
= NULL
; /* Marks the top level */
496 /* Copy in the original argument variables */
499 frame
->Xecode
= ecode
;
500 frame
->Xmstart
= mstart
;
501 frame
->Xmarkptr
= markptr
;
502 frame
->Xoffset_top
= offset_top
;
504 frame
->Xeptrb
= eptrb
;
505 frame
->Xflags
= flags
;
506 frame
->Xrdepth
= rdepth
;
508 /* This is where control jumps back to in order to effect "recursion" */
512 /* Macros make the argument variables come from the current frame */
514 #define eptr frame->Xeptr
515 #define ecode frame->Xecode
516 #define mstart frame->Xmstart
517 #define markptr frame->Xmarkptr
518 #define offset_top frame->Xoffset_top
519 #define ims frame->Xims
520 #define eptrb frame->Xeptrb
521 #define flags frame->Xflags
522 #define rdepth frame->Xrdepth
524 /* Ditto for the local variables */
527 #define charptr frame->Xcharptr
529 #define callpat frame->Xcallpat
530 #define codelink frame->Xcodelink
531 #define data frame->Xdata
532 #define next frame->Xnext
533 #define pp frame->Xpp
534 #define prev frame->Xprev
535 #define saved_eptr frame->Xsaved_eptr
537 #define new_recursive frame->Xnew_recursive
539 #define cur_is_word frame->Xcur_is_word
540 #define condition frame->Xcondition
541 #define prev_is_word frame->Xprev_is_word
543 #define original_ims frame->Xoriginal_ims
546 #define prop_type frame->Xprop_type
547 #define prop_value frame->Xprop_value
548 #define prop_fail_result frame->Xprop_fail_result
549 #define prop_category frame->Xprop_category
550 #define prop_chartype frame->Xprop_chartype
551 #define prop_script frame->Xprop_script
552 #define oclength frame->Xoclength
553 #define occhars frame->Xocchars
556 #define ctype frame->Xctype
557 #define fc frame->Xfc
558 #define fi frame->Xfi
559 #define length frame->Xlength
560 #define max frame->Xmax
561 #define min frame->Xmin
562 #define number frame->Xnumber
563 #define offset frame->Xoffset
564 #define op frame->Xop
565 #define save_capture_last frame->Xsave_capture_last
566 #define save_offset1 frame->Xsave_offset1
567 #define save_offset2 frame->Xsave_offset2
568 #define save_offset3 frame->Xsave_offset3
569 #define stacksave frame->Xstacksave
571 #define newptrb frame->Xnewptrb
573 /* When recursion is being used, local variables are allocated on the stack and
574 get preserved during recursion in the normal way. In this environment, fi and
575 i, and fc and c, can be the same variables. */
577 #else /* NO_RECURSE not defined */
582 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
583 const uschar
*charptr
; /* in small blocks of the code. My normal */
584 #endif /* style of coding would have declared */
585 const uschar
*callpat
; /* them within each of those blocks. */
586 const uschar
*data
; /* However, in order to accommodate the */
587 const uschar
*next
; /* version of this code that uses an */
588 USPTR pp
; /* external "stack" implemented on the */
589 const uschar
*prev
; /* heap, it is easier to declare them all */
590 USPTR saved_eptr
; /* here, so the declarations can be cut */
591 /* out in a block. The only declarations */
592 recursion_info new_recursive
; /* within blocks below are for variables */
593 /* that do not have to be preserved over */
594 BOOL cur_is_word
; /* a recursive call to RMATCH(). */
598 unsigned long int original_ims
;
603 int prop_fail_result
;
619 int save_capture_last
;
620 int save_offset1
, save_offset2
, save_offset3
;
621 int stacksave
[REC_STACK_SAVE_MAX
];
624 #endif /* NO_RECURSE */
626 /* These statements are here to stop the compiler complaining about uninitialized
631 prop_fail_result
= 0;
635 /* This label is used for tail recursion, which is used in a few cases even
636 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
637 used. Thanks to Ian Taylor for noticing this possibility and sending the
642 /* OK, now we can get on with the real code of the function. Recursive calls
643 are specified by the macro RMATCH and RRETURN is used to return. When
644 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
645 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
646 defined). However, RMATCH isn't like a function call because it's quite a
647 complicated macro. It has to be used in one particular way. This shouldn't,
648 however, impact performance when true recursion is being used. */
651 utf8
= md
->utf8
; /* Local copy of the flag */
656 /* First check that we haven't called match() too many times, or that we
657 haven't exceeded the recursive call limit. */
659 if (md
->match_call_count
++ >= md
->match_limit
) RRETURN(PCRE_ERROR_MATCHLIMIT
);
660 if (rdepth
>= md
->match_limit_recursion
) RRETURN(PCRE_ERROR_RECURSIONLIMIT
);
662 original_ims
= ims
; /* Save for resetting on ')' */
664 /* At the start of a group with an unlimited repeat that may match an empty
665 string, the match_cbegroup flag is set. When this is the case, add the current
666 subject pointer to the chain of such remembered pointers, to be checked when we
667 hit the closing ket, in order to break infinite loops that match no characters.
668 When match() is called in other circumstances, don't add to the chain. The
669 match_cbegroup flag must NOT be used with tail recursion, because the memory
670 block that is used is on the stack, so a new one may be required for each
673 if ((flags
& match_cbegroup
) != 0)
675 newptrb
.epb_saved_eptr
= eptr
;
676 newptrb
.epb_prev
= eptrb
;
680 /* Now start processing the opcodes. */
684 minimize
= possessive
= FALSE
;
691 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
] + ecode
[1], offset_top
, md
,
692 ims
, eptrb
, flags
, RM55
);
694 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
695 argument, and we must check whether that argument matches this MARK's
696 argument. It is passed back in md->start_match_ptr (an overloading of that
697 variable). If it does match, we reset that variable to the current subject
698 position and return MATCH_SKIP. Otherwise, pass back the return code
701 if (rrc
== MATCH_SKIP_ARG
&&
702 strcmp((char *)markptr
, (char *)(md
->start_match_ptr
)) == 0)
704 md
->start_match_ptr
= eptr
;
708 if (md
->mark
== NULL
) md
->mark
= markptr
;
712 MRRETURN(MATCH_NOMATCH
);
714 /* COMMIT overrides PRUNE, SKIP, and THEN */
717 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
,
718 ims
, eptrb
, flags
, RM52
);
719 if (rrc
!= MATCH_NOMATCH
&& rrc
!= MATCH_PRUNE
&&
720 rrc
!= MATCH_SKIP
&& rrc
!= MATCH_SKIP_ARG
&&
723 MRRETURN(MATCH_COMMIT
);
725 /* PRUNE overrides THEN */
728 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
,
729 ims
, eptrb
, flags
, RM51
);
730 if (rrc
!= MATCH_NOMATCH
&& rrc
!= MATCH_THEN
) RRETURN(rrc
);
731 MRRETURN(MATCH_PRUNE
);
734 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
] + ecode
[1], offset_top
, md
,
735 ims
, eptrb
, flags
, RM56
);
736 if (rrc
!= MATCH_NOMATCH
&& rrc
!= MATCH_THEN
) RRETURN(rrc
);
737 md
->mark
= ecode
+ 2;
738 RRETURN(MATCH_PRUNE
);
740 /* SKIP overrides PRUNE and THEN */
743 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
,
744 ims
, eptrb
, flags
, RM53
);
745 if (rrc
!= MATCH_NOMATCH
&& rrc
!= MATCH_PRUNE
&& rrc
!= MATCH_THEN
)
747 md
->start_match_ptr
= eptr
; /* Pass back current position */
748 MRRETURN(MATCH_SKIP
);
751 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
] + ecode
[1], offset_top
, md
,
752 ims
, eptrb
, flags
, RM57
);
753 if (rrc
!= MATCH_NOMATCH
&& rrc
!= MATCH_PRUNE
&& rrc
!= MATCH_THEN
)
756 /* Pass back the current skip name by overloading md->start_match_ptr and
757 returning the special MATCH_SKIP_ARG return code. This will either be
758 caught by a matching MARK, or get to the top, where it is treated the same
761 md
->start_match_ptr
= ecode
+ 2;
762 RRETURN(MATCH_SKIP_ARG
);
764 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765 the alt that is at the start of the current branch. This makes it possible
766 to skip back past alternatives that precede the THEN within the current
770 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
,
771 ims
, eptrb
, flags
, RM54
);
772 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
773 md
->start_match_ptr
= ecode
- GET(ecode
, 1);
774 MRRETURN(MATCH_THEN
);
777 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
] + ecode
[1+LINK_SIZE
],
778 offset_top
, md
, ims
, eptrb
, flags
, RM58
);
779 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
780 md
->start_match_ptr
= ecode
- GET(ecode
, 1);
781 md
->mark
= ecode
+ LINK_SIZE
+ 2;
784 /* Handle a capturing bracket. If there is space in the offset vector, save
785 the current subject position in the working slot at the top of the vector.
786 We mustn't change the current values of the data slot, because they may be
787 set from a previous iteration of this group, and be referred to by a
788 reference inside the group.
790 If the bracket fails to match, we need to restore this value and also the
791 values of the final offsets, in case they were set by a previous iteration
794 If there isn't enough space in the offset vector, treat this as if it were
795 a non-capturing bracket. Don't worry about setting the flag for the error
796 case here; that is handled in the code for KET. */
800 number
= GET2(ecode
, 1+LINK_SIZE
);
801 offset
= number
<< 1;
804 printf("start bracket %d\n", number
);
806 pchars(eptr
, 16, TRUE
, md
);
810 if (offset
< md
->offset_max
)
812 save_offset1
= md
->offset_vector
[offset
];
813 save_offset2
= md
->offset_vector
[offset
+1];
814 save_offset3
= md
->offset_vector
[md
->offset_end
- number
];
815 save_capture_last
= md
->capture_last
;
817 DPRINTF(("saving %d %d %d\n", save_offset1
, save_offset2
, save_offset3
));
818 md
->offset_vector
[md
->offset_end
- number
] =
819 (int)(eptr
- md
->start_subject
);
821 flags
= (op
== OP_SCBRA
)? match_cbegroup
: 0;
824 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
,
825 ims
, eptrb
, flags
, RM1
);
826 if (rrc
!= MATCH_NOMATCH
&&
827 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
829 md
->capture_last
= save_capture_last
;
830 ecode
+= GET(ecode
, 1);
832 while (*ecode
== OP_ALT
);
834 DPRINTF(("bracket %d failed\n", number
));
836 md
->offset_vector
[offset
] = save_offset1
;
837 md
->offset_vector
[offset
+1] = save_offset2
;
838 md
->offset_vector
[md
->offset_end
- number
] = save_offset3
;
840 if (rrc
!= MATCH_THEN
) md
->mark
= markptr
;
841 RRETURN(MATCH_NOMATCH
);
844 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
845 as a non-capturing bracket. */
847 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
850 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
852 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
855 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
856 final alternative within the brackets, we would return the result of a
857 recursive call to match() whatever happened. We can reduce stack usage by
858 turning this into a tail recursion, except in the case when match_cbegroup
863 DPRINTF(("start non-capturing bracket\n"));
864 flags
= (op
>= OP_SBRA
)? match_cbegroup
: 0;
867 if (ecode
[GET(ecode
, 1)] != OP_ALT
) /* Final alternative */
869 if (flags
== 0) /* Not a possibly empty group */
871 ecode
+= _pcre_OP_lengths
[*ecode
];
872 DPRINTF(("bracket 0 tail recursion\n"));
876 /* Possibly empty group; can't use tail recursion. */
878 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
, ims
,
880 if (rrc
== MATCH_NOMATCH
) md
->mark
= markptr
;
884 /* For non-final alternatives, continue the loop for a NOMATCH result;
887 RMATCH(eptr
, ecode
+ _pcre_OP_lengths
[*ecode
], offset_top
, md
, ims
,
889 if (rrc
!= MATCH_NOMATCH
&&
890 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
892 ecode
+= GET(ecode
, 1);
894 /* Control never reaches here. */
896 /* Conditional group: compilation checked that there are no more than
897 two branches. If the condition is false, skipping the first branch takes us
898 past the end if there is only one branch, but that's OK because that is
899 exactly what going to the ket would do. As there is only one branch to be
900 obeyed, we can use tail recursion to avoid using another stack frame. */
904 codelink
= GET(ecode
, 1);
906 /* Because of the way auto-callout works during compile, a callout item is
907 inserted between OP_COND and an assertion condition. */
909 if (ecode
[LINK_SIZE
+1] == OP_CALLOUT
)
911 if (pcre_callout
!= NULL
)
913 pcre_callout_block cb
;
914 cb
.version
= 1; /* Version 1 of the callout block */
915 cb
.callout_number
= ecode
[LINK_SIZE
+2];
916 cb
.offset_vector
= md
->offset_vector
;
917 cb
.subject
= (PCRE_SPTR
)md
->start_subject
;
918 cb
.subject_length
= (int)(md
->end_subject
- md
->start_subject
);
919 cb
.start_match
= (int)(mstart
- md
->start_subject
);
920 cb
.current_position
= (int)(eptr
- md
->start_subject
);
921 cb
.pattern_position
= GET(ecode
, LINK_SIZE
+ 3);
922 cb
.next_item_length
= GET(ecode
, 3 + 2*LINK_SIZE
);
923 cb
.capture_top
= offset_top
/2;
924 cb
.capture_last
= md
->capture_last
;
925 cb
.callout_data
= md
->callout_data
;
926 if ((rrc
= (*pcre_callout
)(&cb
)) > 0) MRRETURN(MATCH_NOMATCH
);
927 if (rrc
< 0) RRETURN(rrc
);
929 ecode
+= _pcre_OP_lengths
[OP_CALLOUT
];
932 condcode
= ecode
[LINK_SIZE
+1];
934 /* Now see what the actual condition is */
936 if (condcode
== OP_RREF
|| condcode
== OP_NRREF
) /* Recursion test */
938 if (md
->recursive
== NULL
) /* Not recursing => FALSE */
941 ecode
+= GET(ecode
, 1);
945 int recno
= GET2(ecode
, LINK_SIZE
+ 2); /* Recursion group number*/
946 condition
= (recno
== RREF_ANY
|| recno
== md
->recursive
->group_num
);
948 /* If the test is for recursion into a specific subpattern, and it is
949 false, but the test was set up by name, scan the table to see if the
950 name refers to any other numbers, and test them. The condition is true
951 if any one is set. */
953 if (!condition
&& condcode
== OP_NRREF
&& recno
!= RREF_ANY
)
955 uschar
*slotA
= md
->name_table
;
956 for (i
= 0; i
< md
->name_count
; i
++)
958 if (GET2(slotA
, 0) == recno
) break;
959 slotA
+= md
->name_entry_size
;
962 /* Found a name for the number - there can be only one; duplicate
963 names for different numbers are allowed, but not vice versa. First
964 scan down for duplicates. */
966 if (i
< md
->name_count
)
968 uschar
*slotB
= slotA
;
969 while (slotB
> md
->name_table
)
971 slotB
-= md
->name_entry_size
;
972 if (strcmp((char *)slotA
+ 2, (char *)slotB
+ 2) == 0)
974 condition
= GET2(slotB
, 0) == md
->recursive
->group_num
;
975 if (condition
) break;
980 /* Scan up for duplicates */
985 for (i
++; i
< md
->name_count
; i
++)
987 slotB
+= md
->name_entry_size
;
988 if (strcmp((char *)slotA
+ 2, (char *)slotB
+ 2) == 0)
990 condition
= GET2(slotB
, 0) == md
->recursive
->group_num
;
991 if (condition
) break;
999 /* Choose branch according to the condition */
1001 ecode
+= condition
? 3 : GET(ecode
, 1);
1005 else if (condcode
== OP_CREF
|| condcode
== OP_NCREF
) /* Group used test */
1007 offset
= GET2(ecode
, LINK_SIZE
+2) << 1; /* Doubled ref number */
1008 condition
= offset
< offset_top
&& md
->offset_vector
[offset
] >= 0;
1010 /* If the numbered capture is unset, but the reference was by name,
1011 scan the table to see if the name refers to any other numbers, and test
1012 them. The condition is true if any one is set. This is tediously similar
1013 to the code above, but not close enough to try to amalgamate. */
1015 if (!condition
&& condcode
== OP_NCREF
)
1017 int refno
= offset
>> 1;
1018 uschar
*slotA
= md
->name_table
;
1020 for (i
= 0; i
< md
->name_count
; i
++)
1022 if (GET2(slotA
, 0) == refno
) break;
1023 slotA
+= md
->name_entry_size
;
1026 /* Found a name for the number - there can be only one; duplicate names
1027 for different numbers are allowed, but not vice versa. First scan down
1030 if (i
< md
->name_count
)
1032 uschar
*slotB
= slotA
;
1033 while (slotB
> md
->name_table
)
1035 slotB
-= md
->name_entry_size
;
1036 if (strcmp((char *)slotA
+ 2, (char *)slotB
+ 2) == 0)
1038 offset
= GET2(slotB
, 0) << 1;
1039 condition
= offset
< offset_top
&&
1040 md
->offset_vector
[offset
] >= 0;
1041 if (condition
) break;
1046 /* Scan up for duplicates */
1051 for (i
++; i
< md
->name_count
; i
++)
1053 slotB
+= md
->name_entry_size
;
1054 if (strcmp((char *)slotA
+ 2, (char *)slotB
+ 2) == 0)
1056 offset
= GET2(slotB
, 0) << 1;
1057 condition
= offset
< offset_top
&&
1058 md
->offset_vector
[offset
] >= 0;
1059 if (condition
) break;
1067 /* Choose branch according to the condition */
1069 ecode
+= condition
? 3 : GET(ecode
, 1);
1072 else if (condcode
== OP_DEF
) /* DEFINE - always false */
1075 ecode
+= GET(ecode
, 1);
1078 /* The condition is an assertion. Call match() to evaluate it - setting
1079 the final argument match_condassert causes it to stop at the end of an
1084 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, NULL
,
1085 match_condassert
, RM3
);
1086 if (rrc
== MATCH_MATCH
)
1089 ecode
+= 1 + LINK_SIZE
+ GET(ecode
, LINK_SIZE
+ 2);
1090 while (*ecode
== OP_ALT
) ecode
+= GET(ecode
, 1);
1092 else if (rrc
!= MATCH_NOMATCH
&&
1093 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
1095 RRETURN(rrc
); /* Need braces because of following else */
1104 /* We are now at the branch that is to be obeyed. As there is only one,
1105 we can use tail recursion to avoid using another stack frame, except when
1106 match_cbegroup is required for an unlimited repeat of a possibly empty
1107 group. If the second alternative doesn't exist, we can just plough on. */
1109 if (condition
|| *ecode
== OP_ALT
)
1111 ecode
+= 1 + LINK_SIZE
;
1112 if (op
== OP_SCOND
) /* Possibly empty group */
1114 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, match_cbegroup
, RM49
);
1117 else /* Group must match something */
1123 else /* Condition false & no alternative */
1125 ecode
+= 1 + LINK_SIZE
;
1130 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1131 to close any currently open capturing brackets. */
1134 number
= GET2(ecode
, 1);
1135 offset
= number
<< 1;
1138 printf("end bracket %d at *ACCEPT", number
);
1142 md
->capture_last
= number
;
1143 if (offset
>= md
->offset_max
) md
->offset_overflow
= TRUE
; else
1145 md
->offset_vector
[offset
] =
1146 md
->offset_vector
[md
->offset_end
- number
];
1147 md
->offset_vector
[offset
+1] = (int)(eptr
- md
->start_subject
);
1148 if (offset_top
<= offset
) offset_top
= offset
+ 2;
1154 /* End of the pattern, either real or forced. If we are in a top-level
1155 recursion, we should restore the offsets appropriately and continue from
1160 if (md
->recursive
!= NULL
&& md
->recursive
->group_num
== 0)
1162 recursion_info
*rec
= md
->recursive
;
1163 DPRINTF(("End of pattern in a (?0) recursion\n"));
1164 md
->recursive
= rec
->prevrec
;
1165 memmove(md
->offset_vector
, rec
->offset_save
,
1166 rec
->saved_max
* sizeof(int));
1167 offset_top
= rec
->save_offset_top
;
1169 ecode
= rec
->after_call
;
1173 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1174 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1175 the subject. In both cases, backtracking will then try other alternatives,
1178 if (eptr
== mstart
&&
1180 (md
->notempty_atstart
&&
1181 mstart
== md
->start_subject
+ md
->start_offset
)))
1182 MRRETURN(MATCH_NOMATCH
);
1184 /* Otherwise, we have a match. */
1186 md
->end_match_ptr
= eptr
; /* Record where we ended */
1187 md
->end_offset_top
= offset_top
; /* and how many extracts were taken */
1188 md
->start_match_ptr
= mstart
; /* and the start (\K can modify) */
1190 /* For some reason, the macros don't work properly if an expression is
1191 given as the argument to MRRETURN when the heap is in use. */
1193 rrc
= (op
== OP_END
)? MATCH_MATCH
: MATCH_ACCEPT
;
1196 /* Change option settings */
1201 DPRINTF(("ims set to %02lx\n", ims
));
1204 /* Assertion brackets. Check the alternative branches in turn - the
1205 matching won't pass the KET for an assertion. If any one branch matches,
1206 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1207 start of each branch to move the current point backwards, so the code at
1208 this level is identical to the lookahead case. */
1214 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, NULL
, 0,
1216 if (rrc
== MATCH_MATCH
|| rrc
== MATCH_ACCEPT
)
1218 mstart
= md
->start_match_ptr
; /* In case \K reset it */
1221 if (rrc
!= MATCH_NOMATCH
&&
1222 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
1224 ecode
+= GET(ecode
, 1);
1226 while (*ecode
== OP_ALT
);
1227 if (*ecode
== OP_KET
) MRRETURN(MATCH_NOMATCH
);
1229 /* If checking an assertion for a condition, return MATCH_MATCH. */
1231 if ((flags
& match_condassert
) != 0) RRETURN(MATCH_MATCH
);
1233 /* Continue from after the assertion, updating the offsets high water
1234 mark, since extracts may have been taken during the assertion. */
1236 do ecode
+= GET(ecode
,1); while (*ecode
== OP_ALT
);
1237 ecode
+= 1 + LINK_SIZE
;
1238 offset_top
= md
->end_offset_top
;
1241 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1242 PRUNE, or COMMIT means we must assume failure without checking subsequent
1246 case OP_ASSERTBACK_NOT
:
1249 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, NULL
, 0,
1251 if (rrc
== MATCH_MATCH
|| rrc
== MATCH_ACCEPT
) MRRETURN(MATCH_NOMATCH
);
1252 if (rrc
== MATCH_SKIP
|| rrc
== MATCH_PRUNE
|| rrc
== MATCH_COMMIT
)
1254 do ecode
+= GET(ecode
,1); while (*ecode
== OP_ALT
);
1257 if (rrc
!= MATCH_NOMATCH
&&
1258 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
1260 ecode
+= GET(ecode
,1);
1262 while (*ecode
== OP_ALT
);
1264 if ((flags
& match_condassert
) != 0) RRETURN(MATCH_MATCH
);
1266 ecode
+= 1 + LINK_SIZE
;
1269 /* Move the subject pointer back. This occurs only at the start of
1270 each branch of a lookbehind assertion. If we are too close to the start to
1271 move back, this match function fails. When working with UTF-8 we move
1272 back a number of characters, not bytes. */
1282 if (eptr
< md
->start_subject
) MRRETURN(MATCH_NOMATCH
);
1289 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1292 eptr
-= GET(ecode
, 1);
1293 if (eptr
< md
->start_subject
) MRRETURN(MATCH_NOMATCH
);
1296 /* Save the earliest consulted character, then skip to next op code */
1298 if (eptr
< md
->start_used_ptr
) md
->start_used_ptr
= eptr
;
1299 ecode
+= 1 + LINK_SIZE
;
1302 /* The callout item calls an external function, if one is provided, passing
1303 details of the match so far. This is mainly for debugging, though the
1304 function is able to force a failure. */
1307 if (pcre_callout
!= NULL
)
1309 pcre_callout_block cb
;
1310 cb
.version
= 1; /* Version 1 of the callout block */
1311 cb
.callout_number
= ecode
[1];
1312 cb
.offset_vector
= md
->offset_vector
;
1313 cb
.subject
= (PCRE_SPTR
)md
->start_subject
;
1314 cb
.subject_length
= (int)(md
->end_subject
- md
->start_subject
);
1315 cb
.start_match
= (int)(mstart
- md
->start_subject
);
1316 cb
.current_position
= (int)(eptr
- md
->start_subject
);
1317 cb
.pattern_position
= GET(ecode
, 2);
1318 cb
.next_item_length
= GET(ecode
, 2 + LINK_SIZE
);
1319 cb
.capture_top
= offset_top
/2;
1320 cb
.capture_last
= md
->capture_last
;
1321 cb
.callout_data
= md
->callout_data
;
1322 if ((rrc
= (*pcre_callout
)(&cb
)) > 0) MRRETURN(MATCH_NOMATCH
);
1323 if (rrc
< 0) RRETURN(rrc
);
1325 ecode
+= 2 + 2*LINK_SIZE
;
1328 /* Recursion either matches the current regex, or some subexpression. The
1329 offset data is the offset to the starting bracket from the start of the
1330 whole pattern. (This is so that it works from duplicated subpatterns.)
1332 If there are any capturing brackets started but not finished, we have to
1333 save their starting points and reinstate them after the recursion. However,
1334 we don't know how many such there are (offset_top records the completed
1335 total) so we just have to save all the potential data. There may be up to
1336 65535 such values, which is too large to put on the stack, but using malloc
1337 for small numbers seems expensive. As a compromise, the stack is used when
1338 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1339 is used. A problem is what to do if the malloc fails ... there is no way of
1340 returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1341 values on the stack, and accept that the rest may be wrong.
1343 There are also other values that have to be saved. We use a chained
1344 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1345 for the original version of this logic. */
1349 callpat
= md
->start_code
+ GET(ecode
, 1);
1350 new_recursive
.group_num
= (callpat
== md
->start_code
)? 0 :
1351 GET2(callpat
, 1 + LINK_SIZE
);
1353 /* Add to "recursing stack" */
1355 new_recursive
.prevrec
= md
->recursive
;
1356 md
->recursive
= &new_recursive
;
1358 /* Find where to continue from afterwards */
1360 ecode
+= 1 + LINK_SIZE
;
1361 new_recursive
.after_call
= ecode
;
1363 /* Now save the offset data. */
1365 new_recursive
.saved_max
= md
->offset_end
;
1366 if (new_recursive
.saved_max
<= REC_STACK_SAVE_MAX
)
1367 new_recursive
.offset_save
= stacksave
;
1370 new_recursive
.offset_save
=
1371 (int *)(pcre_malloc
)(new_recursive
.saved_max
* sizeof(int));
1372 if (new_recursive
.offset_save
== NULL
) RRETURN(PCRE_ERROR_NOMEMORY
);
1375 memcpy(new_recursive
.offset_save
, md
->offset_vector
,
1376 new_recursive
.saved_max
* sizeof(int));
1377 new_recursive
.save_offset_top
= offset_top
;
1379 /* OK, now we can do the recursion. For each top-level alternative we
1380 restore the offset and recursion data. */
1382 DPRINTF(("Recursing into group %d\n", new_recursive
.group_num
));
1383 flags
= (*callpat
>= OP_SBRA
)? match_cbegroup
: 0;
1386 RMATCH(eptr
, callpat
+ _pcre_OP_lengths
[*callpat
], offset_top
,
1387 md
, ims
, eptrb
, flags
, RM6
);
1388 if (rrc
== MATCH_MATCH
|| rrc
== MATCH_ACCEPT
)
1390 DPRINTF(("Recursion matched\n"));
1391 md
->recursive
= new_recursive
.prevrec
;
1392 if (new_recursive
.offset_save
!= stacksave
)
1393 (pcre_free
)(new_recursive
.offset_save
);
1394 MRRETURN(MATCH_MATCH
);
1396 else if (rrc
!= MATCH_NOMATCH
&&
1397 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
1399 DPRINTF(("Recursion gave error %d\n", rrc
));
1400 if (new_recursive
.offset_save
!= stacksave
)
1401 (pcre_free
)(new_recursive
.offset_save
);
1405 md
->recursive
= &new_recursive
;
1406 memcpy(md
->offset_vector
, new_recursive
.offset_save
,
1407 new_recursive
.saved_max
* sizeof(int));
1408 callpat
+= GET(callpat
, 1);
1410 while (*callpat
== OP_ALT
);
1412 DPRINTF(("Recursion didn't match\n"));
1413 md
->recursive
= new_recursive
.prevrec
;
1414 if (new_recursive
.offset_save
!= stacksave
)
1415 (pcre_free
)(new_recursive
.offset_save
);
1416 MRRETURN(MATCH_NOMATCH
);
1418 /* Control never reaches here */
1420 /* "Once" brackets are like assertion brackets except that after a match,
1421 the point in the subject string is not moved back. Thus there can never be
1422 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423 Check the alternative branches in turn - the matching won't pass the KET
1424 for this kind of subpattern. If any one branch matches, we carry on as at
1425 the end of a normal bracket, leaving the subject pointer, but resetting
1426 the start-of-match value in case it was changed by \K. */
1434 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, eptrb
, 0, RM7
);
1435 if (rrc
== MATCH_MATCH
) /* Note: _not_ MATCH_ACCEPT */
1437 mstart
= md
->start_match_ptr
;
1440 if (rrc
!= MATCH_NOMATCH
&&
1441 (rrc
!= MATCH_THEN
|| md
->start_match_ptr
!= ecode
))
1443 ecode
+= GET(ecode
,1);
1445 while (*ecode
== OP_ALT
);
1447 /* If hit the end of the group (which could be repeated), fail */
1449 if (*ecode
!= OP_ONCE
&& *ecode
!= OP_ALT
) RRETURN(MATCH_NOMATCH
);
1451 /* Continue as from after the assertion, updating the offsets high water
1452 mark, since extracts may have been taken. */
1454 do ecode
+= GET(ecode
, 1); while (*ecode
== OP_ALT
);
1456 offset_top
= md
->end_offset_top
;
1457 eptr
= md
->end_match_ptr
;
1459 /* For a non-repeating ket, just continue at this level. This also
1460 happens for a repeating ket if no characters were matched in the group.
1461 This is the forcible breaking of infinite loops as implemented in Perl
1462 5.005. If there is an options reset, it will get obeyed in the normal
1463 course of events. */
1465 if (*ecode
== OP_KET
|| eptr
== saved_eptr
)
1467 ecode
+= 1+LINK_SIZE
;
1471 /* The repeating kets try the rest of the pattern or restart from the
1472 preceding bracket, in the appropriate order. The second "call" of match()
1473 uses tail recursion, to avoid using another stack frame. We need to reset
1474 any options that changed within the bracket before re-running it, so
1475 check the next opcode. */
1477 if (ecode
[1+LINK_SIZE
] == OP_OPT
)
1479 ims
= (ims
& ~PCRE_IMS
) | ecode
[4];
1480 DPRINTF(("ims set to %02lx at group repeat\n", ims
));
1483 if (*ecode
== OP_KETRMIN
)
1485 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, eptrb
, 0, RM8
);
1486 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1491 else /* OP_KETRMAX */
1493 RMATCH(eptr
, prev
, offset_top
, md
, ims
, eptrb
, match_cbegroup
, RM9
);
1494 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1495 ecode
+= 1 + LINK_SIZE
;
1499 /* Control never gets here */
1501 /* An alternation is the end of a branch; scan along to find the end of the
1502 bracketed group and go to there. */
1505 do ecode
+= GET(ecode
,1); while (*ecode
== OP_ALT
);
1508 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1509 indicating that it may occur zero times. It may repeat infinitely, or not
1510 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1511 with fixed upper repeat limits are compiled as a number of copies, with the
1512 optional ones preceded by BRAZERO or BRAMINZERO. */
1517 RMATCH(eptr
, next
, offset_top
, md
, ims
, eptrb
, 0, RM10
);
1518 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1519 do next
+= GET(next
,1); while (*next
== OP_ALT
);
1520 ecode
= next
+ 1 + LINK_SIZE
;
1527 do next
+= GET(next
, 1); while (*next
== OP_ALT
);
1528 RMATCH(eptr
, next
+ 1+LINK_SIZE
, offset_top
, md
, ims
, eptrb
, 0, RM11
);
1529 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1537 do next
+= GET(next
,1); while (*next
== OP_ALT
);
1538 ecode
= next
+ 1 + LINK_SIZE
;
1542 /* End of a group, repeated or non-repeating. */
1547 prev
= ecode
- GET(ecode
, 1);
1549 /* If this was a group that remembered the subject start, in order to break
1550 infinite repeats of empty string matches, retrieve the subject start from
1551 the chain. Otherwise, set it NULL. */
1553 if (*prev
>= OP_SBRA
)
1555 saved_eptr
= eptrb
->epb_saved_eptr
; /* Value at start of group */
1556 eptrb
= eptrb
->epb_prev
; /* Backup to previous group */
1558 else saved_eptr
= NULL
;
1560 /* If we are at the end of an assertion group or an atomic group, stop
1561 matching and return MATCH_MATCH, but record the current high water mark for
1562 use by positive assertions. We also need to record the match start in case
1563 it was changed by \K. */
1565 if (*prev
== OP_ASSERT
|| *prev
== OP_ASSERT_NOT
||
1566 *prev
== OP_ASSERTBACK
|| *prev
== OP_ASSERTBACK_NOT
||
1569 md
->end_match_ptr
= eptr
; /* For ONCE */
1570 md
->end_offset_top
= offset_top
;
1571 md
->start_match_ptr
= mstart
;
1572 MRRETURN(MATCH_MATCH
);
1575 /* For capturing groups we have to check the group number back at the start
1576 and if necessary complete handling an extraction by setting the offsets and
1577 bumping the high water mark. Note that whole-pattern recursion is coded as
1578 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1579 when the OP_END is reached. Other recursion is handled here. */
1581 if (*prev
== OP_CBRA
|| *prev
== OP_SCBRA
)
1583 number
= GET2(prev
, 1+LINK_SIZE
);
1584 offset
= number
<< 1;
1587 printf("end bracket %d", number
);
1591 md
->capture_last
= number
;
1592 if (offset
>= md
->offset_max
) md
->offset_overflow
= TRUE
; else
1594 md
->offset_vector
[offset
] =
1595 md
->offset_vector
[md
->offset_end
- number
];
1596 md
->offset_vector
[offset
+1] = (int)(eptr
- md
->start_subject
);
1597 if (offset_top
<= offset
) offset_top
= offset
+ 2;
1600 /* Handle a recursively called group. Restore the offsets
1601 appropriately and continue from after the call. */
1603 if (md
->recursive
!= NULL
&& md
->recursive
->group_num
== number
)
1605 recursion_info
*rec
= md
->recursive
;
1606 DPRINTF(("Recursion (%d) succeeded - continuing\n", number
));
1607 md
->recursive
= rec
->prevrec
;
1608 memcpy(md
->offset_vector
, rec
->offset_save
,
1609 rec
->saved_max
* sizeof(int));
1610 offset_top
= rec
->save_offset_top
;
1611 ecode
= rec
->after_call
;
1617 /* For both capturing and non-capturing groups, reset the value of the ims
1618 flags, in case they got changed during the group. */
1621 DPRINTF(("ims reset to %02lx\n", ims
));
1623 /* For a non-repeating ket, just continue at this level. This also
1624 happens for a repeating ket if no characters were matched in the group.
1625 This is the forcible breaking of infinite loops as implemented in Perl
1626 5.005. If there is an options reset, it will get obeyed in the normal
1627 course of events. */
1629 if (*ecode
== OP_KET
|| eptr
== saved_eptr
)
1631 ecode
+= 1 + LINK_SIZE
;
1635 /* The repeating kets try the rest of the pattern or restart from the
1636 preceding bracket, in the appropriate order. In the second case, we can use
1637 tail recursion to avoid using another stack frame, unless we have an
1638 unlimited repeat of a group that can match an empty string. */
1640 flags
= (*prev
>= OP_SBRA
)? match_cbegroup
: 0;
1642 if (*ecode
== OP_KETRMIN
)
1644 RMATCH(eptr
, ecode
+ 1 + LINK_SIZE
, offset_top
, md
, ims
, eptrb
, 0, RM12
);
1645 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1646 if (flags
!= 0) /* Could match an empty string */
1648 RMATCH(eptr
, prev
, offset_top
, md
, ims
, eptrb
, flags
, RM50
);
1654 else /* OP_KETRMAX */
1656 RMATCH(eptr
, prev
, offset_top
, md
, ims
, eptrb
, flags
, RM13
);
1657 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
1658 ecode
+= 1 + LINK_SIZE
;
1662 /* Control never gets here */
1664 /* Start of subject unless notbol, or after internal newline if multiline */
1667 if (md
->notbol
&& eptr
== md
->start_subject
) MRRETURN(MATCH_NOMATCH
);
1668 if ((ims
& PCRE_MULTILINE
) != 0)
1670 if (eptr
!= md
->start_subject
&&
1671 (eptr
== md
->end_subject
|| !WAS_NEWLINE(eptr
)))
1672 MRRETURN(MATCH_NOMATCH
);
1676 /* ... else fall through */
1678 /* Start of subject assertion */
1681 if (eptr
!= md
->start_subject
) MRRETURN(MATCH_NOMATCH
);
1685 /* Start of match assertion */
1688 if (eptr
!= md
->start_subject
+ md
->start_offset
) MRRETURN(MATCH_NOMATCH
);
1692 /* Reset the start of match point */
1699 /* Assert before internal newline if multiline, or before a terminating
1700 newline unless endonly is set, else end of subject unless noteol is set. */
1703 if ((ims
& PCRE_MULTILINE
) != 0)
1705 if (eptr
< md
->end_subject
)
1706 { if (!IS_NEWLINE(eptr
)) MRRETURN(MATCH_NOMATCH
); }
1709 if (md
->noteol
) MRRETURN(MATCH_NOMATCH
);
1715 else /* Not multiline */
1717 if (md
->noteol
) MRRETURN(MATCH_NOMATCH
);
1718 if (!md
->endonly
) goto ASSERT_NL_OR_EOS
;
1721 /* ... else fall through for endonly */
1723 /* End of subject assertion (\z) */
1726 if (eptr
< md
->end_subject
) MRRETURN(MATCH_NOMATCH
);
1731 /* End of subject or ending \n assertion (\Z) */
1735 if (eptr
< md
->end_subject
&&
1736 (!IS_NEWLINE(eptr
) || eptr
!= md
->end_subject
- md
->nllen
))
1737 MRRETURN(MATCH_NOMATCH
);
1739 /* Either at end of string or \n before end. */
1745 /* Word boundary assertions */
1747 case OP_NOT_WORD_BOUNDARY
:
1748 case OP_WORD_BOUNDARY
:
1751 /* Find out if the previous and current characters are "word" characters.
1752 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1753 be "non-word" characters. Remember the earliest consulted character for
1754 partial matching. */
1759 /* Get status of previous character */
1761 if (eptr
== md
->start_subject
) prev_is_word
= FALSE
; else
1763 USPTR lastptr
= eptr
- 1;
1764 while((*lastptr
& 0xc0) == 0x80) lastptr
--;
1765 if (lastptr
< md
->start_used_ptr
) md
->start_used_ptr
= lastptr
;
1766 GETCHAR(c
, lastptr
);
1770 if (c
== '_') prev_is_word
= TRUE
; else
1772 int cat
= UCD_CATEGORY(c
);
1773 prev_is_word
= (cat
== ucp_L
|| cat
== ucp_N
);
1778 prev_is_word
= c
< 256 && (md
->ctypes
[c
] & ctype_word
) != 0;
1781 /* Get status of next character */
1783 if (eptr
>= md
->end_subject
)
1786 cur_is_word
= FALSE
;
1794 if (c
== '_') cur_is_word
= TRUE
; else
1796 int cat
= UCD_CATEGORY(c
);
1797 cur_is_word
= (cat
== ucp_L
|| cat
== ucp_N
);
1802 cur_is_word
= c
< 256 && (md
->ctypes
[c
] & ctype_word
) != 0;
1808 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809 consistency with the behaviour of \w we do use it in this case. */
1812 /* Get status of previous character */
1814 if (eptr
== md
->start_subject
) prev_is_word
= FALSE
; else
1816 if (eptr
<= md
->start_used_ptr
) md
->start_used_ptr
= eptr
- 1;
1821 if (c
== '_') prev_is_word
= TRUE
; else
1823 int cat
= UCD_CATEGORY(c
);
1824 prev_is_word
= (cat
== ucp_L
|| cat
== ucp_N
);
1829 prev_is_word
= ((md
->ctypes
[eptr
[-1]] & ctype_word
) != 0);
1832 /* Get status of next character */
1834 if (eptr
>= md
->end_subject
)
1837 cur_is_word
= FALSE
;
1844 if (c
== '_') cur_is_word
= TRUE
; else
1846 int cat
= UCD_CATEGORY(c
);
1847 cur_is_word
= (cat
== ucp_L
|| cat
== ucp_N
);
1852 cur_is_word
= ((md
->ctypes
[*eptr
] & ctype_word
) != 0);
1855 /* Now see if the situation is what we want */
1857 if ((*ecode
++ == OP_WORD_BOUNDARY
)?
1858 cur_is_word
== prev_is_word
: cur_is_word
!= prev_is_word
)
1859 MRRETURN(MATCH_NOMATCH
);
1863 /* Match a single character type; inline for speed */
1866 if (IS_NEWLINE(eptr
)) MRRETURN(MATCH_NOMATCH
);
1870 if (eptr
++ >= md
->end_subject
)
1873 MRRETURN(MATCH_NOMATCH
);
1875 if (utf8
) while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
1879 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1880 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1883 if (eptr
++ >= md
->end_subject
)
1886 MRRETURN(MATCH_NOMATCH
);
1892 if (eptr
>= md
->end_subject
)
1895 MRRETURN(MATCH_NOMATCH
);
1897 GETCHARINCTEST(c
, eptr
);
1902 (md
->ctypes
[c
] & ctype_digit
) != 0
1904 MRRETURN(MATCH_NOMATCH
);
1909 if (eptr
>= md
->end_subject
)
1912 MRRETURN(MATCH_NOMATCH
);
1914 GETCHARINCTEST(c
, eptr
);
1919 (md
->ctypes
[c
] & ctype_digit
) == 0
1921 MRRETURN(MATCH_NOMATCH
);
1925 case OP_NOT_WHITESPACE
:
1926 if (eptr
>= md
->end_subject
)
1929 MRRETURN(MATCH_NOMATCH
);
1931 GETCHARINCTEST(c
, eptr
);
1936 (md
->ctypes
[c
] & ctype_space
) != 0
1938 MRRETURN(MATCH_NOMATCH
);
1943 if (eptr
>= md
->end_subject
)
1946 MRRETURN(MATCH_NOMATCH
);
1948 GETCHARINCTEST(c
, eptr
);
1953 (md
->ctypes
[c
] & ctype_space
) == 0
1955 MRRETURN(MATCH_NOMATCH
);
1959 case OP_NOT_WORDCHAR
:
1960 if (eptr
>= md
->end_subject
)
1963 MRRETURN(MATCH_NOMATCH
);
1965 GETCHARINCTEST(c
, eptr
);
1970 (md
->ctypes
[c
] & ctype_word
) != 0
1972 MRRETURN(MATCH_NOMATCH
);
1977 if (eptr
>= md
->end_subject
)
1980 MRRETURN(MATCH_NOMATCH
);
1982 GETCHARINCTEST(c
, eptr
);
1987 (md
->ctypes
[c
] & ctype_word
) == 0
1989 MRRETURN(MATCH_NOMATCH
);
1994 if (eptr
>= md
->end_subject
)
1997 MRRETURN(MATCH_NOMATCH
);
1999 GETCHARINCTEST(c
, eptr
);
2002 default: MRRETURN(MATCH_NOMATCH
);
2004 if (eptr
< md
->end_subject
&& *eptr
== 0x0a) eptr
++;
2015 if (md
->bsr_anycrlf
) MRRETURN(MATCH_NOMATCH
);
2022 if (eptr
>= md
->end_subject
)
2025 MRRETURN(MATCH_NOMATCH
);
2027 GETCHARINCTEST(c
, eptr
);
2032 case 0x20: /* SPACE */
2033 case 0xa0: /* NBSP */
2034 case 0x1680: /* OGHAM SPACE MARK */
2035 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2036 case 0x2000: /* EN QUAD */
2037 case 0x2001: /* EM QUAD */
2038 case 0x2002: /* EN SPACE */
2039 case 0x2003: /* EM SPACE */
2040 case 0x2004: /* THREE-PER-EM SPACE */
2041 case 0x2005: /* FOUR-PER-EM SPACE */
2042 case 0x2006: /* SIX-PER-EM SPACE */
2043 case 0x2007: /* FIGURE SPACE */
2044 case 0x2008: /* PUNCTUATION SPACE */
2045 case 0x2009: /* THIN SPACE */
2046 case 0x200A: /* HAIR SPACE */
2047 case 0x202f: /* NARROW NO-BREAK SPACE */
2048 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2049 case 0x3000: /* IDEOGRAPHIC SPACE */
2050 MRRETURN(MATCH_NOMATCH
);
2056 if (eptr
>= md
->end_subject
)
2059 MRRETURN(MATCH_NOMATCH
);
2061 GETCHARINCTEST(c
, eptr
);
2064 default: MRRETURN(MATCH_NOMATCH
);
2066 case 0x20: /* SPACE */
2067 case 0xa0: /* NBSP */
2068 case 0x1680: /* OGHAM SPACE MARK */
2069 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2070 case 0x2000: /* EN QUAD */
2071 case 0x2001: /* EM QUAD */
2072 case 0x2002: /* EN SPACE */
2073 case 0x2003: /* EM SPACE */
2074 case 0x2004: /* THREE-PER-EM SPACE */
2075 case 0x2005: /* FOUR-PER-EM SPACE */
2076 case 0x2006: /* SIX-PER-EM SPACE */
2077 case 0x2007: /* FIGURE SPACE */
2078 case 0x2008: /* PUNCTUATION SPACE */
2079 case 0x2009: /* THIN SPACE */
2080 case 0x200A: /* HAIR SPACE */
2081 case 0x202f: /* NARROW NO-BREAK SPACE */
2082 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2083 case 0x3000: /* IDEOGRAPHIC SPACE */
2090 if (eptr
>= md
->end_subject
)
2093 MRRETURN(MATCH_NOMATCH
);
2095 GETCHARINCTEST(c
, eptr
);
2103 case 0x85: /* NEL */
2104 case 0x2028: /* LINE SEPARATOR */
2105 case 0x2029: /* PARAGRAPH SEPARATOR */
2106 MRRETURN(MATCH_NOMATCH
);
2112 if (eptr
>= md
->end_subject
)
2115 MRRETURN(MATCH_NOMATCH
);
2117 GETCHARINCTEST(c
, eptr
);
2120 default: MRRETURN(MATCH_NOMATCH
);
2125 case 0x85: /* NEL */
2126 case 0x2028: /* LINE SEPARATOR */
2127 case 0x2029: /* PARAGRAPH SEPARATOR */
2134 /* Check the next character by Unicode property. We will get here only
2135 if the support is in the binary; otherwise a compile-time error occurs. */
2139 if (eptr
>= md
->end_subject
)
2142 MRRETURN(MATCH_NOMATCH
);
2144 GETCHARINCTEST(c
, eptr
);
2146 int chartype
= UCD_CHARTYPE(c
);
2151 if (op
== OP_NOTPROP
) MRRETURN(MATCH_NOMATCH
);
2155 if ((chartype
== ucp_Lu
||
2156 chartype
== ucp_Ll
||
2157 chartype
== ucp_Lt
) == (op
== OP_NOTPROP
))
2158 MRRETURN(MATCH_NOMATCH
);
2162 if ((ecode
[2] != _pcre_ucp_gentype
[chartype
]) == (op
== OP_PROP
))
2163 MRRETURN(MATCH_NOMATCH
);
2167 if ((ecode
[2] != chartype
) == (op
== OP_PROP
))
2168 MRRETURN(MATCH_NOMATCH
);
2172 if ((ecode
[2] != UCD_SCRIPT(c
)) == (op
== OP_PROP
))
2173 MRRETURN(MATCH_NOMATCH
);
2176 /* These are specials */
2179 if ((_pcre_ucp_gentype
[chartype
] == ucp_L
||
2180 _pcre_ucp_gentype
[chartype
] == ucp_N
) == (op
== OP_NOTPROP
))
2181 MRRETURN(MATCH_NOMATCH
);
2184 case PT_SPACE
: /* Perl space */
2185 if ((_pcre_ucp_gentype
[chartype
] == ucp_Z
||
2186 c
== CHAR_HT
|| c
== CHAR_NL
|| c
== CHAR_FF
|| c
== CHAR_CR
)
2187 == (op
== OP_NOTPROP
))
2188 MRRETURN(MATCH_NOMATCH
);
2191 case PT_PXSPACE
: /* POSIX space */
2192 if ((_pcre_ucp_gentype
[chartype
] == ucp_Z
||
2193 c
== CHAR_HT
|| c
== CHAR_NL
|| c
== CHAR_VT
||
2194 c
== CHAR_FF
|| c
== CHAR_CR
)
2195 == (op
== OP_NOTPROP
))
2196 MRRETURN(MATCH_NOMATCH
);
2200 if ((_pcre_ucp_gentype
[chartype
] == ucp_L
||
2201 _pcre_ucp_gentype
[chartype
] == ucp_N
||
2202 c
== CHAR_UNDERSCORE
) == (op
== OP_NOTPROP
))
2203 MRRETURN(MATCH_NOMATCH
);
2206 /* This should never occur */
2209 RRETURN(PCRE_ERROR_INTERNAL
);
2216 /* Match an extended Unicode sequence. We will get here only if the support
2217 is in the binary; otherwise a compile-time error occurs. */
2220 if (eptr
>= md
->end_subject
)
2223 MRRETURN(MATCH_NOMATCH
);
2225 GETCHARINCTEST(c
, eptr
);
2227 int category
= UCD_CATEGORY(c
);
2228 if (category
== ucp_M
) MRRETURN(MATCH_NOMATCH
);
2229 while (eptr
< md
->end_subject
)
2232 if (!utf8
) c
= *eptr
; else
2234 GETCHARLEN(c
, eptr
, len
);
2236 category
= UCD_CATEGORY(c
);
2237 if (category
!= ucp_M
) break;
2246 /* Match a back reference, possibly repeatedly. Look past the end of the
2247 item to see if there is repeat information following. The code is similar
2248 to that for character classes, but repeated for efficiency. Then obey
2249 similar code to character type repeats - written out again for speed.
2250 However, if the referenced string is the empty string, always treat
2251 it as matched, any number of times (otherwise there could be infinite
2256 offset
= GET2(ecode
, 1) << 1; /* Doubled ref number */
2259 /* If the reference is unset, there are two possibilities:
2261 (a) In the default, Perl-compatible state, set the length to be longer
2262 than the amount of subject left; this ensures that every attempt at a
2263 match fails. We can't just fail here, because of the possibility of
2264 quantifiers with zero minima.
2266 (b) If the JavaScript compatibility flag is set, set the length to zero
2267 so that the back reference matches an empty string.
2269 Otherwise, set the length to the length of what was matched by the
2270 referenced subpattern. */
2272 if (offset
>= offset_top
|| md
->offset_vector
[offset
] < 0)
2273 length
= (md
->jscript_compat
)? 0 : (int)(md
->end_subject
- eptr
+ 1);
2275 length
= md
->offset_vector
[offset
+1] - md
->offset_vector
[offset
];
2277 /* Set up for repetition, or handle the non-repeated case */
2287 c
= *ecode
++ - OP_CRSTAR
;
2288 minimize
= (c
& 1) != 0;
2289 min
= rep_min
[c
]; /* Pick up values from tables; */
2290 max
= rep_max
[c
]; /* zero for max => infinity */
2291 if (max
== 0) max
= INT_MAX
;
2296 minimize
= (*ecode
== OP_CRMINRANGE
);
2297 min
= GET2(ecode
, 1);
2298 max
= GET2(ecode
, 3);
2299 if (max
== 0) max
= INT_MAX
;
2303 default: /* No repeat follows */
2304 if (!match_ref(offset
, eptr
, length
, md
, ims
))
2307 MRRETURN(MATCH_NOMATCH
);
2310 continue; /* With the main loop */
2313 /* If the length of the reference is zero, just continue with the
2316 if (length
== 0) continue;
2318 /* First, ensure the minimum number of matches are present. We get back
2319 the length of the reference string explicitly rather than passing the
2320 address of eptr, so that eptr can be a register variable. */
2322 for (i
= 1; i
<= min
; i
++)
2324 if (!match_ref(offset
, eptr
, length
, md
, ims
))
2327 MRRETURN(MATCH_NOMATCH
);
2332 /* If min = max, continue at the same level without recursion.
2333 They are not both allowed to be zero. */
2335 if (min
== max
) continue;
2337 /* If minimizing, keep trying and advancing the pointer */
2341 for (fi
= min
;; fi
++)
2343 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM14
);
2344 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2345 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2346 if (!match_ref(offset
, eptr
, length
, md
, ims
))
2349 MRRETURN(MATCH_NOMATCH
);
2353 /* Control never gets here */
2356 /* If maximizing, find the longest string and work backwards */
2361 for (i
= min
; i
< max
; i
++)
2363 if (!match_ref(offset
, eptr
, length
, md
, ims
))
2372 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM15
);
2373 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2376 MRRETURN(MATCH_NOMATCH
);
2379 /* Control never gets here */
2381 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2382 used when all the characters in the class have values in the range 0-255,
2383 and either the matching is caseful, or the characters are in the range
2384 0-127 when UTF-8 processing is enabled. The only difference between
2385 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2388 First, look past the end of the item to see if there is repeat information
2389 following. Then obey similar code to character type repeats - written out
2395 data
= ecode
+ 1; /* Save for matching */
2396 ecode
+= 33; /* Advance past the item */
2406 c
= *ecode
++ - OP_CRSTAR
;
2407 minimize
= (c
& 1) != 0;
2408 min
= rep_min
[c
]; /* Pick up values from tables; */
2409 max
= rep_max
[c
]; /* zero for max => infinity */
2410 if (max
== 0) max
= INT_MAX
;
2415 minimize
= (*ecode
== OP_CRMINRANGE
);
2416 min
= GET2(ecode
, 1);
2417 max
= GET2(ecode
, 3);
2418 if (max
== 0) max
= INT_MAX
;
2422 default: /* No repeat follows */
2427 /* First, ensure the minimum number of matches are present. */
2433 for (i
= 1; i
<= min
; i
++)
2435 if (eptr
>= md
->end_subject
)
2438 MRRETURN(MATCH_NOMATCH
);
2440 GETCHARINC(c
, eptr
);
2443 if (op
== OP_CLASS
) MRRETURN(MATCH_NOMATCH
);
2447 if ((data
[c
/8] & (1 << (c
&7))) == 0) MRRETURN(MATCH_NOMATCH
);
2453 /* Not UTF-8 mode */
2455 for (i
= 1; i
<= min
; i
++)
2457 if (eptr
>= md
->end_subject
)
2460 MRRETURN(MATCH_NOMATCH
);
2463 if ((data
[c
/8] & (1 << (c
&7))) == 0) MRRETURN(MATCH_NOMATCH
);
2467 /* If max == min we can continue with the main loop without the
2470 if (min
== max
) continue;
2472 /* If minimizing, keep testing the rest of the expression and advancing
2473 the pointer while it matches the class. */
2481 for (fi
= min
;; fi
++)
2483 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM16
);
2484 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2485 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2486 if (eptr
>= md
->end_subject
)
2489 MRRETURN(MATCH_NOMATCH
);
2491 GETCHARINC(c
, eptr
);
2494 if (op
== OP_CLASS
) MRRETURN(MATCH_NOMATCH
);
2498 if ((data
[c
/8] & (1 << (c
&7))) == 0) MRRETURN(MATCH_NOMATCH
);
2504 /* Not UTF-8 mode */
2506 for (fi
= min
;; fi
++)
2508 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM17
);
2509 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2510 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2511 if (eptr
>= md
->end_subject
)
2514 MRRETURN(MATCH_NOMATCH
);
2517 if ((data
[c
/8] & (1 << (c
&7))) == 0) MRRETURN(MATCH_NOMATCH
);
2520 /* Control never gets here */
2523 /* If maximizing, find the longest possible run, then work backwards. */
2533 for (i
= min
; i
< max
; i
++)
2536 if (eptr
>= md
->end_subject
)
2541 GETCHARLEN(c
, eptr
, len
);
2544 if (op
== OP_CLASS
) break;
2548 if ((data
[c
/8] & (1 << (c
&7))) == 0) break;
2554 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM18
);
2555 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2556 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
2562 /* Not UTF-8 mode */
2564 for (i
= min
; i
< max
; i
++)
2566 if (eptr
>= md
->end_subject
)
2572 if ((data
[c
/8] & (1 << (c
&7))) == 0) break;
2577 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM19
);
2578 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2583 MRRETURN(MATCH_NOMATCH
);
2586 /* Control never gets here */
2589 /* Match an extended character class. This opcode is encountered only
2590 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2591 mode, because Unicode properties are supported in non-UTF-8 mode. */
2596 data
= ecode
+ 1 + LINK_SIZE
; /* Save for matching */
2597 ecode
+= GET(ecode
, 1); /* Advance past the item */
2607 c
= *ecode
++ - OP_CRSTAR
;
2608 minimize
= (c
& 1) != 0;
2609 min
= rep_min
[c
]; /* Pick up values from tables; */
2610 max
= rep_max
[c
]; /* zero for max => infinity */
2611 if (max
== 0) max
= INT_MAX
;
2616 minimize
= (*ecode
== OP_CRMINRANGE
);
2617 min
= GET2(ecode
, 1);
2618 max
= GET2(ecode
, 3);
2619 if (max
== 0) max
= INT_MAX
;
2623 default: /* No repeat follows */
2628 /* First, ensure the minimum number of matches are present. */
2630 for (i
= 1; i
<= min
; i
++)
2632 if (eptr
>= md
->end_subject
)
2635 MRRETURN(MATCH_NOMATCH
);
2637 GETCHARINCTEST(c
, eptr
);
2638 if (!_pcre_xclass(c
, data
)) MRRETURN(MATCH_NOMATCH
);
2641 /* If max == min we can continue with the main loop without the
2644 if (min
== max
) continue;
2646 /* If minimizing, keep testing the rest of the expression and advancing
2647 the pointer while it matches the class. */
2651 for (fi
= min
;; fi
++)
2653 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM20
);
2654 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2655 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2656 if (eptr
>= md
->end_subject
)
2659 MRRETURN(MATCH_NOMATCH
);
2661 GETCHARINCTEST(c
, eptr
);
2662 if (!_pcre_xclass(c
, data
)) MRRETURN(MATCH_NOMATCH
);
2664 /* Control never gets here */
2667 /* If maximizing, find the longest possible run, then work backwards. */
2672 for (i
= min
; i
< max
; i
++)
2675 if (eptr
>= md
->end_subject
)
2680 GETCHARLENTEST(c
, eptr
, len
);
2681 if (!_pcre_xclass(c
, data
)) break;
2686 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM21
);
2687 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2688 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
2689 if (utf8
) BACKCHAR(eptr
);
2691 MRRETURN(MATCH_NOMATCH
);
2694 /* Control never gets here */
2696 #endif /* End of XCLASS */
2698 /* Match a single character, casefully */
2706 GETCHARLEN(fc
, ecode
, length
);
2707 if (length
> md
->end_subject
- eptr
)
2709 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2710 MRRETURN(MATCH_NOMATCH
);
2712 while (length
-- > 0) if (*ecode
++ != *eptr
++) MRRETURN(MATCH_NOMATCH
);
2717 /* Non-UTF-8 mode */
2719 if (md
->end_subject
- eptr
< 1)
2721 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2722 MRRETURN(MATCH_NOMATCH
);
2724 if (ecode
[1] != *eptr
++) MRRETURN(MATCH_NOMATCH
);
2729 /* Match a single character, caselessly */
2737 GETCHARLEN(fc
, ecode
, length
);
2739 if (length
> md
->end_subject
- eptr
)
2741 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2742 MRRETURN(MATCH_NOMATCH
);
2745 /* If the pattern character's value is < 128, we have only one byte, and
2746 can use the fast lookup table. */
2750 if (md
->lcc
[*ecode
++] != md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
2753 /* Otherwise we must pick up the subject character */
2758 GETCHARINC(dc
, eptr
);
2761 /* If we have Unicode property support, we can use it to test the other
2762 case of the character, if there is one. */
2767 if (dc
!= UCD_OTHERCASE(fc
))
2769 MRRETURN(MATCH_NOMATCH
);
2774 #endif /* SUPPORT_UTF8 */
2776 /* Non-UTF-8 mode */
2778 if (md
->end_subject
- eptr
< 1)
2780 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2781 MRRETURN(MATCH_NOMATCH
);
2783 if (md
->lcc
[ecode
[1]] != md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
2788 /* Match a single character repeatedly. */
2791 min
= max
= GET2(ecode
, 1);
2802 max
= GET2(ecode
, 1);
2803 minimize
= *ecode
== OP_MINUPTO
;
2834 c
= *ecode
++ - OP_STAR
;
2835 minimize
= (c
& 1) != 0;
2837 min
= rep_min
[c
]; /* Pick up values from tables; */
2838 max
= rep_max
[c
]; /* zero for max => infinity */
2839 if (max
== 0) max
= INT_MAX
;
2841 /* Common code for all repeated single-character matches. */
2849 GETCHARLEN(fc
, ecode
, length
);
2852 /* Handle multibyte character matching specially here. There is
2853 support for caseless matching if UCP support is present. */
2858 unsigned int othercase
;
2859 if ((ims
& PCRE_CASELESS
) != 0 &&
2860 (othercase
= UCD_OTHERCASE(fc
)) != fc
)
2861 oclength
= _pcre_ord2utf8(othercase
, occhars
);
2863 #endif /* SUPPORT_UCP */
2865 for (i
= 1; i
<= min
; i
++)
2867 if (eptr
<= md
->end_subject
- length
&&
2868 memcmp(eptr
, charptr
, length
) == 0) eptr
+= length
;
2870 else if (oclength
> 0 &&
2871 eptr
<= md
->end_subject
- oclength
&&
2872 memcmp(eptr
, occhars
, oclength
) == 0) eptr
+= oclength
;
2873 #endif /* SUPPORT_UCP */
2877 MRRETURN(MATCH_NOMATCH
);
2881 if (min
== max
) continue;
2885 for (fi
= min
;; fi
++)
2887 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM22
);
2888 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2889 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2890 if (eptr
<= md
->end_subject
- length
&&
2891 memcmp(eptr
, charptr
, length
) == 0) eptr
+= length
;
2893 else if (oclength
> 0 &&
2894 eptr
<= md
->end_subject
- oclength
&&
2895 memcmp(eptr
, occhars
, oclength
) == 0) eptr
+= oclength
;
2896 #endif /* SUPPORT_UCP */
2900 MRRETURN(MATCH_NOMATCH
);
2903 /* Control never gets here */
2909 for (i
= min
; i
< max
; i
++)
2911 if (eptr
<= md
->end_subject
- length
&&
2912 memcmp(eptr
, charptr
, length
) == 0) eptr
+= length
;
2914 else if (oclength
> 0 &&
2915 eptr
<= md
->end_subject
- oclength
&&
2916 memcmp(eptr
, occhars
, oclength
) == 0) eptr
+= oclength
;
2917 #endif /* SUPPORT_UCP */
2925 if (possessive
) continue;
2929 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM23
);
2930 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2931 if (eptr
== pp
) { MRRETURN(MATCH_NOMATCH
); }
2935 #else /* without SUPPORT_UCP */
2937 #endif /* SUPPORT_UCP */
2940 /* Control never gets here */
2943 /* If the length of a UTF-8 character is 1, we fall through here, and
2944 obey the code as for non-UTF-8 characters below, though in this case the
2945 value of fc will always be < 128. */
2948 #endif /* SUPPORT_UTF8 */
2950 /* When not in UTF-8 mode, load a single-byte character. */
2954 /* The value of fc at this point is always less than 256, though we may or
2955 may not be in UTF-8 mode. The code is duplicated for the caseless and
2956 caseful cases, for speed, since matching characters is likely to be quite
2957 common. First, ensure the minimum number of matches are present. If min =
2958 max, continue at the same level without recursing. Otherwise, if
2959 minimizing, keep trying the rest of the expression and advancing one
2960 matching character if failing, up to the maximum. Alternatively, if
2961 maximizing, find the maximum number of characters and work backwards. */
2963 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc
, min
, max
,
2966 if ((ims
& PCRE_CASELESS
) != 0)
2969 for (i
= 1; i
<= min
; i
++)
2971 if (eptr
>= md
->end_subject
)
2974 MRRETURN(MATCH_NOMATCH
);
2976 if (fc
!= md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
2978 if (min
== max
) continue;
2981 for (fi
= min
;; fi
++)
2983 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM24
);
2984 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
2985 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
2986 if (eptr
>= md
->end_subject
)
2989 MRRETURN(MATCH_NOMATCH
);
2991 if (fc
!= md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
2993 /* Control never gets here */
2998 for (i
= min
; i
< max
; i
++)
3000 if (eptr
>= md
->end_subject
)
3005 if (fc
!= md
->lcc
[*eptr
]) break;
3009 if (possessive
) continue;
3013 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM25
);
3015 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3017 MRRETURN(MATCH_NOMATCH
);
3019 /* Control never gets here */
3022 /* Caseful comparisons (includes all multi-byte characters) */
3026 for (i
= 1; i
<= min
; i
++)
3028 if (eptr
>= md
->end_subject
)
3031 MRRETURN(MATCH_NOMATCH
);
3033 if (fc
!= *eptr
++) MRRETURN(MATCH_NOMATCH
);
3036 if (min
== max
) continue;
3040 for (fi
= min
;; fi
++)
3042 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM26
);
3043 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3044 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
3045 if (eptr
>= md
->end_subject
)
3048 MRRETURN(MATCH_NOMATCH
);
3050 if (fc
!= *eptr
++) MRRETURN(MATCH_NOMATCH
);
3052 /* Control never gets here */
3057 for (i
= min
; i
< max
; i
++)
3059 if (eptr
>= md
->end_subject
)
3064 if (fc
!= *eptr
) break;
3067 if (possessive
) continue;
3071 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM27
);
3073 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3075 MRRETURN(MATCH_NOMATCH
);
3078 /* Control never gets here */
3080 /* Match a negated single one-byte character. The character we are
3081 checking can be multibyte. */
3084 if (eptr
>= md
->end_subject
)
3087 MRRETURN(MATCH_NOMATCH
);
3090 GETCHARINCTEST(c
, eptr
);
3091 if ((ims
& PCRE_CASELESS
) != 0)
3097 if (md
->lcc
[*ecode
++] == c
) MRRETURN(MATCH_NOMATCH
);
3101 if (*ecode
++ == c
) MRRETURN(MATCH_NOMATCH
);
3105 /* Match a negated single one-byte character repeatedly. This is almost a
3106 repeat of the code for a repeated single character, but I haven't found a
3107 nice way of commoning these up that doesn't require a test of the
3108 positive/negative option for each character match. Maybe that wouldn't add
3109 very much to the time taken, but character matching *is* what this is all
3113 min
= max
= GET2(ecode
, 1);
3120 max
= GET2(ecode
, 1);
3121 minimize
= *ecode
== OP_NOTMINUPTO
;
3139 case OP_NOTPOSQUERY
:
3149 max
= GET2(ecode
, 1);
3158 case OP_NOTMINQUERY
:
3159 c
= *ecode
++ - OP_NOTSTAR
;
3160 minimize
= (c
& 1) != 0;
3161 min
= rep_min
[c
]; /* Pick up values from tables; */
3162 max
= rep_max
[c
]; /* zero for max => infinity */
3163 if (max
== 0) max
= INT_MAX
;
3165 /* Common code for all repeated single-byte matches. */
3170 /* The code is duplicated for the caseless and caseful cases, for speed,
3171 since matching characters is likely to be quite common. First, ensure the
3172 minimum number of matches are present. If min = max, continue at the same
3173 level without recursing. Otherwise, if minimizing, keep trying the rest of
3174 the expression and advancing one matching character if failing, up to the
3175 maximum. Alternatively, if maximizing, find the maximum number of
3176 characters and work backwards. */
3178 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc
, min
, max
,
3181 if ((ims
& PCRE_CASELESS
) != 0)
3189 register unsigned int d
;
3190 for (i
= 1; i
<= min
; i
++)
3192 if (eptr
>= md
->end_subject
)
3195 MRRETURN(MATCH_NOMATCH
);
3197 GETCHARINC(d
, eptr
);
3198 if (d
< 256) d
= md
->lcc
[d
];
3199 if (fc
== d
) MRRETURN(MATCH_NOMATCH
);
3205 /* Not UTF-8 mode */
3207 for (i
= 1; i
<= min
; i
++)
3209 if (eptr
>= md
->end_subject
)
3212 MRRETURN(MATCH_NOMATCH
);
3214 if (fc
== md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
3218 if (min
== max
) continue;
3226 register unsigned int d
;
3227 for (fi
= min
;; fi
++)
3229 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM28
);
3230 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3231 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
3232 if (eptr
>= md
->end_subject
)
3235 MRRETURN(MATCH_NOMATCH
);
3237 GETCHARINC(d
, eptr
);
3238 if (d
< 256) d
= md
->lcc
[d
];
3239 if (fc
== d
) MRRETURN(MATCH_NOMATCH
);
3244 /* Not UTF-8 mode */
3246 for (fi
= min
;; fi
++)
3248 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM29
);
3249 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3250 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
3251 if (eptr
>= md
->end_subject
)
3254 MRRETURN(MATCH_NOMATCH
);
3256 if (fc
== md
->lcc
[*eptr
++]) MRRETURN(MATCH_NOMATCH
);
3259 /* Control never gets here */
3272 register unsigned int d
;
3273 for (i
= min
; i
< max
; i
++)
3276 if (eptr
>= md
->end_subject
)
3281 GETCHARLEN(d
, eptr
, len
);
3282 if (d
< 256) d
= md
->lcc
[d
];
3286 if (possessive
) continue;
3289 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM30
);
3290 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3291 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
3297 /* Not UTF-8 mode */
3299 for (i
= min
; i
< max
; i
++)
3301 if (eptr
>= md
->end_subject
)
3306 if (fc
== md
->lcc
[*eptr
]) break;
3309 if (possessive
) continue;
3312 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM31
);
3313 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3318 MRRETURN(MATCH_NOMATCH
);
3320 /* Control never gets here */
3323 /* Caseful comparisons */
3331 register unsigned int d
;
3332 for (i
= 1; i
<= min
; i
++)
3334 if (eptr
>= md
->end_subject
)
3337 MRRETURN(MATCH_NOMATCH
);
3339 GETCHARINC(d
, eptr
);
3340 if (fc
== d
) MRRETURN(MATCH_NOMATCH
);
3345 /* Not UTF-8 mode */
3347 for (i
= 1; i
<= min
; i
++)
3349 if (eptr
>= md
->end_subject
)
3352 MRRETURN(MATCH_NOMATCH
);
3354 if (fc
== *eptr
++) MRRETURN(MATCH_NOMATCH
);
3358 if (min
== max
) continue;
3366 register unsigned int d
;
3367 for (fi
= min
;; fi
++)
3369 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM32
);
3370 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3371 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
3372 if (eptr
>= md
->end_subject
)
3375 MRRETURN(MATCH_NOMATCH
);
3377 GETCHARINC(d
, eptr
);
3378 if (fc
== d
) MRRETURN(MATCH_NOMATCH
);
3383 /* Not UTF-8 mode */
3385 for (fi
= min
;; fi
++)
3387 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM33
);
3388 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3389 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
3390 if (eptr
>= md
->end_subject
)
3393 MRRETURN(MATCH_NOMATCH
);
3395 if (fc
== *eptr
++) MRRETURN(MATCH_NOMATCH
);
3398 /* Control never gets here */
3411 register unsigned int d
;
3412 for (i
= min
; i
< max
; i
++)
3415 if (eptr
>= md
->end_subject
)
3420 GETCHARLEN(d
, eptr
, len
);
3424 if (possessive
) continue;
3427 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM34
);
3428 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3429 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
3435 /* Not UTF-8 mode */
3437 for (i
= min
; i
< max
; i
++)
3439 if (eptr
>= md
->end_subject
)
3444 if (fc
== *eptr
) break;
3447 if (possessive
) continue;
3450 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM35
);
3451 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
3456 MRRETURN(MATCH_NOMATCH
);
3459 /* Control never gets here */
3461 /* Match a single character type repeatedly; several different opcodes
3462 share code. This is very similar to the code for single characters, but we
3463 repeat it in the interests of efficiency. */
3466 min
= max
= GET2(ecode
, 1);
3472 case OP_TYPEMINUPTO
:
3474 max
= GET2(ecode
, 1);
3475 minimize
= *ecode
== OP_TYPEMINUPTO
;
3479 case OP_TYPEPOSSTAR
:
3486 case OP_TYPEPOSPLUS
:
3493 case OP_TYPEPOSQUERY
:
3500 case OP_TYPEPOSUPTO
:
3503 max
= GET2(ecode
, 1);
3508 case OP_TYPEMINSTAR
:
3510 case OP_TYPEMINPLUS
:
3512 case OP_TYPEMINQUERY
:
3513 c
= *ecode
++ - OP_TYPESTAR
;
3514 minimize
= (c
& 1) != 0;
3515 min
= rep_min
[c
]; /* Pick up values from tables; */
3516 max
= rep_max
[c
]; /* zero for max => infinity */
3517 if (max
== 0) max
= INT_MAX
;
3519 /* Common code for all repeated single character type matches. Note that
3520 in UTF-8 mode, '.' matches a character of any length, but for the other
3521 character types, the valid characters are all one-byte long. */
3524 ctype
= *ecode
++; /* Code for the character type */
3527 if (ctype
== OP_PROP
|| ctype
== OP_NOTPROP
)
3529 prop_fail_result
= ctype
== OP_NOTPROP
;
3530 prop_type
= *ecode
++;
3531 prop_value
= *ecode
++;
3533 else prop_type
= -1;
3536 /* First, ensure the minimum number of matches are present. Use inline
3537 code for maximizing the speed, and do the type test once at the start
3538 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3539 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3540 and single-bytes. */
3550 if (prop_fail_result
) MRRETURN(MATCH_NOMATCH
);
3551 for (i
= 1; i
<= min
; i
++)
3553 if (eptr
>= md
->end_subject
)
3556 MRRETURN(MATCH_NOMATCH
);
3558 GETCHARINCTEST(c
, eptr
);
3563 for (i
= 1; i
<= min
; i
++)
3565 if (eptr
>= md
->end_subject
)
3568 MRRETURN(MATCH_NOMATCH
);
3570 GETCHARINCTEST(c
, eptr
);
3571 prop_chartype
= UCD_CHARTYPE(c
);
3572 if ((prop_chartype
== ucp_Lu
||
3573 prop_chartype
== ucp_Ll
||
3574 prop_chartype
== ucp_Lt
) == prop_fail_result
)
3575 MRRETURN(MATCH_NOMATCH
);
3580 for (i
= 1; i
<= min
; i
++)
3582 if (eptr
>= md
->end_subject
)
3585 MRRETURN(MATCH_NOMATCH
);
3587 GETCHARINCTEST(c
, eptr
);
3588 prop_category
= UCD_CATEGORY(c
);
3589 if ((prop_category
== prop_value
) == prop_fail_result
)
3590 MRRETURN(MATCH_NOMATCH
);
3595 for (i
= 1; i
<= min
; i
++)
3597 if (eptr
>= md
->end_subject
)
3600 MRRETURN(MATCH_NOMATCH
);
3602 GETCHARINCTEST(c
, eptr
);
3603 prop_chartype
= UCD_CHARTYPE(c
);
3604 if ((prop_chartype
== prop_value
) == prop_fail_result
)
3605 MRRETURN(MATCH_NOMATCH
);
3610 for (i
= 1; i
<= min
; i
++)
3612 if (eptr
>= md
->end_subject
)
3615 MRRETURN(MATCH_NOMATCH
);
3617 GETCHARINCTEST(c
, eptr
);
3618 prop_script
= UCD_SCRIPT(c
);
3619 if ((prop_script
== prop_value
) == prop_fail_result
)
3620 MRRETURN(MATCH_NOMATCH
);
3625 for (i
= 1; i
<= min
; i
++)
3627 if (eptr
>= md
->end_subject
)
3630 MRRETURN(MATCH_NOMATCH
);
3632 GETCHARINCTEST(c
, eptr
);
3633 prop_category
= UCD_CATEGORY(c
);
3634 if ((prop_category
== ucp_L
|| prop_category
== ucp_N
)
3635 == prop_fail_result
)
3636 MRRETURN(MATCH_NOMATCH
);
3640 case PT_SPACE
: /* Perl space */
3641 for (i
= 1; i
<= min
; i
++)
3643 if (eptr
>= md
->end_subject
)
3646 MRRETURN(MATCH_NOMATCH
);
3648 GETCHARINCTEST(c
, eptr
);
3649 prop_category
= UCD_CATEGORY(c
);
3650 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
3651 c
== CHAR_FF
|| c
== CHAR_CR
)
3652 == prop_fail_result
)
3653 MRRETURN(MATCH_NOMATCH
);
3657 case PT_PXSPACE
: /* POSIX space */
3658 for (i
= 1; i
<= min
; i
++)
3660 if (eptr
>= md
->end_subject
)
3663 MRRETURN(MATCH_NOMATCH
);
3665 GETCHARINCTEST(c
, eptr
);
3666 prop_category
= UCD_CATEGORY(c
);
3667 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
3668 c
== CHAR_VT
|| c
== CHAR_FF
|| c
== CHAR_CR
)
3669 == prop_fail_result
)
3670 MRRETURN(MATCH_NOMATCH
);
3675 for (i
= 1; i
<= min
; i
++)
3677 if (eptr
>= md
->end_subject
)
3680 MRRETURN(MATCH_NOMATCH
);
3682 GETCHARINCTEST(c
, eptr
);
3683 prop_category
= UCD_CATEGORY(c
);
3684 if ((prop_category
== ucp_L
|| prop_category
== ucp_N
||
3685 c
== CHAR_UNDERSCORE
)
3686 == prop_fail_result
)
3687 MRRETURN(MATCH_NOMATCH
);
3691 /* This should not occur */
3694 RRETURN(PCRE_ERROR_INTERNAL
);
3698 /* Match extended Unicode sequences. We will get here only if the
3699 support is in the binary; otherwise a compile-time error occurs. */
3701 else if (ctype
== OP_EXTUNI
)
3703 for (i
= 1; i
<= min
; i
++)
3705 if (eptr
>= md
->end_subject
)
3708 MRRETURN(MATCH_NOMATCH
);
3710 GETCHARINCTEST(c
, eptr
);
3711 prop_category
= UCD_CATEGORY(c
);
3712 if (prop_category
== ucp_M
) MRRETURN(MATCH_NOMATCH
);
3713 while (eptr
< md
->end_subject
)
3716 if (!utf8
) c
= *eptr
;
3717 else { GETCHARLEN(c
, eptr
, len
); }
3718 prop_category
= UCD_CATEGORY(c
);
3719 if (prop_category
!= ucp_M
) break;
3726 #endif /* SUPPORT_UCP */
3728 /* Handle all other cases when the coding is UTF-8 */
3731 if (utf8
) switch(ctype
)
3734 for (i
= 1; i
<= min
; i
++)
3736 if (eptr
>= md
->end_subject
)
3739 MRRETURN(MATCH_NOMATCH
);
3741 if (IS_NEWLINE(eptr
)) MRRETURN(MATCH_NOMATCH
);
3743 while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
3748 for (i
= 1; i
<= min
; i
++)
3750 if (eptr
>= md
->end_subject
)
3753 MRRETURN(MATCH_NOMATCH
);
3756 while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
3761 if (eptr
> md
->end_subject
- min
) MRRETURN(MATCH_NOMATCH
);
3766 for (i
= 1; i
<= min
; i
++)
3768 if (eptr
>= md
->end_subject
)
3771 MRRETURN(MATCH_NOMATCH
);
3773 GETCHARINC(c
, eptr
);
3776 default: MRRETURN(MATCH_NOMATCH
);
3778 if (eptr
< md
->end_subject
&& *eptr
== 0x0a) eptr
++;
3789 if (md
->bsr_anycrlf
) MRRETURN(MATCH_NOMATCH
);
3796 for (i
= 1; i
<= min
; i
++)
3798 if (eptr
>= md
->end_subject
)
3801 MRRETURN(MATCH_NOMATCH
);
3803 GETCHARINC(c
, eptr
);
3808 case 0x20: /* SPACE */
3809 case 0xa0: /* NBSP */
3810 case 0x1680: /* OGHAM SPACE MARK */
3811 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3812 case 0x2000: /* EN QUAD */
3813 case 0x2001: /* EM QUAD */
3814 case 0x2002: /* EN SPACE */
3815 case 0x2003: /* EM SPACE */
3816 case 0x2004: /* THREE-PER-EM SPACE */
3817 case 0x2005: /* FOUR-PER-EM SPACE */
3818 case 0x2006: /* SIX-PER-EM SPACE */
3819 case 0x2007: /* FIGURE SPACE */
3820 case 0x2008: /* PUNCTUATION SPACE */
3821 case 0x2009: /* THIN SPACE */
3822 case 0x200A: /* HAIR SPACE */
3823 case 0x202f: /* NARROW NO-BREAK SPACE */
3824 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3825 case 0x3000: /* IDEOGRAPHIC SPACE */
3826 MRRETURN(MATCH_NOMATCH
);
3832 for (i
= 1; i
<= min
; i
++)
3834 if (eptr
>= md
->end_subject
)
3837 MRRETURN(MATCH_NOMATCH
);
3839 GETCHARINC(c
, eptr
);
3842 default: MRRETURN(MATCH_NOMATCH
);
3844 case 0x20: /* SPACE */
3845 case 0xa0: /* NBSP */
3846 case 0x1680: /* OGHAM SPACE MARK */
3847 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3848 case 0x2000: /* EN QUAD */
3849 case 0x2001: /* EM QUAD */
3850 case 0x2002: /* EN SPACE */
3851 case 0x2003: /* EM SPACE */
3852 case 0x2004: /* THREE-PER-EM SPACE */
3853 case 0x2005: /* FOUR-PER-EM SPACE */
3854 case 0x2006: /* SIX-PER-EM SPACE */
3855 case 0x2007: /* FIGURE SPACE */
3856 case 0x2008: /* PUNCTUATION SPACE */
3857 case 0x2009: /* THIN SPACE */
3858 case 0x200A: /* HAIR SPACE */
3859 case 0x202f: /* NARROW NO-BREAK SPACE */
3860 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3861 case 0x3000: /* IDEOGRAPHIC SPACE */
3868 for (i
= 1; i
<= min
; i
++)
3870 if (eptr
>= md
->end_subject
)
3873 MRRETURN(MATCH_NOMATCH
);
3875 GETCHARINC(c
, eptr
);
3883 case 0x85: /* NEL */
3884 case 0x2028: /* LINE SEPARATOR */
3885 case 0x2029: /* PARAGRAPH SEPARATOR */
3886 MRRETURN(MATCH_NOMATCH
);
3892 for (i
= 1; i
<= min
; i
++)
3894 if (eptr
>= md
->end_subject
)
3897 MRRETURN(MATCH_NOMATCH
);
3899 GETCHARINC(c
, eptr
);
3902 default: MRRETURN(MATCH_NOMATCH
);
3907 case 0x85: /* NEL */
3908 case 0x2028: /* LINE SEPARATOR */
3909 case 0x2029: /* PARAGRAPH SEPARATOR */
3916 for (i
= 1; i
<= min
; i
++)
3918 if (eptr
>= md
->end_subject
)
3921 MRRETURN(MATCH_NOMATCH
);
3923 GETCHARINC(c
, eptr
);
3924 if (c
< 128 && (md
->ctypes
[c
] & ctype_digit
) != 0)
3925 MRRETURN(MATCH_NOMATCH
);
3930 for (i
= 1; i
<= min
; i
++)
3932 if (eptr
>= md
->end_subject
)
3935 MRRETURN(MATCH_NOMATCH
);
3937 if (*eptr
>= 128 || (md
->ctypes
[*eptr
++] & ctype_digit
) == 0)
3938 MRRETURN(MATCH_NOMATCH
);
3939 /* No need to skip more bytes - we know it's a 1-byte character */
3943 case OP_NOT_WHITESPACE
:
3944 for (i
= 1; i
<= min
; i
++)
3946 if (eptr
>= md
->end_subject
)
3949 MRRETURN(MATCH_NOMATCH
);
3951 if (*eptr
< 128 && (md
->ctypes
[*eptr
] & ctype_space
) != 0)
3952 MRRETURN(MATCH_NOMATCH
);
3953 while (++eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80);
3958 for (i
= 1; i
<= min
; i
++)
3960 if (eptr
>= md
->end_subject
)
3963 MRRETURN(MATCH_NOMATCH
);
3965 if (*eptr
>= 128 || (md
->ctypes
[*eptr
++] & ctype_space
) == 0)
3966 MRRETURN(MATCH_NOMATCH
);
3967 /* No need to skip more bytes - we know it's a 1-byte character */
3971 case OP_NOT_WORDCHAR
:
3972 for (i
= 1; i
<= min
; i
++)
3974 if (eptr
>= md
->end_subject
)
3977 MRRETURN(MATCH_NOMATCH
);
3979 if (*eptr
< 128 && (md
->ctypes
[*eptr
] & ctype_word
) != 0)
3980 MRRETURN(MATCH_NOMATCH
);
3981 while (++eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80);
3986 for (i
= 1; i
<= min
; i
++)
3988 if (eptr
>= md
->end_subject
)
3991 MRRETURN(MATCH_NOMATCH
);
3993 if (*eptr
>= 128 || (md
->ctypes
[*eptr
++] & ctype_word
) == 0)
3994 MRRETURN(MATCH_NOMATCH
);
3995 /* No need to skip more bytes - we know it's a 1-byte character */
4000 RRETURN(PCRE_ERROR_INTERNAL
);
4001 } /* End switch(ctype) */
4004 #endif /* SUPPORT_UTF8 */
4006 /* Code for the non-UTF-8 case for minimum matching of operators other
4007 than OP_PROP and OP_NOTPROP. */
4012 for (i
= 1; i
<= min
; i
++)
4014 if (eptr
>= md
->end_subject
)
4017 MRRETURN(MATCH_NOMATCH
);
4019 if (IS_NEWLINE(eptr
)) MRRETURN(MATCH_NOMATCH
);
4025 if (eptr
> md
->end_subject
- min
)
4028 MRRETURN(MATCH_NOMATCH
);
4034 if (eptr
> md
->end_subject
- min
)
4037 MRRETURN(MATCH_NOMATCH
);
4043 for (i
= 1; i
<= min
; i
++)
4045 if (eptr
>= md
->end_subject
)
4048 MRRETURN(MATCH_NOMATCH
);
4052 default: MRRETURN(MATCH_NOMATCH
);
4054 if (eptr
< md
->end_subject
&& *eptr
== 0x0a) eptr
++;
4062 if (md
->bsr_anycrlf
) MRRETURN(MATCH_NOMATCH
);
4069 for (i
= 1; i
<= min
; i
++)
4071 if (eptr
>= md
->end_subject
)
4074 MRRETURN(MATCH_NOMATCH
);
4080 case 0x20: /* SPACE */
4081 case 0xa0: /* NBSP */
4082 MRRETURN(MATCH_NOMATCH
);
4088 for (i
= 1; i
<= min
; i
++)
4090 if (eptr
>= md
->end_subject
)
4093 MRRETURN(MATCH_NOMATCH
);
4097 default: MRRETURN(MATCH_NOMATCH
);
4099 case 0x20: /* SPACE */
4100 case 0xa0: /* NBSP */
4107 for (i
= 1; i
<= min
; i
++)
4109 if (eptr
>= md
->end_subject
)
4112 MRRETURN(MATCH_NOMATCH
);
4121 case 0x85: /* NEL */
4122 MRRETURN(MATCH_NOMATCH
);
4128 for (i
= 1; i
<= min
; i
++)
4130 if (eptr
>= md
->end_subject
)
4133 MRRETURN(MATCH_NOMATCH
);
4137 default: MRRETURN(MATCH_NOMATCH
);
4142 case 0x85: /* NEL */
4149 for (i
= 1; i
<= min
; i
++)
4151 if (eptr
>= md
->end_subject
)
4154 MRRETURN(MATCH_NOMATCH
);
4156 if ((md
->ctypes
[*eptr
++] & ctype_digit
) != 0) MRRETURN(MATCH_NOMATCH
);
4161 for (i
= 1; i
<= min
; i
++)
4163 if (eptr
>= md
->end_subject
)
4166 MRRETURN(MATCH_NOMATCH
);
4168 if ((md
->ctypes
[*eptr
++] & ctype_digit
) == 0) MRRETURN(MATCH_NOMATCH
);
4172 case OP_NOT_WHITESPACE
:
4173 for (i
= 1; i
<= min
; i
++)
4175 if (eptr
>= md
->end_subject
)
4178 MRRETURN(MATCH_NOMATCH
);
4180 if ((md
->ctypes
[*eptr
++] & ctype_space
) != 0) MRRETURN(MATCH_NOMATCH
);
4185 for (i
= 1; i
<= min
; i
++)
4187 if (eptr
>= md
->end_subject
)
4190 MRRETURN(MATCH_NOMATCH
);
4192 if ((md
->ctypes
[*eptr
++] & ctype_space
) == 0) MRRETURN(MATCH_NOMATCH
);
4196 case OP_NOT_WORDCHAR
:
4197 for (i
= 1; i
<= min
; i
++)
4199 if (eptr
>= md
->end_subject
)
4202 MRRETURN(MATCH_NOMATCH
);
4204 if ((md
->ctypes
[*eptr
++] & ctype_word
) != 0)
4205 MRRETURN(MATCH_NOMATCH
);
4210 for (i
= 1; i
<= min
; i
++)
4212 if (eptr
>= md
->end_subject
)
4215 MRRETURN(MATCH_NOMATCH
);
4217 if ((md
->ctypes
[*eptr
++] & ctype_word
) == 0)
4218 MRRETURN(MATCH_NOMATCH
);
4223 RRETURN(PCRE_ERROR_INTERNAL
);
4227 /* If min = max, continue at the same level without recursing */
4229 if (min
== max
) continue;
4231 /* If minimizing, we have to test the rest of the pattern before each
4232 subsequent match. Again, separate the UTF-8 case for speed, and also
4233 separate the UCP cases. */
4243 for (fi
= min
;; fi
++)
4245 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM36
);
4246 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4247 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4248 if (eptr
>= md
->end_subject
)
4251 MRRETURN(MATCH_NOMATCH
);
4253 GETCHARINCTEST(c
, eptr
);
4254 if (prop_fail_result
) MRRETURN(MATCH_NOMATCH
);
4256 /* Control never gets here */
4259 for (fi
= min
;; fi
++)
4261 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM37
);
4262 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4263 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4264 if (eptr
>= md
->end_subject
)
4267 MRRETURN(MATCH_NOMATCH
);
4269 GETCHARINCTEST(c
, eptr
);
4270 prop_chartype
= UCD_CHARTYPE(c
);
4271 if ((prop_chartype
== ucp_Lu
||
4272 prop_chartype
== ucp_Ll
||
4273 prop_chartype
== ucp_Lt
) == prop_fail_result
)
4274 MRRETURN(MATCH_NOMATCH
);
4276 /* Control never gets here */
4279 for (fi
= min
;; fi
++)
4281 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM38
);
4282 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4283 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4284 if (eptr
>= md
->end_subject
)
4287 MRRETURN(MATCH_NOMATCH
);
4289 GETCHARINCTEST(c
, eptr
);
4290 prop_category
= UCD_CATEGORY(c
);
4291 if ((prop_category
== prop_value
) == prop_fail_result
)
4292 MRRETURN(MATCH_NOMATCH
);
4294 /* Control never gets here */
4297 for (fi
= min
;; fi
++)
4299 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM39
);
4300 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4301 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4302 if (eptr
>= md
->end_subject
)
4305 MRRETURN(MATCH_NOMATCH
);
4307 GETCHARINCTEST(c
, eptr
);
4308 prop_chartype
= UCD_CHARTYPE(c
);
4309 if ((prop_chartype
== prop_value
) == prop_fail_result
)
4310 MRRETURN(MATCH_NOMATCH
);
4312 /* Control never gets here */
4315 for (fi
= min
;; fi
++)
4317 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM40
);
4318 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4319 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4320 if (eptr
>= md
->end_subject
)
4323 MRRETURN(MATCH_NOMATCH
);
4325 GETCHARINCTEST(c
, eptr
);
4326 prop_script
= UCD_SCRIPT(c
);
4327 if ((prop_script
== prop_value
) == prop_fail_result
)
4328 MRRETURN(MATCH_NOMATCH
);
4330 /* Control never gets here */
4333 for (fi
= min
;; fi
++)
4335 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM59
);
4336 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4337 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4338 if (eptr
>= md
->end_subject
)
4341 MRRETURN(MATCH_NOMATCH
);
4343 GETCHARINCTEST(c
, eptr
);
4344 prop_category
= UCD_CATEGORY(c
);
4345 if ((prop_category
== ucp_L
|| prop_category
== ucp_N
)
4346 == prop_fail_result
)
4347 MRRETURN(MATCH_NOMATCH
);
4349 /* Control never gets here */
4351 case PT_SPACE
: /* Perl space */
4352 for (fi
= min
;; fi
++)
4354 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM60
);
4355 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4356 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4357 if (eptr
>= md
->end_subject
)
4360 MRRETURN(MATCH_NOMATCH
);
4362 GETCHARINCTEST(c
, eptr
);
4363 prop_category
= UCD_CATEGORY(c
);
4364 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
4365 c
== CHAR_FF
|| c
== CHAR_CR
)
4366 == prop_fail_result
)
4367 MRRETURN(MATCH_NOMATCH
);
4369 /* Control never gets here */
4371 case PT_PXSPACE
: /* POSIX space */
4372 for (fi
= min
;; fi
++)
4374 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM61
);
4375 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4376 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4377 if (eptr
>= md
->end_subject
)
4380 MRRETURN(MATCH_NOMATCH
);
4382 GETCHARINCTEST(c
, eptr
);
4383 prop_category
= UCD_CATEGORY(c
);
4384 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
4385 c
== CHAR_VT
|| c
== CHAR_FF
|| c
== CHAR_CR
)
4386 == prop_fail_result
)
4387 MRRETURN(MATCH_NOMATCH
);
4389 /* Control never gets here */
4392 for (fi
= min
;; fi
++)
4394 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM62
);
4395 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4396 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4397 if (eptr
>= md
->end_subject
)
4400 MRRETURN(MATCH_NOMATCH
);
4402 GETCHARINCTEST(c
, eptr
);
4403 prop_category
= UCD_CATEGORY(c
);
4404 if ((prop_category
== ucp_L
||
4405 prop_category
== ucp_N
||
4406 c
== CHAR_UNDERSCORE
)
4407 == prop_fail_result
)
4408 MRRETURN(MATCH_NOMATCH
);
4410 /* Control never gets here */
4412 /* This should never occur */
4415 RRETURN(PCRE_ERROR_INTERNAL
);
4419 /* Match extended Unicode sequences. We will get here only if the
4420 support is in the binary; otherwise a compile-time error occurs. */
4422 else if (ctype
== OP_EXTUNI
)
4424 for (fi
= min
;; fi
++)
4426 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM41
);
4427 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4428 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4429 if (eptr
>= md
->end_subject
)
4432 MRRETURN(MATCH_NOMATCH
);
4434 GETCHARINCTEST(c
, eptr
);
4435 prop_category
= UCD_CATEGORY(c
);
4436 if (prop_category
== ucp_M
) MRRETURN(MATCH_NOMATCH
);
4437 while (eptr
< md
->end_subject
)
4440 if (!utf8
) c
= *eptr
;
4441 else { GETCHARLEN(c
, eptr
, len
); }
4442 prop_category
= UCD_CATEGORY(c
);
4443 if (prop_category
!= ucp_M
) break;
4450 #endif /* SUPPORT_UCP */
4456 for (fi
= min
;; fi
++)
4458 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM42
);
4459 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4460 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4461 if (eptr
>= md
->end_subject
)
4464 MRRETURN(MATCH_NOMATCH
);
4466 if (ctype
== OP_ANY
&& IS_NEWLINE(eptr
))
4467 MRRETURN(MATCH_NOMATCH
);
4468 GETCHARINC(c
, eptr
);
4471 case OP_ANY
: /* This is the non-NL case */
4479 default: MRRETURN(MATCH_NOMATCH
);
4481 if (eptr
< md
->end_subject
&& *eptr
== 0x0a) eptr
++;
4491 if (md
->bsr_anycrlf
) MRRETURN(MATCH_NOMATCH
);
4501 case 0x20: /* SPACE */
4502 case 0xa0: /* NBSP */
4503 case 0x1680: /* OGHAM SPACE MARK */
4504 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4505 case 0x2000: /* EN QUAD */
4506 case 0x2001: /* EM QUAD */
4507 case 0x2002: /* EN SPACE */
4508 case 0x2003: /* EM SPACE */
4509 case 0x2004: /* THREE-PER-EM SPACE */
4510 case 0x2005: /* FOUR-PER-EM SPACE */
4511 case 0x2006: /* SIX-PER-EM SPACE */
4512 case 0x2007: /* FIGURE SPACE */
4513 case 0x2008: /* PUNCTUATION SPACE */
4514 case 0x2009: /* THIN SPACE */
4515 case 0x200A: /* HAIR SPACE */
4516 case 0x202f: /* NARROW NO-BREAK SPACE */
4517 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4518 case 0x3000: /* IDEOGRAPHIC SPACE */
4519 MRRETURN(MATCH_NOMATCH
);
4526 default: MRRETURN(MATCH_NOMATCH
);
4528 case 0x20: /* SPACE */
4529 case 0xa0: /* NBSP */
4530 case 0x1680: /* OGHAM SPACE MARK */
4531 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4532 case 0x2000: /* EN QUAD */
4533 case 0x2001: /* EM QUAD */
4534 case 0x2002: /* EN SPACE */
4535 case 0x2003: /* EM SPACE */
4536 case 0x2004: /* THREE-PER-EM SPACE */
4537 case 0x2005: /* FOUR-PER-EM SPACE */
4538 case 0x2006: /* SIX-PER-EM SPACE */
4539 case 0x2007: /* FIGURE SPACE */
4540 case 0x2008: /* PUNCTUATION SPACE */
4541 case 0x2009: /* THIN SPACE */
4542 case 0x200A: /* HAIR SPACE */
4543 case 0x202f: /* NARROW NO-BREAK SPACE */
4544 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4545 case 0x3000: /* IDEOGRAPHIC SPACE */
4558 case 0x85: /* NEL */
4559 case 0x2028: /* LINE SEPARATOR */
4560 case 0x2029: /* PARAGRAPH SEPARATOR */
4561 MRRETURN(MATCH_NOMATCH
);
4568 default: MRRETURN(MATCH_NOMATCH
);
4573 case 0x85: /* NEL */
4574 case 0x2028: /* LINE SEPARATOR */
4575 case 0x2029: /* PARAGRAPH SEPARATOR */
4581 if (c
< 256 && (md
->ctypes
[c
] & ctype_digit
) != 0)
4582 MRRETURN(MATCH_NOMATCH
);
4586 if (c
>= 256 || (md
->ctypes
[c
] & ctype_digit
) == 0)
4587 MRRETURN(MATCH_NOMATCH
);
4590 case OP_NOT_WHITESPACE
:
4591 if (c
< 256 && (md
->ctypes
[c
] & ctype_space
) != 0)
4592 MRRETURN(MATCH_NOMATCH
);
4596 if (c
>= 256 || (md
->ctypes
[c
] & ctype_space
) == 0)
4597 MRRETURN(MATCH_NOMATCH
);
4600 case OP_NOT_WORDCHAR
:
4601 if (c
< 256 && (md
->ctypes
[c
] & ctype_word
) != 0)
4602 MRRETURN(MATCH_NOMATCH
);
4606 if (c
>= 256 || (md
->ctypes
[c
] & ctype_word
) == 0)
4607 MRRETURN(MATCH_NOMATCH
);
4611 RRETURN(PCRE_ERROR_INTERNAL
);
4617 /* Not UTF-8 mode */
4619 for (fi
= min
;; fi
++)
4621 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM43
);
4622 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4623 if (fi
>= max
) MRRETURN(MATCH_NOMATCH
);
4624 if (eptr
>= md
->end_subject
)
4627 MRRETURN(MATCH_NOMATCH
);
4629 if (ctype
== OP_ANY
&& IS_NEWLINE(eptr
))
4630 MRRETURN(MATCH_NOMATCH
);
4634 case OP_ANY
: /* This is the non-NL case */
4642 default: MRRETURN(MATCH_NOMATCH
);
4644 if (eptr
< md
->end_subject
&& *eptr
== 0x0a) eptr
++;
4653 if (md
->bsr_anycrlf
) MRRETURN(MATCH_NOMATCH
);
4663 case 0x20: /* SPACE */
4664 case 0xa0: /* NBSP */
4665 MRRETURN(MATCH_NOMATCH
);
4672 default: MRRETURN(MATCH_NOMATCH
);
4674 case 0x20: /* SPACE */
4675 case 0xa0: /* NBSP */
4688 case 0x85: /* NEL */
4689 MRRETURN(MATCH_NOMATCH
);
4696 default: MRRETURN(MATCH_NOMATCH
);
4701 case 0x85: /* NEL */
4707 if ((md
->ctypes
[c
] & ctype_digit
) != 0) MRRETURN(MATCH_NOMATCH
);
4711 if ((md
->ctypes
[c
] & ctype_digit
) == 0) MRRETURN(MATCH_NOMATCH
);
4714 case OP_NOT_WHITESPACE
:
4715 if ((md
->ctypes
[c
] & ctype_space
) != 0) MRRETURN(MATCH_NOMATCH
);
4719 if ((md
->ctypes
[c
] & ctype_space
) == 0) MRRETURN(MATCH_NOMATCH
);
4722 case OP_NOT_WORDCHAR
:
4723 if ((md
->ctypes
[c
] & ctype_word
) != 0) MRRETURN(MATCH_NOMATCH
);
4727 if ((md
->ctypes
[c
] & ctype_word
) == 0) MRRETURN(MATCH_NOMATCH
);
4731 RRETURN(PCRE_ERROR_INTERNAL
);
4735 /* Control never gets here */
4738 /* If maximizing, it is worth using inline code for speed, doing the type
4739 test once at the start (i.e. keep it out of the loop). Again, keep the
4740 UTF-8 and UCP stuff separate. */
4744 pp
= eptr
; /* Remember where we started */
4752 for (i
= min
; i
< max
; i
++)
4755 if (eptr
>= md
->end_subject
)
4760 GETCHARLENTEST(c
, eptr
, len
);
4761 if (prop_fail_result
) break;
4767 for (i
= min
; i
< max
; i
++)
4770 if (eptr
>= md
->end_subject
)
4775 GETCHARLENTEST(c
, eptr
, len
);
4776 prop_chartype
= UCD_CHARTYPE(c
);
4777 if ((prop_chartype
== ucp_Lu
||
4778 prop_chartype
== ucp_Ll
||
4779 prop_chartype
== ucp_Lt
) == prop_fail_result
)
4786 for (i
= min
; i
< max
; i
++)
4789 if (eptr
>= md
->end_subject
)
4794 GETCHARLENTEST(c
, eptr
, len
);
4795 prop_category
= UCD_CATEGORY(c
);
4796 if ((prop_category
== prop_value
) == prop_fail_result
)
4803 for (i
= min
; i
< max
; i
++)
4806 if (eptr
>= md
->end_subject
)
4811 GETCHARLENTEST(c
, eptr
, len
);
4812 prop_chartype
= UCD_CHARTYPE(c
);
4813 if ((prop_chartype
== prop_value
) == prop_fail_result
)
4820 for (i
= min
; i
< max
; i
++)
4823 if (eptr
>= md
->end_subject
)
4828 GETCHARLENTEST(c
, eptr
, len
);
4829 prop_script
= UCD_SCRIPT(c
);
4830 if ((prop_script
== prop_value
) == prop_fail_result
)
4837 for (i
= min
; i
< max
; i
++)
4840 if (eptr
>= md
->end_subject
)
4845 GETCHARLENTEST(c
, eptr
, len
);
4846 prop_category
= UCD_CATEGORY(c
);
4847 if ((prop_category
== ucp_L
|| prop_category
== ucp_N
)
4848 == prop_fail_result
)
4854 case PT_SPACE
: /* Perl space */
4855 for (i
= min
; i
< max
; i
++)
4858 if (eptr
>= md
->end_subject
)
4863 GETCHARLENTEST(c
, eptr
, len
);
4864 prop_category
= UCD_CATEGORY(c
);
4865 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
4866 c
== CHAR_FF
|| c
== CHAR_CR
)
4867 == prop_fail_result
)
4873 case PT_PXSPACE
: /* POSIX space */
4874 for (i
= min
; i
< max
; i
++)
4877 if (eptr
>= md
->end_subject
)
4882 GETCHARLENTEST(c
, eptr
, len
);
4883 prop_category
= UCD_CATEGORY(c
);
4884 if ((prop_category
== ucp_Z
|| c
== CHAR_HT
|| c
== CHAR_NL
||
4885 c
== CHAR_VT
|| c
== CHAR_FF
|| c
== CHAR_CR
)
4886 == prop_fail_result
)
4893 for (i
= min
; i
< max
; i
++)
4896 if (eptr
>= md
->end_subject
)
4901 GETCHARLENTEST(c
, eptr
, len
);
4902 prop_category
= UCD_CATEGORY(c
);
4903 if ((prop_category
== ucp_L
|| prop_category
== ucp_N
||
4904 c
== CHAR_UNDERSCORE
) == prop_fail_result
)
4911 RRETURN(PCRE_ERROR_INTERNAL
);
4914 /* eptr is now past the end of the maximum run */
4916 if (possessive
) continue;
4919 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM44
);
4920 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4921 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
4922 if (utf8
) BACKCHAR(eptr
);
4926 /* Match extended Unicode sequences. We will get here only if the
4927 support is in the binary; otherwise a compile-time error occurs. */
4929 else if (ctype
== OP_EXTUNI
)
4931 for (i
= min
; i
< max
; i
++)
4933 if (eptr
>= md
->end_subject
)
4938 GETCHARINCTEST(c
, eptr
);
4939 prop_category
= UCD_CATEGORY(c
);
4940 if (prop_category
== ucp_M
) break;
4941 while (eptr
< md
->end_subject
)
4944 if (!utf8
) c
= *eptr
; else
4946 GETCHARLEN(c
, eptr
, len
);
4948 prop_category
= UCD_CATEGORY(c
);
4949 if (prop_category
!= ucp_M
) break;
4954 /* eptr is now past the end of the maximum run */
4956 if (possessive
) continue;
4960 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM45
);
4961 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
4962 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
4963 for (;;) /* Move back over one extended */
4966 if (!utf8
) c
= *eptr
; else
4969 GETCHARLEN(c
, eptr
, len
);
4971 prop_category
= UCD_CATEGORY(c
);
4972 if (prop_category
!= ucp_M
) break;
4979 #endif /* SUPPORT_UCP */
4991 for (i
= min
; i
< max
; i
++)
4993 if (eptr
>= md
->end_subject
)
4998 if (IS_NEWLINE(eptr
)) break;
5000 while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
5004 /* Handle unlimited UTF-8 repeat */
5008 for (i
= min
; i
< max
; i
++)
5010 if (eptr
>= md
->end_subject
)
5015 if (IS_NEWLINE(eptr
)) break;
5017 while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
5025 for (i
= min
; i
< max
; i
++)
5027 if (eptr
>= md
->end_subject
)
5033 while (eptr
< md
->end_subject
&& (*eptr
& 0xc0) == 0x80) eptr
++;
5036 else eptr
= md
->end_subject
; /* Unlimited UTF-8 repeat */
5039 /* The byte case is the same as non-UTF8 */
5043 if (c
> (unsigned int)(md
->end_subject
- eptr
))
5045 eptr
= md
->end_subject
;
5052 for (i
= min
; i
< max
; i
++)
5055 if (eptr
>= md
->end_subject
)
5060 GETCHARLEN(c
, eptr
, len
);
5063 if (++eptr
>= md
->end_subject
) break;
5064 if (*eptr
== 0x000a) eptr
++;
5070 (c
!= 0x000b && c
!= 0x000c &&
5071 c
!= 0x0085 && c
!= 0x2028 && c
!= 0x2029)))
5080 for (i
= min
; i
< max
; i
++)
5084 if (eptr
>= md
->end_subject
)
5089 GETCHARLEN(c
, eptr
, len
);
5092 default: gotspace
= FALSE
; break;
5094 case 0x20: /* SPACE */
5095 case 0xa0: /* NBSP */
5096 case 0x1680: /* OGHAM SPACE MARK */
5097 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5098 case 0x2000: /* EN QUAD */
5099 case 0x2001: /* EM QUAD */
5100 case 0x2002: /* EN SPACE */
5101 case 0x2003: /* EM SPACE */
5102 case 0x2004: /* THREE-PER-EM SPACE */
5103 case 0x2005: /* FOUR-PER-EM SPACE */
5104 case 0x2006: /* SIX-PER-EM SPACE */
5105 case 0x2007: /* FIGURE SPACE */
5106 case 0x2008: /* PUNCTUATION SPACE */
5107 case 0x2009: /* THIN SPACE */
5108 case 0x200A: /* HAIR SPACE */
5109 case 0x202f: /* NARROW NO-BREAK SPACE */
5110 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5111 case 0x3000: /* IDEOGRAPHIC SPACE */
5115 if (gotspace
== (ctype
== OP_NOT_HSPACE
)) break;
5122 for (i
= min
; i
< max
; i
++)
5126 if (eptr
>= md
->end_subject
)
5131 GETCHARLEN(c
, eptr
, len
);
5134 default: gotspace
= FALSE
; break;
5139 case 0x85: /* NEL */
5140 case 0x2028: /* LINE SEPARATOR */
5141 case 0x2029: /* PARAGRAPH SEPARATOR */
5145 if (gotspace
== (ctype
== OP_NOT_VSPACE
)) break;
5151 for (i
= min
; i
< max
; i
++)
5154 if (eptr
>= md
->end_subject
)
5159 GETCHARLEN(c
, eptr
, len
);
5160 if (c
< 256 && (md
->ctypes
[c
] & ctype_digit
) != 0) break;
5166 for (i
= min
; i
< max
; i
++)
5169 if (eptr
>= md
->end_subject
)
5174 GETCHARLEN(c
, eptr
, len
);
5175 if (c
>= 256 ||(md
->ctypes
[c
] & ctype_digit
) == 0) break;
5180 case OP_NOT_WHITESPACE
:
5181 for (i
= min
; i
< max
; i
++)
5184 if (eptr
>= md
->end_subject
)
5189 GETCHARLEN(c
, eptr
, len
);
5190 if (c
< 256 && (md
->ctypes
[c
] & ctype_space
) != 0) break;
5196 for (i
= min
; i
< max
; i
++)
5199 if (eptr
>= md
->end_subject
)
5204 GETCHARLEN(c
, eptr
, len
);
5205 if (c
>= 256 ||(md
->ctypes
[c
] & ctype_space
) == 0) break;
5210 case OP_NOT_WORDCHAR
:
5211 for (i
= min
; i
< max
; i
++)
5214 if (eptr
>= md
->end_subject
)
5219 GETCHARLEN(c
, eptr
, len
);
5220 if (c
< 256 && (md
->ctypes
[c
] & ctype_word
) != 0) break;
5226 for (i
= min
; i
< max
; i
++)
5229 if (eptr
>= md
->end_subject
)
5234 GETCHARLEN(c
, eptr
, len
);
5235 if (c
>= 256 || (md
->ctypes
[c
] & ctype_word
) == 0) break;
5241 RRETURN(PCRE_ERROR_INTERNAL
);
5244 /* eptr is now past the end of the maximum run */
5246 if (possessive
) continue;
5249 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM46
);
5250 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
5251 if (eptr
-- == pp
) break; /* Stop if tried at original pos */
5256 #endif /* SUPPORT_UTF8 */
5258 /* Not UTF-8 mode */
5263 for (i
= min
; i
< max
; i
++)
5265 if (eptr
>= md
->end_subject
)
5270 if (IS_NEWLINE(eptr
)) break;
5278 if (c
> (unsigned int)(md
->end_subject
- eptr
))
5280 eptr
= md
->end_subject
;
5287 for (i
= min
; i
< max
; i
++)
5289 if (eptr
>= md
->end_subject
)
5297 if (++eptr
>= md
->end_subject
) break;
5298 if (*eptr
== 0x000a) eptr
++;
5304 (c
!= 0x000b && c
!= 0x000c && c
!= 0x0085)))
5312 for (i
= min
; i
< max
; i
++)
5314 if (eptr
>= md
->end_subject
)
5320 if (c
== 0x09 || c
== 0x20 || c
== 0xa0) break;
5326 for (i
= min
; i
< max
; i
++)
5328 if (eptr
>= md
->end_subject
)
5334 if (c
!= 0x09 && c
!= 0x20 && c
!= 0xa0) break;
5340 for (i
= min
; i
< max
; i
++)
5342 if (eptr
>= md
->end_subject
)
5348 if (c
== 0x0a || c
== 0x0b || c
== 0x0c || c
== 0x0d || c
== 0x85)
5355 for (i
= min
; i
< max
; i
++)
5357 if (eptr
>= md
->end_subject
)
5363 if (c
!= 0x0a && c
!= 0x0b && c
!= 0x0c && c
!= 0x0d && c
!= 0x85)
5370 for (i
= min
; i
< max
; i
++)
5372 if (eptr
>= md
->end_subject
)
5377 if ((md
->ctypes
[*eptr
] & ctype_digit
) != 0) break;
5383 for (i
= min
; i
< max
; i
++)
5385 if (eptr
>= md
->end_subject
)
5390 if ((md
->ctypes
[*eptr
] & ctype_digit
) == 0) break;
5395 case OP_NOT_WHITESPACE
:
5396 for (i
= min
; i
< max
; i
++)
5398 if (eptr
>= md
->end_subject
)
5403 if ((md
->ctypes
[*eptr
] & ctype_space
) != 0) break;
5409 for (i
= min
; i
< max
; i
++)
5411 if (eptr
>= md
->end_subject
)
5416 if ((md
->ctypes
[*eptr
] & ctype_space
) == 0) break;
5421 case OP_NOT_WORDCHAR
:
5422 for (i
= min
; i
< max
; i
++)
5424 if (eptr
>= md
->end_subject
)
5429 if ((md
->ctypes
[*eptr
] & ctype_word
) != 0) break;
5435 for (i
= min
; i
< max
; i
++)
5437 if (eptr
>= md
->end_subject
)
5442 if ((md
->ctypes
[*eptr
] & ctype_word
) == 0) break;
5448 RRETURN(PCRE_ERROR_INTERNAL
);
5451 /* eptr is now past the end of the maximum run */
5453 if (possessive
) continue;
5456 RMATCH(eptr
, ecode
, offset_top
, md
, ims
, eptrb
, 0, RM47
);
5458 if (rrc
!= MATCH_NOMATCH
) RRETURN(rrc
);
5462 /* Get here if we can't make it match with any permitted repetitions */
5464 MRRETURN(MATCH_NOMATCH
);
5466 /* Control never gets here */
5468 /* There's been some horrible disaster. Arrival here can only mean there is
5469 something seriously wrong in the code above or the OP_xxx definitions. */
5472 DPRINTF(("Unknown opcode %d\n", *ecode
));
5473 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE
);
5476 /* Do not stick any code in here without much thought; it is assumed
5477 that "continue" in the code above comes out to here to repeat the main loop. */
5480 } /* End of main loop */
5481 /* Control never reaches here */
5484 /* When compiling to use the heap rather than the stack for recursive calls to
5485 match(), the RRETURN() macro jumps here. The number that is saved in
5486 frame->Xwhere indicates which label we actually want to return to. */
5489 #define LBL(val) case val: goto L_RM##val;
5491 switch (frame
->Xwhere
)
5493 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5494 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5495 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5496 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5497 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5499 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5500 LBL(32) LBL(34) LBL(42) LBL(46)
5502 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5503 LBL(59) LBL(60) LBL(61) LBL(62)
5504 #endif /* SUPPORT_UCP */
5505 #endif /* SUPPORT_UTF8 */
5507 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame
->Xwhere
));
5508 return PCRE_ERROR_INTERNAL
;
5511 #endif /* NO_RECURSE */
5515 /***************************************************************************
5516 ****************************************************************************
5517 RECURSION IN THE match() FUNCTION
5519 Undefine all the macros that were defined above to handle this. */
5538 #undef new_recursive
5553 #undef save_capture_last
5563 /* These two are defined as macros in both cases */
5568 /***************************************************************************
5569 ***************************************************************************/
5573 /*************************************************
5574 * Execute a Regular Expression *
5575 *************************************************/
5577 /* This function applies a compiled re to a subject string and picks out
5578 portions of the string if it matches. Two elements in the vector are set for
5579 each substring: the offsets to the start and end of the substring.
5581 Arguments:
5582 argument_re points to the compiled expression
5583 extra_data points to extra data or is NULL
5584 subject points to the subject string
5585 length length of subject string (may contain binary zeros)
5586 start_offset where to start in the subject string
5587 options option bits
5588 offsets points to a vector of ints to be filled in with offsets
5589 offsetcount the number of elements in the vector
5591 Returns: > 0 => success; value is the number of elements filled in
5592 = 0 => success, but offsets is not big enough
5593 -1 => failed to match
5594 < -1 => some kind of unexpected problem
5595 */
5597 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
5598 pcre_exec(const pcre
*argument_re
, const pcre_extra
*extra_data
,
5599 PCRE_SPTR subject
, int length
, int start_offset
, int options
, int *offsets
,
5602 int rc
, resetcount
, ocount
;
5603 int first_byte
= -1;
5607 unsigned long int ims
;
5608 BOOL using_temporary_offsets
= FALSE
;
5612 BOOL first_byte_caseless
= FALSE
;
5613 BOOL req_byte_caseless
= FALSE
;
5615 match_data match_block
;
5616 match_data
*md
= &match_block
;
5617 const uschar
*tables
;
5618 const uschar
*start_bits
= NULL
;
5619 USPTR start_match
= (USPTR
)subject
+ start_offset
;
5621 USPTR start_partial
= NULL
;
5622 USPTR req_byte_ptr
= start_match
- 1;
5624 pcre_study_data internal_study
;
5625 const pcre_study_data
*study
;
5627 real_pcre internal_re
;
5628 const real_pcre
*external_re
= (const real_pcre
*)argument_re
;
5629 const real_pcre
*re
= external_re
;
5631 /* Plausibility checks */
5632 if ((options
& ~PUBLIC_EXEC_OPTIONS
) != 0) return PCRE_ERROR_BADOPTION
;
5633 if (re
== NULL
|| subject
== NULL
||
5634 (offsets
== NULL
&& offsetcount
> 0)) return PCRE_ERROR_NULL
;
5635 if (offsetcount
< 0) return PCRE_ERROR_BADCOUNT
;
5636 if (start_offset
< 0 || start_offset
> length
) return PCRE_ERROR_BADOFFSET
;
5638 /* This information is for finding all the numbers associated with a given
5639 name, for condition testing. */
5641 md
->name_table
= (uschar
*)re
+ re
->name_table_offset
;
5642 md
->name_count
= re
->name_count
;
5643 md
->name_entry_size
= re
->name_entry_size
;
5645 /* Fish out the optional data from the extra_data structure, first setting
5646 the default values. */
5649 md
->match_limit
= MATCH_LIMIT
;
5650 md
->match_limit_recursion
= MATCH_LIMIT_RECURSION
;
5651 md
->callout_data
= NULL
;
5653 /* The table pointer is always in native byte order. */
5655 tables
= external_re
->tables
;
5657 if (extra_data
!= NULL
)
5659 register unsigned int flags
= extra_data
->flags
;
5660 if ((flags
& PCRE_EXTRA_STUDY_DATA
) != 0)
5661 study
= (const pcre_study_data
*)extra_data
->study_data
;
5662 if ((flags
& PCRE_EXTRA_MATCH_LIMIT
) != 0)
5663 md
->match_limit
= extra_data
->match_limit
;
5664 if ((flags
& PCRE_EXTRA_MATCH_LIMIT_RECURSION
) != 0)
5665 md
->match_limit_recursion
= extra_data
->match_limit_recursion
;
5666 if ((flags
& PCRE_EXTRA_CALLOUT_DATA
) != 0)
5667 md
->callout_data
= extra_data
->callout_data
;
5668 if ((flags
& PCRE_EXTRA_TABLES
) != 0) tables
= extra_data
->tables
;
5671 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5672 is a feature that makes it possible to save compiled regex and re-use them
5673 in other programs later. */
5675 if (tables
== NULL
) tables
= _pcre_default_tables
;
5677 /* Check that the first field in the block is the magic number. If it is not,
5678 test for a regex that was compiled on a host of opposite endianness. If this is
5679 the case, flipped values are put in internal_re and internal_study if there was study data too. */
5682 if (re
->magic_number
!= MAGIC_NUMBER
)
5684 re
= _pcre_try_flipped(re
, &internal_re
, study
, &internal_study
);
5685 if (re
== NULL
) return PCRE_ERROR_BADMAGIC
;
5686 if (study
!= NULL
) study
= &internal_study
;
5689 /* Set up other data */
5691 anchored
= ((re
->options
| options
) & PCRE_ANCHORED
) != 0;
5692 startline
= (re
->flags
& PCRE_STARTLINE
) != 0;
5693 firstline
= (re
->options
& PCRE_FIRSTLINE
) != 0;
5695 /* The code starts after the real_pcre block and the capture name table. */
5697 md
->start_code
= (const uschar
*)external_re
+ re
->name_table_offset
+
5698 re
->name_count
* re
->name_entry_size
;
5700 md
->start_subject
= (USPTR
)subject
;
5701 md
->start_offset
= start_offset
;
5702 md
->end_subject
= md
->start_subject
+ length
;
5703 end_subject
= md
->end_subject
;
5705 md
->endonly
= (re
->options
& PCRE_DOLLAR_ENDONLY
) != 0;
5706 utf8
= md
->utf8
= (re
->options
& PCRE_UTF8
) != 0;
5707 md
->use_ucp
= (re
->options
& PCRE_UCP
) != 0;
5708 md
->jscript_compat
= (re
->options
& PCRE_JAVASCRIPT_COMPAT
) != 0;
5710 md
->notbol
= (options
& PCRE_NOTBOL
) != 0;
5711 md
->noteol
= (options
& PCRE_NOTEOL
) != 0;
5712 md
->notempty
= (options
& PCRE_NOTEMPTY
) != 0;
5713 md
->notempty_atstart
= (options
& PCRE_NOTEMPTY_ATSTART
) != 0;
5714 md
->partial
= ((options
& PCRE_PARTIAL_HARD
) != 0)? 2 :
5715 ((options
& PCRE_PARTIAL_SOFT
) != 0)? 1 : 0;
5717 md
->mark
= NULL
; /* In case never set */
5719 md
->recursive
= NULL
; /* No recursion at top level */
5721 md
->lcc
= tables
+ lcc_offset
;
5722 md
->ctypes
= tables
+ ctypes_offset
;
5724 /* Handle different \R options. */
5726 switch (options
& (PCRE_BSR_ANYCRLF
|PCRE_BSR_UNICODE
))
5729 if ((re
->options
& (PCRE_BSR_ANYCRLF
|PCRE_BSR_UNICODE
)) != 0)
5730 md
->bsr_anycrlf
= (re
->options
& PCRE_BSR_ANYCRLF
) != 0;
5733 md
->bsr_anycrlf
= TRUE
;
5735 md
->bsr_anycrlf
= FALSE
;
5739 case PCRE_BSR_ANYCRLF
:
5740 md
->bsr_anycrlf
= TRUE
;
5743 case PCRE_BSR_UNICODE
:
5744 md
->bsr_anycrlf
= FALSE
;
5747 default: return PCRE_ERROR_BADNEWLINE
;
5750 /* Handle different types of newline. The three bits give eight cases. If
5751 nothing is set at run time, whatever was used at compile time applies. */
5753 switch ((((options
& PCRE_NEWLINE_BITS
) == 0)? re
->options
:
5754 (pcre_uint32
)options
) & PCRE_NEWLINE_BITS
)
5756 case 0: newline
= NEWLINE
; break; /* Compile-time default */
5757 case PCRE_NEWLINE_CR
: newline
= CHAR_CR
; break;
5758 case PCRE_NEWLINE_LF
: newline
= CHAR_NL
; break;
5759 case PCRE_NEWLINE_CR
+
5760 PCRE_NEWLINE_LF
: newline
= (CHAR_CR
<< 8) | CHAR_NL
; break;
5761 case PCRE_NEWLINE_ANY
: newline
= -1; break;
5762 case PCRE_NEWLINE_ANYCRLF
: newline
= -2; break;
5763 default: return PCRE_ERROR_BADNEWLINE
;
5768 md
->nltype
= NLTYPE_ANYCRLF
;
5770 else if (newline
< 0)
5772 md
->nltype
= NLTYPE_ANY
;
5776 md
->nltype
= NLTYPE_FIXED
;
5780 md
->nl
[0] = (newline
>> 8) & 255;
5781 md
->nl
[1] = newline
& 255;
5786 md
->nl
[0] = newline
;
5790 /* Partial matching was originally supported only for a restricted set of
5791 regexes; from release 8.00 there are no restrictions, but the bits are still
5792 defined (though never set). So there's no harm in leaving this code. */
5794 if (md
->partial
&& (re
->flags
& PCRE_NOPARTIAL
) != 0)
5795 return PCRE_ERROR_BADPARTIAL
;
5797 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5798 back the character offset. */
5801 if (utf8
&& (options
& PCRE_NO_UTF8_CHECK
) == 0)
5804 if ((tb
= _pcre_valid_utf8((USPTR
)subject
, length
)) >= 0)
5805 return (tb
== length
&& md
->partial
> 1)?
5806 PCRE_ERROR_SHORTUTF8
: PCRE_ERROR_BADUTF8
;
5807 if (start_offset
> 0 && start_offset
< length
)
5809 tb
= ((USPTR
)subject
)[start_offset
] & 0xc0;
5810 if (tb
== 0x80) return PCRE_ERROR_BADUTF8_OFFSET
;
5815 /* The ims options can vary during the matching as a result of the presence
5816 of (?ims) items in the pattern. They are kept in a local variable so that
5817 restoring at the exit of a group is easy. */
5819 ims
= re
->options
& (PCRE_CASELESS
|PCRE_MULTILINE
|PCRE_DOTALL
);
5821 /* If the expression has got more back references than the offsets supplied can
5822 hold, we get a temporary chunk of working store to use during the matching.
5823 Otherwise, we can use the vector supplied, rounding down its size to a multiple of 3. */
5826 ocount
= offsetcount
- (offsetcount
% 3);
5828 if (re
->top_backref
> 0 && re
->top_backref
>= ocount
/3)
5830 ocount
= re
->top_backref
* 3 + 3;
5831 md
->offset_vector
= (int *)(pcre_malloc
)(ocount
* sizeof(int));
5832 if (md
->offset_vector
== NULL
) return PCRE_ERROR_NOMEMORY
;
5833 using_temporary_offsets
= TRUE
;
5834 DPRINTF(("Got memory to hold back references\n"));
5836 else md
->offset_vector
= offsets
;
5838 md
->offset_end
= ocount
;
5839 md
->offset_max
= (2*ocount
)/3;
5840 md
->offset_overflow
= FALSE
;
5841 md
->capture_last
= -1;
5843 /* Compute the minimum number of offsets that we need to reset each time. Doing
5844 this makes a huge difference to execution time when there aren't many brackets in the pattern. */
5847 resetcount
= 2 + re
->top_bracket
* 2;
5848 if (resetcount
> offsetcount
) resetcount
= ocount
;
5850 /* Reset the working variable associated with each extraction. These should
5851 never be used unless previously set, but they get saved and restored, and so we
5852 initialize them to avoid reading uninitialized locations. */
5854 if (md
->offset_vector
!= NULL
)
5856 register int *iptr
= md
->offset_vector
+ ocount
;
5857 register int *iend
= iptr
- resetcount
/2 + 1;
5858 while (--iptr
>= iend
) *iptr
= -1;
5861 /* Set up the first character to match, if available. The first_byte value is
5862 never set for an anchored regular expression, but the anchoring may be forced
5863 at run time, so we have to test for anchoring. The first char may be unset for
5864 an unanchored pattern, of course. If there's no first char and the pattern was
5865 studied, there may be a bitmap of possible first characters. */
5869 if ((re
->flags
& PCRE_FIRSTSET
) != 0)
5871 first_byte
= re
->first_byte
& 255;
5872 if ((first_byte_caseless
= ((re
->first_byte
& REQ_CASELESS
) != 0)) == TRUE
)
5873 first_byte
= md
->lcc
[first_byte
];
5876 if (!startline
&& study
!= NULL
&&
5877 (study
->flags
& PCRE_STUDY_MAPPED
) != 0)
5878 start_bits
= study
->start_bits
;
5881 /* For anchored or unanchored matches, there may be a "last known required character" set. */
5884 if ((re
->flags
& PCRE_REQCHSET
) != 0)
5886 req_byte
= re
->req_byte
& 255;
5887 req_byte_caseless
= (re
->req_byte
& REQ_CASELESS
) != 0;
5888 req_byte2
= (tables
+ fcc_offset
)[req_byte
]; /* case flipped */
5892 /* ==========================================================================*/
5894 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
5895 the loop runs just once. */
5899 USPTR save_end_subject
= end_subject
;
5900 USPTR new_start_match
;
5902 /* Reset the maximum number of extractions we might see. */
5904 if (md
->offset_vector
!= NULL
)
5906 register int *iptr
= md
->offset_vector
;
5907 register int *iend
= iptr
+ resetcount
;
5908 while (iptr
< iend
) *iptr
++ = -1;
5911 /* If firstline is TRUE, the start of the match is constrained to the first
5912 line of a multiline string. That is, the match must be before or at the first
5913 newline. Implement this by temporarily adjusting end_subject so that we stop
5914 scanning at a newline. If the match fails at the newline, later code breaks this loop. */
5919 USPTR t
= start_match
;
5923 while (t
< md
->end_subject
&& !IS_NEWLINE(t
))
5926 while (t
< end_subject
&& (*t
& 0xc0) == 0x80) t
++;
5931 while (t
< md
->end_subject
&& !IS_NEWLINE(t
)) t
++;
5935 /* There are some optimizations that avoid running the match if a known
5936 starting point is not found, or if a known later character is not present.
5937 However, there is an option that disables these, for testing and for ensuring
5938 that all callouts do actually occur. The option can be set in the regex by
5939 (*NO_START_OPT) or passed in match-time options. */
5941 if (((options
| re
->options
) & PCRE_NO_START_OPTIMIZE
) == 0)
5943 /* Advance to a unique first byte if there is one. */
5945 if (first_byte
>= 0)
5947 if (first_byte_caseless
)
5948 while (start_match
< end_subject
&& md
->lcc
[*start_match
] != first_byte
)
5951 while (start_match
< end_subject
&& *start_match
!= first_byte
)
5955 /* Or to just after a linebreak for a multiline match */
5959 if (start_match
> md
->start_subject
+ start_offset
)
5964 while (start_match
< end_subject
&& !WAS_NEWLINE(start_match
))
5967 while(start_match
< end_subject
&& (*start_match
& 0xc0) == 0x80)
5973 while (start_match
< end_subject
&& !WAS_NEWLINE(start_match
))
5976 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5977 and we are now at a LF, advance the match position by one more character. */
5980 if (start_match
[-1] == CHAR_CR
&&
5981 (md
->nltype
== NLTYPE_ANY
|| md
->nltype
== NLTYPE_ANYCRLF
) &&
5982 start_match
< end_subject
&&
5983 *start_match
== CHAR_NL
)
5988 /* Or to a non-unique first byte after study */
5990 else if (start_bits
!= NULL
)
5992 while (start_match
< end_subject
)
5994 register unsigned int c
= *start_match
;
5995 if ((start_bits
[c
/8] & (1 << (c
&7))) == 0)
6000 while(start_match
< end_subject
&& (*start_match
& 0xc0) == 0x80)
6007 } /* Starting optimizations */
6009 /* Restore fudged end_subject */
6011 end_subject
= save_end_subject
;
6013 /* The following two optimizations are disabled for partial matching or if
6014 disabling is explicitly requested. */
6016 if ((options
& PCRE_NO_START_OPTIMIZE
) == 0 && !md
->partial
)
6018 /* If the pattern was studied, a minimum subject length may be set. This is
6019 a lower bound; no actual string of that length may actually match the
6020 pattern. Although the value is, strictly, in characters, we treat it as
6021 bytes to avoid spending too much time in this optimization. */
6023 if (study
!= NULL
&& (study
->flags
& PCRE_STUDY_MINLEN
) != 0 &&
6024 (pcre_uint32
)(end_subject
- start_match
) < study
->minlength
)
6030 /* If req_byte is set, we know that that character must appear in the
6031 subject for the match to succeed. If the first character is set, req_byte
6032 must be later in the subject; otherwise the test starts at the match point.
6033 This optimization can save a huge amount of backtracking in patterns with
6034 nested unlimited repeats that aren't going to match. Writing separate code
6035 for cased/caseless versions makes it go faster, as does using an
6036 autoincrement and backing off on a match.
6038 HOWEVER: when the subject string is very, very long, searching to its end
6039 can take a long time, and give bad performance on quite ordinary patterns.
6040 This showed up when somebody was matching something like /^\d+C/ on a
6041 32-megabyte string... so we don't do this when the string is sufficiently
6044 if (req_byte
>= 0 && end_subject
- start_match
< REQ_BYTE_MAX
)
6046 register USPTR p
= start_match
+ ((first_byte
>= 0)? 1 : 0);
6048 /* We don't need to repeat the search if we haven't yet reached the
6049 place we found it at last time. */
6051 if (p
> req_byte_ptr
)
6053 if (req_byte_caseless
)
6055 while (p
< end_subject
)
6057 register int pp
= *p
++;
6058 if (pp
== req_byte
|| pp
== req_byte2
) { p
--; break; }
6063 while (p
< end_subject
)
6065 if (*p
++ == req_byte
) { p
--; break; }
6069 /* If we can't find the required character, break the matching loop,
6070 forcing a match failure. */
6072 if (p
>= end_subject
)
6078 /* If we have found the required character, save the point where we
6079 found it, so that we don't search again next time round the loop if
6080 the start hasn't passed this character yet. */
6087 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6088 printf(">>>> Match against: ");
6089 pchars(start_match
, end_subject
- start_match
, TRUE
, md
);
6093 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6094 first starting point for which a partial match was found. */
6096 md
->start_match_ptr
= start_match
;
6097 md
->start_used_ptr
= start_match
;
6098 md
->match_call_count
= 0;
6099 rc
= match(start_match
, md
->start_code
, start_match
, NULL
, 2, md
, ims
, NULL
,
6101 if (md
->hitend
&& start_partial
== NULL
) start_partial
= md
->start_used_ptr
;
6105 /* SKIP passes back the next starting point explicitly, but if it is the
6106 same as the match we have just done, treat it as NOMATCH. */
6109 if (md
->start_match_ptr
!= start_match
)
6111 new_start_match
= md
->start_match_ptr
;
6116 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6117 the SKIP's arg was not found. We also treat this as NOMATCH. */
6119 case MATCH_SKIP_ARG
:
6122 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6123 exactly like PRUNE. */
6128 new_start_match
= start_match
+ 1;
6131 while(new_start_match
< end_subject
&& (*new_start_match
& 0xc0) == 0x80)
6136 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6142 /* Any other return is either a match, or some kind of error. */
6148 /* Control reaches here for the various types of "no match at this point"
6149 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6153 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6154 newline in the subject (though it may continue over the newline). Therefore,
6155 if we have just failed to match, starting at a newline, do not continue. */
6157 if (firstline
&& IS_NEWLINE(start_match
)) break;
6159 /* Advance to new matching position */
6161 start_match
= new_start_match
;
6163 /* Break the loop if the pattern is anchored or if we have passed the end of
6166 if (anchored
|| start_match
> end_subject
) break;
6168 /* If we have just passed a CR and we are now at a LF, and the pattern does
6169 not contain any explicit matches for \r or \n, and the newline option is CRLF
6170 or ANY or ANYCRLF, advance the match position by one more character. */
6172 if (start_match
[-1] == CHAR_CR
&&
6173 start_match
< end_subject
&&
6174 *start_match
== CHAR_NL
&&
6175 (re
->flags
& PCRE_HASCRORLF
) == 0 &&
6176 (md
->nltype
== NLTYPE_ANY
||
6177 md
->nltype
== NLTYPE_ANYCRLF
||
6181 md
->mark
= NULL
; /* Reset for start of next match attempt */
6182 } /* End of for(;;) "bumpalong" loop */
6184 /* ==========================================================================*/
6186 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6189 (1) The pattern is anchored or the match was failed by (*COMMIT);
6191 (2) We are past the end of the subject;
6193 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6194 this option requests that a match occur at or before the first newline in
6197 When we have a match and the offset vector is big enough to deal with any
6198 backreferences, captured substring offsets will already be set up. In the case
6199 where we had to get some local store to hold offsets for backreference
6200 processing, copy those that we can. In this case there need not be overflow if
6201 certain parts of the pattern were not used, even though there are more
6202 capturing parentheses than vector slots. */
6206 if (rc
== MATCH_MATCH
|| rc
== MATCH_ACCEPT
)
6208 if (using_temporary_offsets
)
6210 if (offsetcount
>= 4)
6212 memcpy(offsets
+ 2, md
->offset_vector
+ 2,
6213 (offsetcount
- 2) * sizeof(int));
6214 DPRINTF(("Copied offsets from temporary memory\n"));
6216 if (md
->end_offset_top
> offsetcount
) md
->offset_overflow
= TRUE
;
6217 DPRINTF(("Freeing temporary memory\n"));
6218 (pcre_free
)(md
->offset_vector
);
6221 /* Set the return code to the number of captured strings, or 0 if there are
6222 too many to fit into the vector. */
6224 rc
= md
->offset_overflow
? 0 : md
->end_offset_top
/2;
6226 /* If there is space, set up the whole thing as substring 0. The value of
6227 md->start_match_ptr might be modified if \K was encountered on the success
6230 if (offsetcount
< 2) rc
= 0; else
6232 offsets
[0] = (int)(md
->start_match_ptr
- md
->start_subject
);
6233 offsets
[1] = (int)(md
->end_match_ptr
- md
->start_subject
);
6236 DPRINTF((">>>> returning %d\n", rc
));
6240 /* Control gets here if there has been an error, or if the overall match
6241 attempt has failed at all permitted starting positions. */
6243 if (using_temporary_offsets
)
6245 DPRINTF(("Freeing temporary memory\n"));
6246 (pcre_free
)(md
->offset_vector
);
6249 /* For anything other than nomatch or partial match, just return the code. */
6251 if (rc
!= MATCH_NOMATCH
&& rc
!= PCRE_ERROR_PARTIAL
)
6253 DPRINTF((">>>> error: returning %d\n", rc
));
6257 /* Handle partial matches - disable any mark data */
6259 if (start_partial
!= NULL
)
6261 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6263 if (offsetcount
> 1)
6265 offsets
[0] = (int)(start_partial
- (USPTR
)subject
);
6266 offsets
[1] = (int)(end_subject
- (USPTR
)subject
);
6268 rc
= PCRE_ERROR_PARTIAL
;
6271 /* This is the classic nomatch case */
6275 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6276 rc
= PCRE_ERROR_NOMATCH
;
6279 /* Return the MARK data if it has been requested. */
6283 if (extra_data
!= NULL
&& (extra_data
->flags
& PCRE_EXTRA_MARK
) != 0)
6284 *(extra_data
->mark
) = (unsigned char *)(md
->mark
);
6288 /* End of pcre_exec.c */