glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2012 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40 /* This module contains pcre_exec(), the externally visible function that does
  41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  42 possible. There are also some static supporting functions. */
  43
  44 #include "config.h"
  45
  46 #define NLBLOCK md             /* Block containing newline information */
  47 #define PSSTART start_subject  /* Field containing processed string start */
  48 #define PSEND   end_subject    /* Field containing processed string end */
  49
  50 #include "pcre_internal.h"
  51
  52 /* Undefine some potentially clashing cpp symbols */
  53
  54 #undef min
  55 #undef max
  56
  57 /* Values for setting in md->match_function_type to indicate two special types
  58 of call to match(). We do it this way to save on using another stack variable,
  59 as stack usage is to be discouraged. */
  60
  61 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
  62 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
  63
  64 /* Non-error returns from the match() function. Error returns are externally
  65 defined PCRE_ERROR_xxx codes, which are all negative. */
  66
  67 #define MATCH_MATCH        1
  68 #define MATCH_NOMATCH      0
  69
  70 /* Special internal returns from the match() function. Make them sufficiently
  71 negative to avoid the external error codes. */
  72
  73 #define MATCH_ACCEPT       (-999)
  74 #define MATCH_COMMIT       (-998)  /* from OP_COMMIT when what follows it fails */
  75 #define MATCH_KETRPOS      (-997)
  76 #define MATCH_ONCE         (-996)
  77 #define MATCH_PRUNE        (-995)  /* from OP_PRUNE[_ARG] on NOMATCH or THEN */
  78 #define MATCH_SKIP         (-994)  /* subject position in md->start_match_ptr */
  79 #define MATCH_SKIP_ARG     (-993)  /* skip name in md->start_match_ptr */
  80 #define MATCH_THEN         (-992)  /* opcode address in md->start_match_ptr */
  81
  82 /* Maximum number of ints of offset to save on the stack for recursive calls.
  83 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  84 because the offset vector is always a multiple of 3 long. */
  85
  86 #define REC_STACK_SAVE_MAX 30
  87
  88 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  89
  90 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* NOTE(review): order looks like *, *?, +, +?, ?, ?? - confirm at use sites */
  91 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* same index order as rep_min */
  92
  93
  94
  95 #ifdef PCRE_DEBUG
  96 /*************************************************
  97 *        Debugging function to print chars       *
  98 *************************************************/
  99
 100 /* Print a sequence of chars in printable format, stopping at the end of the
 101 subject if requested.
 102
 103 Arguments:
 104   p           points to characters
 105   length      number to print
 106   is_subject  TRUE if printing from within md->start_subject
 107   md          pointer to matching data block, if is_subject is TRUE
 108
 109 Returns:     nothing
 110 */
 111
 112 static void
 113 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
 114 {
 115 unsigned int c;
 116 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 117 while (length-- > 0)
 118   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
 119 }
 120 #endif
 121
 122
 123
 124 /*************************************************
 125 *          Match a back-reference                *
 126 *************************************************/
 127
 128 /* Normally, if a back reference hasn't been set, the length that is passed is
 129 negative, so the match always fails. However, in JavaScript compatibility mode,
 130 the length passed is zero. Note that in caseless UTF-8 mode, the number of
 131 subject bytes matched may be different to the number of reference bytes.
 132
 133 Arguments:
 134   offset      index into the offset vector
 135   eptr        pointer into the subject
 136   length      length of reference to be matched (number of bytes)
 137   md          points to match data block
 138   caseless    TRUE if caseless
 139
 140 Returns:      >= 0 the number of subject bytes matched
 141               -1 no match
 142               -2 partial match; always given if at end subject
 143 */
 144
 145 static int
 146 match_ref(int offset, PCRE_PUCHAR eptr, int length, match_data *md,
 147   BOOL caseless)
 148 {
 149 PCRE_PUCHAR eptr_start = eptr;
 150 PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
 151
 152 #ifdef PCRE_DEBUG
 153 if (eptr >= md->end_subject)
 154   printf("matching subject <null>");
 155 else
 156   {
 157   printf("matching subject ");
 158   pchars(eptr, length, TRUE, md);
 159   }
 160 printf(" against backref ");
 161 pchars(p, length, FALSE, md);
 162 printf("\n");
 163 #endif
 164
 165 /* Always fail if reference not set (and not JavaScript compatible - in that
 166 case the length is passed as zero). */
 167
 168 if (length < 0) return -1;
 169
 170 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
 171 properly if Unicode properties are supported. Otherwise, we can check only
 172 ASCII characters. */
 173
 174 if (caseless)
 175   {
 176 #ifdef SUPPORT_UTF
 177 #ifdef SUPPORT_UCP
 178   if (md->utf)
 179     {
 180     /* Match characters up to the end of the reference. NOTE: the number of
 181     bytes matched may differ, because there are some characters whose upper and
 182     lower case versions code as different numbers of bytes. For example, U+023A
 183     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
 184     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
 185     the latter. It is important, therefore, to check the length along the
 186     reference, not along the subject (earlier code did this wrong). */
 187
 188     PCRE_PUCHAR endptr = p + length;
 189     while (p < endptr)
 190       {
 191       int c, d;
 192       if (eptr >= md->end_subject) return -2;   /* Partial match */
 193       GETCHARINC(c, eptr);
 194       GETCHARINC(d, p);
 195       if (c != d && c != UCD_OTHERCASE(d)) return -1;
 196       }
 197     }
 198   else
 199 #endif
 200 #endif
 201
 202   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
 203   is no UCP support. */
 204     {
 205     while (length-- > 0)
 206       {
 207       if (eptr >= md->end_subject) return -2;   /* Partial match */
 208       if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
 209       p++;
 210       eptr++;
 211       }
 212     }
 213   }
 214
 215 /* In the caseful case, we can just compare the bytes, whether or not we
 216 are in UTF-8 mode. */
 217
 218 else
 219   {
 220   while (length-- > 0)
 221     {
 222     if (eptr >= md->end_subject) return -2;   /* Partial match */
 223     if (*p++ != *eptr++) return -1;
 224     }
 225   }
 226
 227 return (int)(eptr - eptr_start);
 228 }
 229
 230
 231
 232 /***************************************************************************
 233 ****************************************************************************
 234                    RECURSION IN THE match() FUNCTION
 235
 236 The match() function is highly recursive, though not every recursive call
 237 increases the recursive depth. Nevertheless, some regular expressions can cause
 238 it to recurse to a great depth. I was writing for Unix, so I just let it call
 239 itself recursively. This uses the stack for saving everything that has to be
 240 saved for a recursive call. On Unix, the stack can be large, and this works
 241 fine.
 242
 243 It turns out that on some non-Unix-like systems there are problems with
 244 programs that use a lot of stack. (This despite the fact that every last chip
 245 has oodles of memory these days, and techniques for extending the stack have
 246 been known for decades.) So....
 247
 248 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 249 calls by keeping local variables that need to be preserved in blocks of memory
 250 obtained from malloc() instead of on the stack. Macros are used to
 251 achieve this so that the actual code doesn't look very different to what it
 252 always used to.
 253
 254 The original heap-recursive code used longjmp(). However, it seems that this
 255 can be very slow on some operating systems. Following a suggestion from Stan
 256 Switzer, the use of longjmp() has been abolished, at the cost of having to
 257 provide a unique number for each call to RMATCH. There is no way of generating
 258 a sequence of numbers at compile time in C. I have given them names, to make
 259 them stand out more clearly.
 260
 261 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 262 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 263 tests. Furthermore, not using longjmp() means that local dynamic variables
 264 don't have indeterminate values; this has meant that the frame size can be
 265 reduced because the result can be "passed back" by straight setting of the
 266 variable instead of being passed in the frame.
 267 ****************************************************************************
 268 ***************************************************************************/
 269
 270 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
 271 below must be updated in sync.  */
 272
 273 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 274        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 275        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 276        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 277        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 278        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
 279        RM61,  RM62, RM63, RM64, RM65, RM66 };  /* each one is passed as the last ("rw") argument of an RMATCH call */
 280
 281 /* These versions of the macros use the stack, as normal. There are debugging
 282 versions and production versions. Note that the "rw" argument of RMATCH isn't
 283 actually used in this definition. */
 284
 285 #ifndef NO_RECURSE
 286
 287 #ifdef PCRE_DEBUG
 288 #define RMATCH(ra,rb,rc,rd,re,rw) \
 289   { \
 290   printf("match() called in line %d\n", __LINE__); \
 291   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1);  /* mstart, rdepth and rrc are names at the use site */ \
 292   printf("to line %d\n", __LINE__); \
 293   }
 294 #define RRETURN(ra) \
 295   { \
 296   printf("match() returned %d from line %d ", ra, __LINE__); \
 297   return ra; \
 298   }
 299 #else  /* PCRE_DEBUG not defined */
 300 #define RMATCH(ra,rb,rc,rd,re,rw) \
 301   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)  /* a real recursive call, one level deeper */
 302 #define RRETURN(ra) return ra
 303 #endif  /* PCRE_DEBUG */
 304
 305 #else
 306
 307
 308 /* These versions of the macros manage a private stack on the heap. Note that
 309 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 310 argument of match(), which never changes. */
 311
 312 #define RMATCH(ra,rb,rc,rd,re,rw)\
 313   {\
 314   heapframe *newframe = frame->Xnextframe;  /* use the frame already linked after this one, if any */\
 315   if (newframe == NULL)\
 316     {\
 317     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
 318     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
 319     newframe->Xnextframe = NULL;\
 320     frame->Xnextframe = newframe;  /* stays linked, so a later RMATCH from this frame finds it */\
 321     }\
 322   frame->Xwhere = rw;  /* saved in the calling frame for the jump back to L_<rw> */\
 323   newframe->Xeptr = ra;\
 324   newframe->Xecode = rb;\
 325   newframe->Xmstart = mstart;\
 326   newframe->Xoffset_top = rc;\
 327   newframe->Xeptrb = re;\
 328   newframe->Xrdepth = frame->Xrdepth + 1;\
 329   newframe->Xprevframe = frame;\
 330   frame = newframe;  /* the new frame is now the current one */\
 331   DPRINTF(("restarting from line %d\n", __LINE__));\
 332   goto HEAP_RECURSE;\
 333   L_##rw:\
 334   DPRINTF(("jumped back to line %d\n", __LINE__));\
 335   }
 336
 337 #define RRETURN(ra)\
 338   {\
 339   heapframe *oldframe = frame;\
 340   frame = oldframe->Xprevframe;  /* step back to the calling frame; nothing is freed here */\
 341   if (frame != NULL)\
 342     {\
 343     rrc = ra;  /* the result is handed back through rrc */\
 344     goto HEAP_RETURN;\
 345     }\
 346   return ra;  /* no calling frame: really return from match() */\
 347   }
 348
 349
 350 /* Structure for remembering the local variables in a private frame */
 351
 352 typedef struct heapframe {
 353   struct heapframe *Xprevframe;  /* calling frame; set by RMATCH, followed by RRETURN */
 354   struct heapframe *Xnextframe;  /* next frame; allocated by RMATCH when NULL */
 355
 356   /* Function arguments that may change */
 357
 358   PCRE_PUCHAR Xeptr;
 359   const pcre_uchar *Xecode;
 360   PCRE_PUCHAR Xmstart;
 361   int Xoffset_top;
 362   eptrblock *Xeptrb;
 363   unsigned int Xrdepth;  /* RMATCH sets this to the calling frame's value + 1 */
 364
 365   /* Function local variables */
 366
 367   PCRE_PUCHAR Xcallpat;
 368 #ifdef SUPPORT_UTF
 369   PCRE_PUCHAR Xcharptr;
 370 #endif
 371   PCRE_PUCHAR Xdata;  /* match() also uses this under the name save_mark */
 372   PCRE_PUCHAR Xnext;
 373   PCRE_PUCHAR Xpp;
 374   PCRE_PUCHAR Xprev;
 375   PCRE_PUCHAR Xsaved_eptr;
 376
 377   recursion_info Xnew_recursive;
 378
 379   BOOL Xcur_is_word;  /* doubles as allow_zero */
 380   BOOL Xcondition;  /* doubles as cbegroup and condassert */
 381   BOOL Xprev_is_word;  /* doubles as matched_once */
 382
 383 #ifdef SUPPORT_UCP
 384   int Xprop_type;
 385   int Xprop_value;
 386   int Xprop_fail_result;
 387   int Xoclength;
 388   pcre_uchar Xocchars[6];
 389 #endif
 390
 391   int Xcodelink;  /* doubles as code_offset */
 392   int Xctype;
 393   unsigned int Xfc;  /* kept separate from c here; the stack build aliases fc to c */
 394   int Xfi;  /* kept separate from i here; the stack build aliases fi to i */
 395   int Xlength;
 396   int Xmax;
 397   int Xmin;
 398   int Xnumber;  /* doubles as foc */
 399   int Xoffset;
 400   int Xop;
 401   int Xsave_capture_last;
 402   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 403   int Xstacksave[REC_STACK_SAVE_MAX];
 404
 405   eptrblock Xnewptrb;
 406
 407   /* Where to jump back to */
 408
 409   int Xwhere;  /* the "rw" value stored by RMATCH before it switches frames */
 410
 411 } heapframe;
 412
 413 #endif
 414
 415
 416 /***************************************************************************
 417 ***************************************************************************/
 418
 419
 420
 421 /*************************************************
 422 *         Match from current position            *
 423 *************************************************/
 424
 425 /* This function is called recursively in many circumstances. Whenever it
 426 returns a negative (error) response, the outer incarnation must also return the
 427 same response. */
 428
 429 /* These macros pack up tests that are used for partial matching, and which
 430 appear several times in the code. We set the "hit end" flag if the pointer is
 431 at the end of the subject and also past the start of the subject (i.e.
 432 something has been matched). For hard partial matching, we then return
 433 immediately. The second one is used when we already know we are past the end of
 434 the subject. */
 435
 436 #define CHECK_PARTIAL()  /* expands to a bare if statement, so never put an else after it */\
 437   if (md->partial != 0 && eptr >= md->end_subject && \
 438       eptr > md->start_used_ptr) \
 439     { \
 440     md->hitend = TRUE; \
 441     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);  /* hard partial: return at once */ \
 442     }
 443
 444 #define SCHECK_PARTIAL()  /* same, minus the end-of-subject test, which the caller has already made */\
 445   if (md->partial != 0 && eptr > md->start_used_ptr) \
 446     { \
 447     md->hitend = TRUE; \
 448     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);  /* hard partial: return at once */ \
 449     }
 450
 451
 452 /* Performance note: It might be tempting to extract commonly used fields from
 453 the md structure (e.g. utf, end_subject) into individual variables to improve
 454 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 455 made performance worse.
 456
 457 Arguments:
 458    eptr        pointer to current character in subject
 459    ecode       pointer to current position in compiled code
 460    mstart      pointer to the current match start position (can be modified
 461                  by encountering \K)
 462    offset_top  current top pointer
 463    md          pointer to "static" info for the match
 464    eptrb       pointer to chain of blocks containing eptr at start of
 465                  brackets - for testing for empty matches
 466    rdepth      the recursion depth
 467
 468 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 469                MATCH_NOMATCH if failed to match  )
 470                a negative MATCH_xxx value for PRUNE, SKIP, etc
 471                a negative PCRE_ERROR_xxx value if aborted by an error condition
 472                  (e.g. stopped by repeated call or recursion limit)
 473 */
 474
 475 static int
 476 match(PCRE_PUCHAR eptr, const pcre_uchar *ecode,
 477   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
 478   unsigned int rdepth)
 479 {
 480 /* These variables do not need to be preserved over recursion in this function,
 481 so they can be ordinary variables in all cases. Mark some of them with
 482 "register" because they are used a lot in loops. */
 483
 484 int  rrc;         /* Returns from recursive calls */
 485 int  i;           /* Used for loops not involving calls to RMATCH() */
 486 unsigned int c;   /* Character values not kept over RMATCH() calls */
 487 BOOL utf;         /* Local copy of UTF flag for speed */
 488
 489 BOOL minimize, possessive; /* Quantifier options */
 490 BOOL caseless;
 491 int condcode;
 492
 493 /* When recursion is not being used, all "local" variables that have to be
 494 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
 495 frame on the stack here; subsequent instantiations are obtained from the heap
 496 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
 497 the top-level on the stack rather than malloc-ing them all gives a performance
 498 boost in many cases where there is not much "recursion". */
 499
 500 #ifdef NO_RECURSE
 501 heapframe *frame = (heapframe *)md->match_frames_base;
 502
 503 /* Copy in the original argument variables */
 504
 505 frame->Xeptr = eptr;
 506 frame->Xecode = ecode;
 507 frame->Xmstart = mstart;
 508 frame->Xoffset_top = offset_top;
 509 frame->Xeptrb = eptrb;
 510 frame->Xrdepth = rdepth;
 511
 512 /* This is where control jumps back to in order to effect "recursion" */
 513
 514 HEAP_RECURSE:
 515
 516 /* Macros make the argument variables come from the current frame */
 517
 518 #define eptr               frame->Xeptr
 519 #define ecode              frame->Xecode
 520 #define mstart             frame->Xmstart
 521 #define offset_top         frame->Xoffset_top
 522 #define eptrb              frame->Xeptrb
 523 #define rdepth             frame->Xrdepth
 524
 525 /* Ditto for the local variables */
 526
 527 #ifdef SUPPORT_UTF
 528 #define charptr            frame->Xcharptr
 529 #endif
 530 #define callpat            frame->Xcallpat
 531 #define codelink           frame->Xcodelink
 532 #define data               frame->Xdata
 533 #define next               frame->Xnext
 534 #define pp                 frame->Xpp
 535 #define prev               frame->Xprev
 536 #define saved_eptr         frame->Xsaved_eptr
 537
 538 #define new_recursive      frame->Xnew_recursive
 539
 540 #define cur_is_word        frame->Xcur_is_word
 541 #define condition          frame->Xcondition
 542 #define prev_is_word       frame->Xprev_is_word
 543
 544 #ifdef SUPPORT_UCP
 545 #define prop_type          frame->Xprop_type
 546 #define prop_value         frame->Xprop_value
 547 #define prop_fail_result   frame->Xprop_fail_result
 548 #define oclength           frame->Xoclength
 549 #define occhars            frame->Xocchars
 550 #endif
 551
 552 #define ctype              frame->Xctype
 553 #define fc                 frame->Xfc
 554 #define fi                 frame->Xfi
 555 #define length             frame->Xlength
 556 #define max                frame->Xmax
 557 #define min                frame->Xmin
 558 #define number             frame->Xnumber
 559 #define offset             frame->Xoffset
 560 #define op                 frame->Xop
 561 #define save_capture_last  frame->Xsave_capture_last
 562 #define save_offset1       frame->Xsave_offset1
 563 #define save_offset2       frame->Xsave_offset2
 564 #define save_offset3       frame->Xsave_offset3
 565 #define stacksave          frame->Xstacksave
 566
 567 #define newptrb            frame->Xnewptrb
 568
 569 /* When recursion is being used, local variables are allocated on the stack and
 570 get preserved during recursion in the normal way. In this environment, fi and
 571 i, and fc and c, can be the same variables. */
 572
 573 #else         /* NO_RECURSE not defined */
 574 #define fi i
 575 #define fc c
 576
 577 /* Many of the following variables are used only in small blocks of the code.
 578 My normal style of coding would have declared them within each of those blocks.
 579 However, in order to accommodate the version of this code that uses an external
 580 "stack" implemented on the heap, it is easier to declare them all here, so the
 581 declarations can be cut out in a block. The only declarations within blocks
 582 below are for variables that do not have to be preserved over a recursive call
 583 to RMATCH(). */
 584
 585 #ifdef SUPPORT_UTF
 586 const pcre_uchar *charptr;
 587 #endif
 588 const pcre_uchar *callpat;
 589 const pcre_uchar *data;
 590 const pcre_uchar *next;
 591 PCRE_PUCHAR       pp;
 592 const pcre_uchar *prev;
 593 PCRE_PUCHAR       saved_eptr;
 594
 595 recursion_info new_recursive;
 596
 597 BOOL cur_is_word;
 598 BOOL condition;
 599 BOOL prev_is_word;
 600
 601 #ifdef SUPPORT_UCP
 602 int prop_type;
 603 int prop_value;
 604 int prop_fail_result;
 605 int oclength;
 606 pcre_uchar occhars[6];
 607 #endif
 608
 609 int codelink;
 610 int ctype;
 611 int length;
 612 int max;
 613 int min;
 614 int number;
 615 int offset;
 616 int op;
 617 int save_capture_last;
 618 int save_offset1, save_offset2, save_offset3;
 619 int stacksave[REC_STACK_SAVE_MAX];
 620
 621 eptrblock newptrb;
 622
 623 /* There is a special fudge for calling match() in a way that causes it to
 624 measure the size of its basic stack frame when the stack is being used for
 625 recursion. The second argument (ecode) being NULL triggers this behaviour. It
 626 cannot normally ever be NULL. The return is the negated value of the frame
 627 size. */
 628
 629 if (ecode == NULL)
 630   {
 631   if (rdepth == 0)
 632     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
 633   else
 634     {
 635     int len = (char *)&rdepth - (char *)eptr;
 636     return (len > 0)? -len : len;
 637     }
 638   }
 639 #endif     /* NO_RECURSE */
 640
 641 /* To save space on the stack and in the heap frame, I have doubled up on some
 642 of the local variables that are used only in localised parts of the code, but
 643 still need to be preserved over recursive calls of match(). These macros define
 644 the alternative names that are used. */
 645
 646 #define allow_zero    cur_is_word
 647 #define cbegroup      condition
 648 #define code_offset   codelink
 649 #define condassert    condition
 650 #define matched_once  prev_is_word
 651 #define foc           number
 652 #define save_mark     data
 653
 654 /* These statements are here to stop the compiler complaining about uninitialized
 655 variables. */
 656
 657 #ifdef SUPPORT_UCP
 658 prop_value = 0;
 659 prop_fail_result = 0;
 660 #endif
 661
 662
 663 /* This label is used for tail recursion, which is used in a few cases even
 664 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 665 used. Thanks to Ian Taylor for noticing this possibility and sending the
 666 original patch. */
 667
 668 TAIL_RECURSE:
 669
 670 /* OK, now we can get on with the real code of the function. Recursive calls
 671 are specified by the macro RMATCH and RRETURN is used to return. When
 672 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 673 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 674 defined). However, RMATCH isn't like a function call because it's quite a
 675 complicated macro. It has to be used in one particular way. This shouldn't,
 676 however, impact performance when true recursion is being used. */
 677
 678 #ifdef SUPPORT_UTF
 679 utf = md->utf;       /* Local copy of the flag */
 680 #else
 681 utf = FALSE;
 682 #endif
 683
 684 /* First check that we haven't called match() too many times, or that we
 685 haven't exceeded the recursive call limit. */
 686
 687 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 688 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 689
 690 /* At the start of a group with an unlimited repeat that may match an empty
 691 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
 692 done this way to save having to use another function argument, which would take
 693 up space on the stack. See also MATCH_CONDASSERT below.
 694
 695 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
 696 such remembered pointers, to be checked when we hit the closing ket, in order
 697 to break infinite loops that match no characters. When match() is called in
 698 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
 699 NOT be used with tail recursion, because the memory block that is used is on
 700 the stack, so a new one may be required for each match(). */
 701
 702 if (md->match_function_type == MATCH_CBEGROUP)
 703   {
 704   newptrb.epb_saved_eptr = eptr;
 705   newptrb.epb_prev = eptrb;
 706   eptrb = &newptrb;
 707   md->match_function_type = 0;
 708   }
 709
 710 /* Now start processing the opcodes. */
 711
 712 for (;;)
 713   {
 714   minimize = possessive = FALSE;
 715   op = *ecode;
 716
 717   switch(op)
 718     {
 719     case OP_MARK:
 720     md->nomatch_mark = ecode + 2;
 721     md->mark = NULL;    /* In case previously set by assertion */
 722     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 723       eptrb, RM55);
 724     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 725          md->mark == NULL) md->mark = ecode + 2;
 726
 727     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
 728     argument, and we must check whether that argument matches this MARK's
 729     argument. It is passed back in md->start_match_ptr (an overloading of that
 730     variable). If it does match, we reset that variable to the current subject
 731     position and return MATCH_SKIP. Otherwise, pass back the return code
 732     unaltered. */
 733
 734     else if (rrc == MATCH_SKIP_ARG &&
 735         STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
 736       {
 737       md->start_match_ptr = eptr;
 738       RRETURN(MATCH_SKIP);
 739       }
 740     RRETURN(rrc);
 741
 742     case OP_FAIL:
 743     RRETURN(MATCH_NOMATCH);
 744
 745     /* COMMIT overrides PRUNE, SKIP, and THEN */
 746
 747     case OP_COMMIT:
 748     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 749       eptrb, RM52);
 750     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
 751         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
 752         rrc != MATCH_THEN)
 753       RRETURN(rrc);
 754     RRETURN(MATCH_COMMIT);
 755
 756     /* PRUNE overrides THEN */
 757
 758     case OP_PRUNE:
 759     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 760       eptrb, RM51);
 761     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 762     RRETURN(MATCH_PRUNE);
 763
 764     case OP_PRUNE_ARG:
 765     md->nomatch_mark = ecode + 2;
 766     md->mark = NULL;    /* In case previously set by assertion */
 767     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 768       eptrb, RM56);
 769     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 770          md->mark == NULL) md->mark = ecode + 2;
 771     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 772     RRETURN(MATCH_PRUNE);
 773
 774     /* SKIP overrides PRUNE and THEN */
 775
 776     case OP_SKIP:
 777     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 778       eptrb, RM53);
 779     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 780       RRETURN(rrc);
 781     md->start_match_ptr = eptr;   /* Pass back current position */
 782     RRETURN(MATCH_SKIP);
 783
 784     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
 785     nomatch_mark. There is a flag that disables this opcode when re-matching a
 786     pattern that ended with a SKIP for which there was not a matching MARK. */
 787
 788     case OP_SKIP_ARG:
 789     if (md->ignore_skip_arg)
 790       {
 791       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
 792       break;
 793       }
 794     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 795       eptrb, RM57);
 796     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 797       RRETURN(rrc);
 798
 799     /* Pass back the current skip name by overloading md->start_match_ptr and
 800     returning the special MATCH_SKIP_ARG return code. This will either be
 801     caught by a matching MARK, or get to the top, where it causes a rematch
 802     with the md->ignore_skip_arg flag set. */
 803
 804     md->start_match_ptr = ecode + 2;
 805     RRETURN(MATCH_SKIP_ARG);
 806
 807     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
 808     the branch in which it occurs can be determined. Overload the start of
 809     match pointer to do this. */
 810
 811     case OP_THEN:
 812     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 813       eptrb, RM54);
 814     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 815     md->start_match_ptr = ecode;
 816     RRETURN(MATCH_THEN);
 817
 818     case OP_THEN_ARG:
 819     md->nomatch_mark = ecode + 2;
 820     md->mark = NULL;    /* In case previously set by assertion */
 821     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
 822       md, eptrb, RM58);
 823     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 824          md->mark == NULL) md->mark = ecode + 2;
 825     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 826     md->start_match_ptr = ecode;
 827     RRETURN(MATCH_THEN);
 828
 829     /* Handle an atomic group that does not contain any capturing parentheses.
 830     This can be handled like an assertion. Prior to 8.13, all atomic groups
 831     were handled this way. In 8.13, the code was changed as below for ONCE, so
 832     that backups pass through the group and thereby reset captured values.
 833     However, this uses a lot more stack, so in 8.20, atomic groups that do not
 834     contain any captures generate OP_ONCE_NC, which can be handled in the old,
 835     less stack intensive way.
 836
 837     Check the alternative branches in turn - the matching won't pass the KET
 838     for this kind of subpattern. If any one branch matches, we carry on as at
 839     the end of a normal bracket, leaving the subject pointer, but resetting
 840     the start-of-match value in case it was changed by \K. */
 841
 842     case OP_ONCE_NC:
 843     prev = ecode;
 844     saved_eptr = eptr;
 845     save_mark = md->mark;
 846     do
 847       {
 848       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
 849       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
 850         {
 851         mstart = md->start_match_ptr;
 852         break;
 853         }
 854       if (rrc == MATCH_THEN)
 855         {
 856         next = ecode + GET(ecode,1);
 857         if (md->start_match_ptr < next &&
 858             (*ecode == OP_ALT || *next == OP_ALT))
 859           rrc = MATCH_NOMATCH;
 860         }
 861
 862       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 863       ecode += GET(ecode,1);
 864       md->mark = save_mark;
 865       }
 866     while (*ecode == OP_ALT);
 867
 868     /* If we hit the end of the group (which could be repeated), fail */
 869
 870     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
 871
 872     /* Continue as from after the group, updating the offsets high water
 873     mark, since extracts may have been taken. */
 874
 875     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
 876
 877     offset_top = md->end_offset_top;
 878     eptr = md->end_match_ptr;
 879
 880     /* For a non-repeating ket, just continue at this level. This also
 881     happens for a repeating ket if no characters were matched in the group.
 882     This is the forcible breaking of infinite loops as implemented in Perl
 883     5.005. */
 884
 885     if (*ecode == OP_KET || eptr == saved_eptr)
 886       {
 887       ecode += 1+LINK_SIZE;
 888       break;
 889       }
 890
 891     /* The repeating kets try the rest of the pattern or restart from the
 892     preceding bracket, in the appropriate order. The second "call" of match()
 893     uses tail recursion, to avoid using another stack frame. */
 894
 895     if (*ecode == OP_KETRMIN)
 896       {
 897       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
 898       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 899       ecode = prev;
 900       goto TAIL_RECURSE;
 901       }
 902     else  /* OP_KETRMAX */
 903       {
 904       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
 905       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 906       ecode += 1 + LINK_SIZE;
 907       goto TAIL_RECURSE;
 908       }
 909     /* Control never gets here */
 910
 911     /* Handle a capturing bracket, other than those that are possessive with an
 912     unlimited repeat. If there is space in the offset vector, save the current
 913     subject position in the working slot at the top of the vector. We mustn't
 914     change the current values of the data slot, because they may be set from a
 915     previous iteration of this group, and be referred to by a reference inside
 916     the group. A failure to match might occur after the group has succeeded,
 917     if something later on doesn't match. For this reason, we need to restore
 918     the working value and also the values of the final offsets, in case they
 919     were set by a previous iteration of the same bracket.
 920
 921     If there isn't enough space in the offset vector, treat this as if it were
 922     a non-capturing bracket. Don't worry about setting the flag for the error
 923     case here; that is handled in the code for KET. */
 924
 925     case OP_CBRA:
 926     case OP_SCBRA:
 927     number = GET2(ecode, 1+LINK_SIZE);
 928     offset = number << 1;
 929
 930 #ifdef PCRE_DEBUG
 931     printf("start bracket %d\n", number);
 932     printf("subject=");
 933     pchars(eptr, 16, TRUE, md);
 934     printf("\n");
 935 #endif
 936
 937     if (offset < md->offset_max)
 938       {
 939       save_offset1 = md->offset_vector[offset];
 940       save_offset2 = md->offset_vector[offset+1];
 941       save_offset3 = md->offset_vector[md->offset_end - number];
 942       save_capture_last = md->capture_last;
 943       save_mark = md->mark;
 944
 945       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 946       md->offset_vector[md->offset_end - number] =
 947         (int)(eptr - md->start_subject);
 948
 949       for (;;)
 950         {
 951         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
 952         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 953           eptrb, RM1);
 954         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
 955
 956         /* If we backed up to a THEN, check whether it is within the current
 957         branch by comparing the address of the THEN that is passed back with
 958         the end of the branch. If it is within the current branch, and the
 959         branch is one of two or more alternatives (it either starts or ends
 960         with OP_ALT), we have reached the limit of THEN's action, so convert
 961         the return code to NOMATCH, which will cause normal backtracking to
 962         happen from now on. Otherwise, THEN is passed back to an outer
 963         alternative. This implements Perl's treatment of parenthesized groups,
 964         where a group not containing | does not affect the current alternative,
 965         that is, (X) is NOT the same as (X|(*F)). */
 966
 967         if (rrc == MATCH_THEN)
 968           {
 969           next = ecode + GET(ecode,1);
 970           if (md->start_match_ptr < next &&
 971               (*ecode == OP_ALT || *next == OP_ALT))
 972             rrc = MATCH_NOMATCH;
 973           }
 974
 975         /* Anything other than NOMATCH is passed back. */
 976
 977         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 978         md->capture_last = save_capture_last;
 979         ecode += GET(ecode, 1);
 980         md->mark = save_mark;
 981         if (*ecode != OP_ALT) break;
 982         }
 983
 984       DPRINTF(("bracket %d failed\n", number));
 985       md->offset_vector[offset] = save_offset1;
 986       md->offset_vector[offset+1] = save_offset2;
 987       md->offset_vector[md->offset_end - number] = save_offset3;
 988
 989       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
 990
 991       RRETURN(rrc);
 992       }
 993
 994     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 995     as a non-capturing bracket. */
 996
 997     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 998     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 999
1000     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1001
1002     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1003     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1004
1005     /* Non-capturing or atomic group, except for possessive with unlimited
1006     repeat and ONCE group with no captures. Loop for all the alternatives.
1007
1008     When we get to the final alternative within the brackets, we used to return
1009     the result of a recursive call to match() whatever happened so it was
1010     possible to reduce stack usage by turning this into a tail recursion,
1011     except in the case of a possibly empty group. However, now that there is
1012     the possibility of (*THEN) occurring in the final alternative, this
1013     optimization is no longer always possible.
1014
1015     We can optimize if we know there are no (*THEN)s in the pattern; at present
1016     this is the best that can be done.
1017
1018     MATCH_ONCE is returned when the end of an atomic group is successfully
1019     reached, but subsequent matching fails. It passes back up the tree (causing
1020     captured values to be reset) until the original atomic group level is
1021     reached. This is tested by comparing md->once_target with the start of the
1022     group. At this point, the return is converted into MATCH_NOMATCH so that
1023     previous backup points can be taken. */
1024
1025     case OP_ONCE:
1026     case OP_BRA:
1027     case OP_SBRA:
1028     DPRINTF(("start non-capturing bracket\n"));
1029
1030     for (;;)
1031       {
1032       if (op >= OP_SBRA || op == OP_ONCE)
1033         md->match_function_type = MATCH_CBEGROUP;
1034
1035       /* If this is not a possibly empty group, and there are no (*THEN)s in
1036       the pattern, and this is the final alternative, optimize as described
1037       above. */
1038
1039       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1040         {
1041         ecode += PRIV(OP_lengths)[*ecode];
1042         goto TAIL_RECURSE;
1043         }
1044
1045       /* In all other cases, we have to make another call to match(). */
1046
1047       save_mark = md->mark;
1048       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1049         RM2);
1050
1051       /* See comment in the code for capturing groups above about handling
1052       THEN. */
1053
1054       if (rrc == MATCH_THEN)
1055         {
1056         next = ecode + GET(ecode,1);
1057         if (md->start_match_ptr < next &&
1058             (*ecode == OP_ALT || *next == OP_ALT))
1059           rrc = MATCH_NOMATCH;
1060         }
1061
1062       if (rrc != MATCH_NOMATCH)
1063         {
1064         if (rrc == MATCH_ONCE)
1065           {
1066           const pcre_uchar *scode = ecode;
1067           if (*scode != OP_ONCE)           /* If not at start, find it */
1068             {
1069             while (*scode == OP_ALT) scode += GET(scode, 1);
1070             scode -= GET(scode, 1);
1071             }
1072           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1073           }
1074         RRETURN(rrc);
1075         }
1076       ecode += GET(ecode, 1);
1077       md->mark = save_mark;
1078       if (*ecode != OP_ALT) break;
1079       }
1080
1081     RRETURN(MATCH_NOMATCH);
1082
1083     /* Handle possessive capturing brackets with an unlimited repeat. We come
1084     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1085     handled similarly to the normal case above. However, the matching is
1086     different. The end of these brackets will always be OP_KETRPOS, which
1087     returns MATCH_KETRPOS without going further in the pattern. By this means
1088     we can handle the group by iteration rather than recursion, thereby
1089     reducing the amount of stack needed. */
1090
1091     case OP_CBRAPOS:
1092     case OP_SCBRAPOS:
1093     allow_zero = FALSE;
1094
1095     POSSESSIVE_CAPTURE:
1096     number = GET2(ecode, 1+LINK_SIZE);
1097     offset = number << 1;
1098
1099 #ifdef PCRE_DEBUG
1100     printf("start possessive bracket %d\n", number);
1101     printf("subject=");
1102     pchars(eptr, 16, TRUE, md);
1103     printf("\n");
1104 #endif
1105
1106     if (offset < md->offset_max)
1107       {
1108       matched_once = FALSE;
1109       code_offset = (int)(ecode - md->start_code);
1110
1111       save_offset1 = md->offset_vector[offset];
1112       save_offset2 = md->offset_vector[offset+1];
1113       save_offset3 = md->offset_vector[md->offset_end - number];
1114       save_capture_last = md->capture_last;
1115
1116       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1117
1118       /* Each time round the loop, save the current subject position for use
1119       when the group matches. For MATCH_MATCH, the group has matched, so we
1120       restart it with a new subject starting position, remembering that we had
1121       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1122       usual. If we haven't matched any alternatives in any iteration, check to
1123       see if a previous iteration matched. If so, the group has matched;
1124       continue from afterwards. Otherwise it has failed; restore the previous
1125       capture values before returning NOMATCH. */
1126
1127       for (;;)
1128         {
1129         md->offset_vector[md->offset_end - number] =
1130           (int)(eptr - md->start_subject);
1131         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1132         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1133           eptrb, RM63);
1134         if (rrc == MATCH_KETRPOS)
1135           {
1136           offset_top = md->end_offset_top;
1137           eptr = md->end_match_ptr;
1138           ecode = md->start_code + code_offset;
1139           save_capture_last = md->capture_last;
1140           matched_once = TRUE;
1141           continue;
1142           }
1143
1144         /* See comment in the code for capturing groups above about handling
1145         THEN. */
1146
1147         if (rrc == MATCH_THEN)
1148           {
1149           next = ecode + GET(ecode,1);
1150           if (md->start_match_ptr < next &&
1151               (*ecode == OP_ALT || *next == OP_ALT))
1152             rrc = MATCH_NOMATCH;
1153           }
1154
1155         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1156         md->capture_last = save_capture_last;
1157         ecode += GET(ecode, 1);
1158         if (*ecode != OP_ALT) break;
1159         }
1160
1161       if (!matched_once)
1162         {
1163         md->offset_vector[offset] = save_offset1;
1164         md->offset_vector[offset+1] = save_offset2;
1165         md->offset_vector[md->offset_end - number] = save_offset3;
1166         }
1167
1168       if (allow_zero || matched_once)
1169         {
1170         ecode += 1 + LINK_SIZE;
1171         break;
1172         }
1173
1174       RRETURN(MATCH_NOMATCH);
1175       }
1176
1177     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1178     as a non-capturing bracket. */
1179
1180     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1181     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1182
1183     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1184
1185     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1186     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1187
1188     /* Non-capturing possessive bracket with unlimited repeat. We come here
1189     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1190     without the capturing complication. It is written out separately for speed
1191     and cleanliness. */
1192
1193     case OP_BRAPOS:
1194     case OP_SBRAPOS:
1195     allow_zero = FALSE;
1196
1197     POSSESSIVE_NON_CAPTURE:
1198     matched_once = FALSE;
1199     code_offset = (int)(ecode - md->start_code);
1200
1201     for (;;)
1202       {
1203       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1204       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1205         eptrb, RM48);
1206       if (rrc == MATCH_KETRPOS)
1207         {
1208         offset_top = md->end_offset_top;
1209         eptr = md->end_match_ptr;
1210         ecode = md->start_code + code_offset;
1211         matched_once = TRUE;
1212         continue;
1213         }
1214
1215       /* See comment in the code for capturing groups above about handling
1216       THEN. */
1217
1218       if (rrc == MATCH_THEN)
1219         {
1220         next = ecode + GET(ecode,1);
1221         if (md->start_match_ptr < next &&
1222             (*ecode == OP_ALT || *next == OP_ALT))
1223           rrc = MATCH_NOMATCH;
1224         }
1225
1226       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1227       ecode += GET(ecode, 1);
1228       if (*ecode != OP_ALT) break;
1229       }
1230
1231     if (matched_once || allow_zero)
1232       {
1233       ecode += 1 + LINK_SIZE;
1234       break;
1235       }
1236     RRETURN(MATCH_NOMATCH);
1237
1238     /* Control never reaches here. */
1239
1240     /* Conditional group: compilation checked that there are no more than
1241     two branches. If the condition is false, skipping the first branch takes us
1242     past the end if there is only one branch, but that's OK because that is
1243     exactly what going to the ket would do. */
1244
1245     case OP_COND:
1246     case OP_SCOND:
1247     codelink = GET(ecode, 1);
1248
1249     /* Because of the way auto-callout works during compile, a callout item is
1250     inserted between OP_COND and an assertion condition. */
1251
1252     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1253       {
1254       if (PUBL(callout) != NULL)
1255         {
1256         PUBL(callout_block) cb;
1257         cb.version          = 2;   /* Version 2 of the callout block */
1258         cb.callout_number   = ecode[LINK_SIZE+2];
1259         cb.offset_vector    = md->offset_vector;
1260 #ifdef COMPILE_PCRE8
1261         cb.subject          = (PCRE_SPTR)md->start_subject;
1262 #else
1263         cb.subject          = (PCRE_SPTR16)md->start_subject;
1264 #endif
1265         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1266         cb.start_match      = (int)(mstart - md->start_subject);
1267         cb.current_position = (int)(eptr - md->start_subject);
1268         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1269         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1270         cb.capture_top      = offset_top/2;
1271         cb.capture_last     = md->capture_last;
1272         cb.callout_data     = md->callout_data;
1273         cb.mark             = md->nomatch_mark;
1274         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1275         if (rrc < 0) RRETURN(rrc);
1276         }
1277       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1278       }
1279
1280     condcode = ecode[LINK_SIZE+1];
1281
1282     /* Now see what the actual condition is */
1283
1284     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1285       {
1286       if (md->recursive == NULL)                /* Not recursing => FALSE */
1287         {
1288         condition = FALSE;
1289         ecode += GET(ecode, 1);
1290         }
1291       else
1292         {
1293         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1294         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1295
1296         /* If the test is for recursion into a specific subpattern, and it is
1297         false, but the test was set up by name, scan the table to see if the
1298         name refers to any other numbers, and test them. The condition is true
1299         if any one is set. */
1300
1301         if (!condition && condcode == OP_NRREF)
1302           {
1303           pcre_uchar *slotA = md->name_table;
1304           for (i = 0; i < md->name_count; i++)
1305             {
1306             if (GET2(slotA, 0) == recno) break;
1307             slotA += md->name_entry_size;
1308             }
1309
1310           /* Found a name for the number - there can be only one; duplicate
1311           names for different numbers are allowed, but not vice versa. First
1312           scan down for duplicates. */
1313
1314           if (i < md->name_count)
1315             {
1316             pcre_uchar *slotB = slotA;
1317             while (slotB > md->name_table)
1318               {
1319               slotB -= md->name_entry_size;
1320               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1321                 {
1322                 condition = GET2(slotB, 0) == md->recursive->group_num;
1323                 if (condition) break;
1324                 }
1325               else break;
1326               }
1327
1328             /* Scan up for duplicates */
1329
1330             if (!condition)
1331               {
1332               slotB = slotA;
1333               for (i++; i < md->name_count; i++)
1334                 {
1335                 slotB += md->name_entry_size;
1336                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1337                   {
1338                   condition = GET2(slotB, 0) == md->recursive->group_num;
1339                   if (condition) break;
1340                   }
1341                 else break;
1342                 }
1343               }
1344             }
1345           }
1346
1347         /* Choose branch according to the condition */
1348
1349         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1350         }
1351       }
1352
1353     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1354       {
1355       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1356       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1357
1358       /* If the numbered capture is unset, but the reference was by name,
1359       scan the table to see if the name refers to any other numbers, and test
1360       them. The condition is true if any one is set. This is tediously similar
1361       to the code above, but not close enough to try to amalgamate. */
1362
1363       if (!condition && condcode == OP_NCREF)
1364         {
1365         int refno = offset >> 1;
1366         pcre_uchar *slotA = md->name_table;
1367
1368         for (i = 0; i < md->name_count; i++)
1369           {
1370           if (GET2(slotA, 0) == refno) break;
1371           slotA += md->name_entry_size;
1372           }
1373
1374         /* Found a name for the number - there can be only one; duplicate names
1375         for different numbers are allowed, but not vice versa. First scan down
1376         for duplicates. */
1377
1378         if (i < md->name_count)
1379           {
1380           pcre_uchar *slotB = slotA;
1381           while (slotB > md->name_table)
1382             {
1383             slotB -= md->name_entry_size;
1384             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1385               {
1386               offset = GET2(slotB, 0) << 1;
1387               condition = offset < offset_top &&
1388                 md->offset_vector[offset] >= 0;
1389               if (condition) break;
1390               }
1391             else break;
1392             }
1393
1394           /* Scan up for duplicates */
1395
1396           if (!condition)
1397             {
1398             slotB = slotA;
1399             for (i++; i < md->name_count; i++)
1400               {
1401               slotB += md->name_entry_size;
1402               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1403                 {
1404                 offset = GET2(slotB, 0) << 1;
1405                 condition = offset < offset_top &&
1406                   md->offset_vector[offset] >= 0;
1407                 if (condition) break;
1408                 }
1409               else break;
1410               }
1411             }
1412           }
1413         }
1414
1415       /* Choose branch according to the condition */
1416
1417       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1418       }
1419
1420     else if (condcode == OP_DEF)     /* DEFINE - always false */
1421       {
1422       condition = FALSE;
1423       ecode += GET(ecode, 1);
1424       }
1425
1426     /* The condition is an assertion. Call match() to evaluate it - setting
1427     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1428     an assertion. */
1429
1430     else
1431       {
1432       md->match_function_type = MATCH_CONDASSERT;
1433       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1434       if (rrc == MATCH_MATCH)
1435         {
1436         if (md->end_offset_top > offset_top)
1437           offset_top = md->end_offset_top;  /* Captures may have happened */
1438         condition = TRUE;
1439         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1440         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1441         }
1442
1443       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1444       assertion; it is therefore treated as NOMATCH. */
1445
1446       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1447         {
1448         RRETURN(rrc);         /* Need braces because of following else */
1449         }
1450       else
1451         {
1452         condition = FALSE;
1453         ecode += codelink;
1454         }
1455       }
1456
1457     /* We are now at the branch that is to be obeyed. As there is only one, can
1458     use tail recursion to avoid using another stack frame, except when there is
1459     unlimited repeat of a possibly empty group. In the latter case, a recursive
1460     call to match() is always required, unless the second alternative doesn't
1461     exist, in which case we can just plough on. Note that, for compatibility
1462     with Perl, the | in a conditional group is NOT treated as creating two
1463     alternatives. If a THEN is encountered in the branch, it propagates out to
1464     the enclosing alternative (unless nested in a deeper set of alternatives,
1465     of course). */
1466
1467     if (condition || *ecode == OP_ALT)
1468       {
1469       if (op != OP_SCOND)
1470         {
1471         ecode += 1 + LINK_SIZE;
1472         goto TAIL_RECURSE;
1473         }
1474
1475       md->match_function_type = MATCH_CBEGROUP;
1476       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1477       RRETURN(rrc);
1478       }
1479
1480      /* Condition false & no alternative; continue after the group. */
1481
1482     else
1483       {
1484       ecode += 1 + LINK_SIZE;
1485       }
1486     break;
1487
1488
1489     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1490     to close any currently open capturing brackets. */
1491
1492     case OP_CLOSE:
1493     number = GET2(ecode, 1);
1494     offset = number << 1;
1495
1496 #ifdef PCRE_DEBUG
1497       printf("end bracket %d at *ACCEPT", number);
1498       printf("\n");
1499 #endif
1500
1501     md->capture_last = number;
1502     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1503       {
1504       md->offset_vector[offset] =
1505         md->offset_vector[md->offset_end - number];
1506       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1507       if (offset_top <= offset) offset_top = offset + 2;
1508       }
1509     ecode += 1 + IMM2_SIZE;
1510     break;
1511
1512
1513     /* End of the pattern, either real or forced. */
1514
1515     case OP_END:
1516     case OP_ACCEPT:
1517     case OP_ASSERT_ACCEPT:
1518
1519     /* If we have matched an empty string, fail if not in an assertion and not
1520     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1521     is set and we have matched at the start of the subject. In both cases,
1522     backtracking will then try other alternatives, if any. */
1523
1524     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1525          md->recursive == NULL &&
1526          (md->notempty ||
1527            (md->notempty_atstart &&
1528              mstart == md->start_subject + md->start_offset)))
1529       RRETURN(MATCH_NOMATCH);
1530
1531     /* Otherwise, we have a match. */
1532
1533     md->end_match_ptr = eptr;           /* Record where we ended */
1534     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1535     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1536
1537     /* For some reason, the macros don't work properly if an expression is
1538     given as the argument to RRETURN when the heap is in use. */
1539
1540     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1541     RRETURN(rrc);
1542
1543     /* Assertion brackets. Check the alternative branches in turn - the
1544     matching won't pass the KET for an assertion. If any one branch matches,
1545     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1546     start of each branch to move the current point backwards, so the code at
1547     this level is identical to the lookahead case. When the assertion is part
1548     of a condition, we want to return immediately afterwards. The caller of
1549     this incarnation of the match() function will have set MATCH_CONDASSERT in
1550     md->match_function_type, and one of these opcodes will be the first opcode
1551     that is processed. We use a local variable that is preserved over calls to
1552     match() to remember this case. */
1553
1554     case OP_ASSERT:
1555     case OP_ASSERTBACK:
1556     save_mark = md->mark;
1557     if (md->match_function_type == MATCH_CONDASSERT)
1558       {
1559       condassert = TRUE;
1560       md->match_function_type = 0;
1561       }
1562     else condassert = FALSE;
1563
1564     do
1565       {
1566       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1567       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1568         {
1569         mstart = md->start_match_ptr;   /* In case \K reset it */
1570         break;
1571         }
1572       md->mark = save_mark;
1573
1574       /* A COMMIT failure must fail the entire assertion, without trying any
1575       subsequent branches. */
1576
1577       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1578
1579       /* PCRE does not allow THEN to escape beyond an assertion; it
1580       is treated as NOMATCH. */
1581
1582       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1583       ecode += GET(ecode, 1);
1584       }
1585     while (*ecode == OP_ALT);
1586
1587     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1588
1589     /* If checking an assertion for a condition, return MATCH_MATCH. */
1590
1591     if (condassert) RRETURN(MATCH_MATCH);
1592
1593     /* Continue from after the assertion, updating the offsets high water
1594     mark, since extracts may have been taken during the assertion. */
1595
1596     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1597     ecode += 1 + LINK_SIZE;
1598     offset_top = md->end_offset_top;
1599     continue;
1600
1601     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1602     PRUNE, or COMMIT means we must assume failure without checking subsequent
1603     branches. */
1604
1605     case OP_ASSERT_NOT:
1606     case OP_ASSERTBACK_NOT:
1607     save_mark = md->mark;
1608     if (md->match_function_type == MATCH_CONDASSERT)
1609       {
1610       condassert = TRUE;
1611       md->match_function_type = 0;
1612       }
1613     else condassert = FALSE;
1614
1615     do
1616       {
1617       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1618       md->mark = save_mark;
1619       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1620       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1621         {
1622         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1623         break;
1624         }
1625
1626       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1627       as NOMATCH. */
1628
1629       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1630       ecode += GET(ecode,1);
1631       }
1632     while (*ecode == OP_ALT);
1633
1634     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1635
1636     ecode += 1 + LINK_SIZE;
1637     continue;
1638
1639     /* Move the subject pointer back. This occurs only at the start of
1640     each branch of a lookbehind assertion. If we are too close to the start to
1641     move back, this match function fails. When working with UTF-8 we move
1642     back a number of characters, not bytes. */
1643
1644     case OP_REVERSE:
1645 #ifdef SUPPORT_UTF
1646     if (utf)
1647       {
1648       i = GET(ecode, 1);
1649       while (i-- > 0)
1650         {
1651         eptr--;
1652         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1653         BACKCHAR(eptr);
1654         }
1655       }
1656     else
1657 #endif
1658
1659     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1660
1661       {
1662       eptr -= GET(ecode, 1);
1663       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1664       }
1665
1666     /* Save the earliest consulted character, then skip to next op code */
1667
1668     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1669     ecode += 1 + LINK_SIZE;
1670     break;
1671
1672     /* The callout item calls an external function, if one is provided, passing
1673     details of the match so far. This is mainly for debugging, though the
1674     function is able to force a failure. */
1675
1676     case OP_CALLOUT:
1677     if (PUBL(callout) != NULL)
1678       {
1679       PUBL(callout_block) cb;
1680       cb.version          = 2;   /* Version 2 of the callout block */
1681       cb.callout_number   = ecode[1];
1682       cb.offset_vector    = md->offset_vector;
1683 #ifdef COMPILE_PCRE8
1684       cb.subject          = (PCRE_SPTR)md->start_subject;
1685 #else
1686       cb.subject          = (PCRE_SPTR16)md->start_subject;
1687 #endif
1688       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1689       cb.start_match      = (int)(mstart - md->start_subject);
1690       cb.current_position = (int)(eptr - md->start_subject);
1691       cb.pattern_position = GET(ecode, 2);
1692       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1693       cb.capture_top      = offset_top/2;
1694       cb.capture_last     = md->capture_last;
1695       cb.callout_data     = md->callout_data;
1696       cb.mark             = md->nomatch_mark;
1697       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1698       if (rrc < 0) RRETURN(rrc);
1699       }
1700     ecode += 2 + 2*LINK_SIZE;
1701     break;
1702
1703     /* Recursion either matches the current regex, or some subexpression. The
1704     offset data is the offset to the starting bracket from the start of the
1705     whole pattern. (This is so that it works from duplicated subpatterns.)
1706
1707     The state of the capturing groups is preserved over recursion, and
1708     re-instated afterwards. We don't know how many are started and not yet
1709     finished (offset_top records the completed total) so we just have to save
1710     all the potential data. There may be up to 65535 such values, which is too
1711     large to put on the stack, but using malloc for small numbers seems
1712     expensive. As a compromise, the stack is used when there are no more than
1713     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1714
1715     There are also other values that have to be saved. We use a chained
1716     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1717     for the original version of this logic. It has, however, been hacked around
1718     a lot, so he is not to blame for the current way it works. */
1719
1720     case OP_RECURSE:
1721       {
1722       recursion_info *ri;
1723       int recno;
1724
1725       callpat = md->start_code + GET(ecode, 1);
1726       recno = (callpat == md->start_code)? 0 :
1727         GET2(callpat, 1 + LINK_SIZE);
1728
1729       /* Check for repeating a recursion without advancing the subject pointer.
1730       This should catch convoluted mutual recursions. (Some simple cases are
1731       caught at compile time.) */
1732
1733       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1734         if (recno == ri->group_num && eptr == ri->subject_position)
1735           RRETURN(PCRE_ERROR_RECURSELOOP);
1736
1737       /* Add to "recursing stack" */
1738
1739       new_recursive.group_num = recno;
1740       new_recursive.subject_position = eptr;
1741       new_recursive.prevrec = md->recursive;
1742       md->recursive = &new_recursive;
1743
1744       /* Where to continue from afterwards */
1745
1746       ecode += 1 + LINK_SIZE;
1747
1748       /* Now save the offset data */
1749
1750       new_recursive.saved_max = md->offset_end;
1751       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1752         new_recursive.offset_save = stacksave;
1753       else
1754         {
1755         new_recursive.offset_save =
1756           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1757         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1758         }
1759       memcpy(new_recursive.offset_save, md->offset_vector,
1760             new_recursive.saved_max * sizeof(int));
1761
1762       /* OK, now we can do the recursion. After processing each alternative,
1763       restore the offset data. If there were nested recursions, md->recursive
1764       might be changed, so reset it before looping. */
1765
1766       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1767       cbegroup = (*callpat >= OP_SBRA);
1768       do
1769         {
1770         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1771         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1772           md, eptrb, RM6);
1773         memcpy(md->offset_vector, new_recursive.offset_save,
1774             new_recursive.saved_max * sizeof(int));
1775         md->recursive = new_recursive.prevrec;
1776         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1777           {
1778           DPRINTF(("Recursion matched\n"));
1779           if (new_recursive.offset_save != stacksave)
1780             (PUBL(free))(new_recursive.offset_save);
1781
1782           /* Set where we got to in the subject, and reset the start in case
1783           it was changed by \K. This *is* propagated back out of a recursion,
1784           for Perl compatibility. */
1785
1786           eptr = md->end_match_ptr;
1787           mstart = md->start_match_ptr;
1788           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1789           }
1790
1791         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1792         is treated as NOMATCH. */
1793
1794         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1795                  rrc != MATCH_COMMIT)
1796           {
1797           DPRINTF(("Recursion gave error %d\n", rrc));
1798           if (new_recursive.offset_save != stacksave)
1799             (PUBL(free))(new_recursive.offset_save);
1800           RRETURN(rrc);
1801           }
1802
1803         md->recursive = &new_recursive;
1804         callpat += GET(callpat, 1);
1805         }
1806       while (*callpat == OP_ALT);
1807
1808       DPRINTF(("Recursion didn't match\n"));
1809       md->recursive = new_recursive.prevrec;
1810       if (new_recursive.offset_save != stacksave)
1811         (PUBL(free))(new_recursive.offset_save);
1812       RRETURN(MATCH_NOMATCH);
1813       }
1814
1815     RECURSION_MATCHED:
1816     break;
1817
1818     /* An alternation is the end of a branch; scan along to find the end of the
1819     bracketed group and go to there. */
1820
1821     case OP_ALT:
1822     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1823     break;
1824
1825     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1826     indicating that it may occur zero times. It may repeat infinitely, or not
1827     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1828     with fixed upper repeat limits are compiled as a number of copies, with the
1829     optional ones preceded by BRAZERO or BRAMINZERO. */
1830
1831     case OP_BRAZERO:
1832     next = ecode + 1;
1833     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1834     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1835     do next += GET(next, 1); while (*next == OP_ALT);
1836     ecode = next + 1 + LINK_SIZE;
1837     break;
1838
1839     case OP_BRAMINZERO:
1840     next = ecode + 1;
1841     do next += GET(next, 1); while (*next == OP_ALT);
1842     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1843     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1844     ecode++;
1845     break;
1846
1847     case OP_SKIPZERO:
1848     next = ecode+1;
1849     do next += GET(next,1); while (*next == OP_ALT);
1850     ecode = next + 1 + LINK_SIZE;
1851     break;
1852
1853     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1854     here; just jump to the group, with allow_zero set TRUE. */
1855
1856     case OP_BRAPOSZERO:
1857     op = *(++ecode);
1858     allow_zero = TRUE;
1859     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1860       goto POSSESSIVE_NON_CAPTURE;
1861
1862     /* End of a group, repeated or non-repeating. */
1863
1864     case OP_KET:
1865     case OP_KETRMIN:
1866     case OP_KETRMAX:
1867     case OP_KETRPOS:
1868     prev = ecode - GET(ecode, 1);
1869
1870     /* If this was a group that remembered the subject start, in order to break
1871     infinite repeats of empty string matches, retrieve the subject start from
1872     the chain. Otherwise, set it NULL. */
1873
1874     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1875       {
1876       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1877       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1878       }
1879     else saved_eptr = NULL;
1880
1881     /* If we are at the end of an assertion group or a non-capturing atomic
1882     group, stop matching and return MATCH_MATCH, but record the current high
1883     water mark for use by positive assertions. We also need to record the match
1884     start in case it was changed by \K. */
1885
1886     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1887          *prev == OP_ONCE_NC)
1888       {
1889       md->end_match_ptr = eptr;      /* For ONCE_NC */
1890       md->end_offset_top = offset_top;
1891       md->start_match_ptr = mstart;
1892       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1893       }
1894
1895     /* For capturing groups we have to check the group number back at the start
1896     and if necessary complete handling an extraction by setting the offsets and
1897     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1898     into group 0, so it won't be picked up here. Instead, we catch it when the
1899     OP_END is reached. Other recursion is handled here. We just have to record
1900     the current subject position and start match pointer and give a MATCH
1901     return. */
1902
1903     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1904         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1905       {
1906       number = GET2(prev, 1+LINK_SIZE);
1907       offset = number << 1;
1908
1909 #ifdef PCRE_DEBUG
1910       printf("end bracket %d", number);
1911       printf("\n");
1912 #endif
1913
1914       /* Handle a recursively called group. */
1915
1916       if (md->recursive != NULL && md->recursive->group_num == number)
1917         {
1918         md->end_match_ptr = eptr;
1919         md->start_match_ptr = mstart;
1920         RRETURN(MATCH_MATCH);
1921         }
1922
1923       /* Deal with capturing */
1924
1925       md->capture_last = number;
1926       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1927         {
1928         /* If offset is greater than offset_top, it means that we are
1929         "skipping" a capturing group, and that group's offsets must be marked
1930         unset. In earlier versions of PCRE, all the offsets were unset at the
1931         start of matching, but this doesn't work because atomic groups and
1932         assertions can cause a value to be set that should later be unset.
1933         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1934         part of the atomic group, but this is not on the final matching path,
1935         so must be unset when 2 is set. (If there is no group 2, there is no
1936         problem, because offset_top will then be 2, indicating no capture.) */
1937
1938         if (offset > offset_top)
1939           {
1940           int *iptr = md->offset_vector + offset_top;
1941           int *iend = md->offset_vector + offset;
1942           while (iptr < iend) *iptr++ = -1;
1943           }
1944
1945         /* Now make the extraction */
1946
1947         md->offset_vector[offset] =
1948           md->offset_vector[md->offset_end - number];
1949         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1950         if (offset_top <= offset) offset_top = offset + 2;
1951         }
1952       }
1953
1954     /* For an ordinary non-repeating ket, just continue at this level. This
1955     also happens for a repeating ket if no characters were matched in the
1956     group. This is the forcible breaking of infinite loops as implemented in
1957     Perl 5.005. For a non-repeating atomic group that includes captures,
1958     establish a backup point by processing the rest of the pattern at a lower
1959     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1960     original OP_ONCE level, thereby bypassing intermediate backup points, but
1961     resetting any captures that happened along the way. */
1962
1963     if (*ecode == OP_KET || eptr == saved_eptr)
1964       {
1965       if (*prev == OP_ONCE)
1966         {
1967         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1968         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1969         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1970         RRETURN(MATCH_ONCE);
1971         }
1972       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1973       break;
1974       }
1975
1976     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1977     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1978     at a time from the outer level, thus saving stack. */
1979
1980     if (*ecode == OP_KETRPOS)
1981       {
1982       md->end_match_ptr = eptr;
1983       md->end_offset_top = offset_top;
1984       RRETURN(MATCH_KETRPOS);
1985       }
1986
1987     /* The normal repeating kets try the rest of the pattern or restart from
1988     the preceding bracket, in the appropriate order. In the second case, we can
1989     use tail recursion to avoid using another stack frame, unless we have
1990     an atomic group or an unlimited repeat of a group that can match an empty
1991     string. */
1992
1993     if (*ecode == OP_KETRMIN)
1994       {
1995       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1996       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1997       if (*prev == OP_ONCE)
1998         {
1999         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2000         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2001         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2002         RRETURN(MATCH_ONCE);
2003         }
2004       if (*prev >= OP_SBRA)    /* Could match an empty string */
2005         {
2006         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2007         RRETURN(rrc);
2008         }
2009       ecode = prev;
2010       goto TAIL_RECURSE;
2011       }
2012     else  /* OP_KETRMAX */
2013       {
2014       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2015       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2016       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2017       if (*prev == OP_ONCE)
2018         {
2019         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2020         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2021         md->once_target = prev;
2022         RRETURN(MATCH_ONCE);
2023         }
2024       ecode += 1 + LINK_SIZE;
2025       goto TAIL_RECURSE;
2026       }
2027     /* Control never gets here */
2028
2029     /* Not multiline mode: start of subject assertion, unless notbol. */
2030
2031     case OP_CIRC:
2032     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2033
2034     /* Start of subject assertion */
2035
2036     case OP_SOD:
2037     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2038     ecode++;
2039     break;
2040
2041     /* Multiline mode: start of subject unless notbol, or after any newline. */
2042
2043     case OP_CIRCM:
2044     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2045     if (eptr != md->start_subject &&
2046         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2047       RRETURN(MATCH_NOMATCH);
2048     ecode++;
2049     break;
2050
2051     /* Start of match assertion */
2052
2053     case OP_SOM:
2054     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2055     ecode++;
2056     break;
2057
2058     /* Reset the start of match point */
2059
2060     case OP_SET_SOM:
2061     mstart = eptr;
2062     ecode++;
2063     break;
2064
2065     /* Multiline mode: assert before any newline, or before end of subject
2066     unless noteol is set. */
2067
2068     case OP_DOLLM:
2069     if (eptr < md->end_subject)
2070       {
2071       if (!IS_NEWLINE(eptr))
2072         {
2073         if (md->partial != 0 &&
2074             eptr + 1 >= md->end_subject &&
2075             NLBLOCK->nltype == NLTYPE_FIXED &&
2076             NLBLOCK->nllen == 2 &&
2077             *eptr == NLBLOCK->nl[0])
2078           {
2079           md->hitend = TRUE;
2080           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2081           }
2082         RRETURN(MATCH_NOMATCH);
2083         }
2084       }
2085     else
2086       {
2087       if (md->noteol) RRETURN(MATCH_NOMATCH);
2088       SCHECK_PARTIAL();
2089       }
2090     ecode++;
2091     break;
2092
2093     /* Not multiline mode: assert before a terminating newline or before end of
2094     subject unless noteol is set. */
2095
2096     case OP_DOLL:
2097     if (md->noteol) RRETURN(MATCH_NOMATCH);
2098     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2099
2100     /* ... else fall through for endonly */
2101
2102     /* End of subject assertion (\z) */
2103
2104     case OP_EOD:
2105     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2106     SCHECK_PARTIAL();
2107     ecode++;
2108     break;
2109
2110     /* End of subject or ending \n assertion (\Z) */
2111
2112     case OP_EODN:
2113     ASSERT_NL_OR_EOS:
2114     if (eptr < md->end_subject &&
2115         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2116       {
2117       if (md->partial != 0 &&
2118           eptr + 1 >= md->end_subject &&
2119           NLBLOCK->nltype == NLTYPE_FIXED &&
2120           NLBLOCK->nllen == 2 &&
2121           *eptr == NLBLOCK->nl[0])
2122         {
2123         md->hitend = TRUE;
2124         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2125         }
2126       RRETURN(MATCH_NOMATCH);
2127       }
2128
2129     /* Either at end of string or \n before end. */
2130
2131     SCHECK_PARTIAL();
2132     ecode++;
2133     break;
2134
2135     /* Word boundary assertions */
2136
2137     case OP_NOT_WORD_BOUNDARY:
2138     case OP_WORD_BOUNDARY:
2139       {
2140
2141       /* Find out if the previous and current characters are "word" characters.
2142       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2143       be "non-word" characters. Remember the earliest consulted character for
2144       partial matching. */
2145
2146 #ifdef SUPPORT_UTF
2147       if (utf)
2148         {
2149         /* Get status of previous character */
2150
2151         if (eptr == md->start_subject) prev_is_word = FALSE; else
2152           {
2153           PCRE_PUCHAR lastptr = eptr - 1;
2154           BACKCHAR(lastptr);
2155           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2156           GETCHAR(c, lastptr);
2157 #ifdef SUPPORT_UCP
2158           if (md->use_ucp)
2159             {
2160             if (c == '_') prev_is_word = TRUE; else
2161               {
2162               int cat = UCD_CATEGORY(c);
2163               prev_is_word = (cat == ucp_L || cat == ucp_N);
2164               }
2165             }
2166           else
2167 #endif
2168           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2169           }
2170
2171         /* Get status of next character */
2172
2173         if (eptr >= md->end_subject)
2174           {
2175           SCHECK_PARTIAL();
2176           cur_is_word = FALSE;
2177           }
2178         else
2179           {
2180           GETCHAR(c, eptr);
2181 #ifdef SUPPORT_UCP
2182           if (md->use_ucp)
2183             {
2184             if (c == '_') cur_is_word = TRUE; else
2185               {
2186               int cat = UCD_CATEGORY(c);
2187               cur_is_word = (cat == ucp_L || cat == ucp_N);
2188               }
2189             }
2190           else
2191 #endif
2192           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2193           }
2194         }
2195       else
2196 #endif
2197
2198       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2199       consistency with the behaviour of \w we do use it in this case. */
2200
2201         {
2202         /* Get status of previous character */
2203
2204         if (eptr == md->start_subject) prev_is_word = FALSE; else
2205           {
2206           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2207 #ifdef SUPPORT_UCP
2208           if (md->use_ucp)
2209             {
2210             c = eptr[-1];
2211             if (c == '_') prev_is_word = TRUE; else
2212               {
2213               int cat = UCD_CATEGORY(c);
2214               prev_is_word = (cat == ucp_L || cat == ucp_N);
2215               }
2216             }
2217           else
2218 #endif
2219           prev_is_word = MAX_255(eptr[-1])
2220             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2221           }
2222
2223         /* Get status of next character */
2224
2225         if (eptr >= md->end_subject)
2226           {
2227           SCHECK_PARTIAL();
2228           cur_is_word = FALSE;
2229           }
2230         else
2231 #ifdef SUPPORT_UCP
2232         if (md->use_ucp)
2233           {
2234           c = *eptr;
2235           if (c == '_') cur_is_word = TRUE; else
2236             {
2237             int cat = UCD_CATEGORY(c);
2238             cur_is_word = (cat == ucp_L || cat == ucp_N);
2239             }
2240           }
2241         else
2242 #endif
2243         cur_is_word = MAX_255(*eptr)
2244           && ((md->ctypes[*eptr] & ctype_word) != 0);
2245         }
2246
2247       /* Now see if the situation is what we want */
2248
2249       if ((*ecode++ == OP_WORD_BOUNDARY)?
2250            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2251         RRETURN(MATCH_NOMATCH);
2252       }
2253     break;
2254
2255     /* Match any single character type except newline; have to take care with
2256     CRLF newlines and partial matching. */
2257
2258     case OP_ANY:
2259     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2260     if (md->partial != 0 &&
2261         eptr + 1 >= md->end_subject &&
2262         NLBLOCK->nltype == NLTYPE_FIXED &&
2263         NLBLOCK->nllen == 2 &&
2264         *eptr == NLBLOCK->nl[0])
2265       {
2266       md->hitend = TRUE;
2267       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2268       }
2269
2270     /* Fall through */
2271
2272     /* Match any single character whatsoever. */
2273
2274     case OP_ALLANY:
2275     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2276       {                            /* not be updated before SCHECK_PARTIAL. */
2277       SCHECK_PARTIAL();
2278       RRETURN(MATCH_NOMATCH);
2279       }
2280     eptr++;
2281 #ifdef SUPPORT_UTF
2282     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2283 #endif
2284     ecode++;
2285     break;
2286
2287     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2288     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2289
2290     case OP_ANYBYTE:
2291     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2292       {                            /* not be updated before SCHECK_PARTIAL. */
2293       SCHECK_PARTIAL();
2294       RRETURN(MATCH_NOMATCH);
2295       }
2296     eptr++;
2297     ecode++;
2298     break;
2299
2300     case OP_NOT_DIGIT:
2301     if (eptr >= md->end_subject)
2302       {
2303       SCHECK_PARTIAL();
2304       RRETURN(MATCH_NOMATCH);
2305       }
2306     GETCHARINCTEST(c, eptr);
2307     if (
2308 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2309        c < 256 &&
2310 #endif
2311        (md->ctypes[c] & ctype_digit) != 0
2312        )
2313       RRETURN(MATCH_NOMATCH);
2314     ecode++;
2315     break;
2316
2317     case OP_DIGIT:
2318     if (eptr >= md->end_subject)
2319       {
2320       SCHECK_PARTIAL();
2321       RRETURN(MATCH_NOMATCH);
2322       }
2323     GETCHARINCTEST(c, eptr);
2324     if (
2325 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2326        c > 255 ||
2327 #endif
2328        (md->ctypes[c] & ctype_digit) == 0
2329        )
2330       RRETURN(MATCH_NOMATCH);
2331     ecode++;
2332     break;
2333
2334     case OP_NOT_WHITESPACE:
2335     if (eptr >= md->end_subject)
2336       {
2337       SCHECK_PARTIAL();
2338       RRETURN(MATCH_NOMATCH);
2339       }
2340     GETCHARINCTEST(c, eptr);
2341     if (
2342 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2343        c < 256 &&
2344 #endif
2345        (md->ctypes[c] & ctype_space) != 0
2346        )
2347       RRETURN(MATCH_NOMATCH);
2348     ecode++;
2349     break;
2350
2351     case OP_WHITESPACE:
2352     if (eptr >= md->end_subject)
2353       {
2354       SCHECK_PARTIAL();
2355       RRETURN(MATCH_NOMATCH);
2356       }
2357     GETCHARINCTEST(c, eptr);
2358     if (
2359 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2360        c > 255 ||
2361 #endif
2362        (md->ctypes[c] & ctype_space) == 0
2363        )
2364       RRETURN(MATCH_NOMATCH);
2365     ecode++;
2366     break;
2367
2368     case OP_NOT_WORDCHAR:
2369     if (eptr >= md->end_subject)
2370       {
2371       SCHECK_PARTIAL();
2372       RRETURN(MATCH_NOMATCH);
2373       }
2374     GETCHARINCTEST(c, eptr);
2375     if (
2376 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2377        c < 256 &&
2378 #endif
2379        (md->ctypes[c] & ctype_word) != 0
2380        )
2381       RRETURN(MATCH_NOMATCH);
2382     ecode++;
2383     break;
2384
2385     case OP_WORDCHAR:
2386     if (eptr >= md->end_subject)
2387       {
2388       SCHECK_PARTIAL();
2389       RRETURN(MATCH_NOMATCH);
2390       }
2391     GETCHARINCTEST(c, eptr);
2392     if (
2393 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2394        c > 255 ||
2395 #endif
2396        (md->ctypes[c] & ctype_word) == 0
2397        )
2398       RRETURN(MATCH_NOMATCH);
2399     ecode++;
2400     break;
2401
2402     case OP_ANYNL:
2403     if (eptr >= md->end_subject)
2404       {
2405       SCHECK_PARTIAL();
2406       RRETURN(MATCH_NOMATCH);
2407       }
2408     GETCHARINCTEST(c, eptr);
2409     switch(c)
2410       {
2411       default: RRETURN(MATCH_NOMATCH);
2412
2413       case 0x000d:
2414       if (eptr >= md->end_subject)
2415         {
2416         SCHECK_PARTIAL();
2417         }
2418       else if (*eptr == 0x0a) eptr++;
2419       break;
2420
2421       case 0x000a:
2422       break;
2423
2424       case 0x000b:
2425       case 0x000c:
2426       case 0x0085:
2427       case 0x2028:
2428       case 0x2029:
2429       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2430       break;
2431       }
2432     ecode++;
2433     break;
2434
2435     case OP_NOT_HSPACE:
2436     if (eptr >= md->end_subject)
2437       {
2438       SCHECK_PARTIAL();
2439       RRETURN(MATCH_NOMATCH);
2440       }
2441     GETCHARINCTEST(c, eptr);
2442     switch(c)
2443       {
2444       default: break;
2445       case 0x09:      /* HT */
2446       case 0x20:      /* SPACE */
2447       case 0xa0:      /* NBSP */
2448       case 0x1680:    /* OGHAM SPACE MARK */
2449       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2450       case 0x2000:    /* EN QUAD */
2451       case 0x2001:    /* EM QUAD */
2452       case 0x2002:    /* EN SPACE */
2453       case 0x2003:    /* EM SPACE */
2454       case 0x2004:    /* THREE-PER-EM SPACE */
2455       case 0x2005:    /* FOUR-PER-EM SPACE */
2456       case 0x2006:    /* SIX-PER-EM SPACE */
2457       case 0x2007:    /* FIGURE SPACE */
2458       case 0x2008:    /* PUNCTUATION SPACE */
2459       case 0x2009:    /* THIN SPACE */
2460       case 0x200A:    /* HAIR SPACE */
2461       case 0x202f:    /* NARROW NO-BREAK SPACE */
2462       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2463       case 0x3000:    /* IDEOGRAPHIC SPACE */
2464       RRETURN(MATCH_NOMATCH);
2465       }
2466     ecode++;
2467     break;
2468
2469     case OP_HSPACE:
2470     if (eptr >= md->end_subject)
2471       {
2472       SCHECK_PARTIAL();
2473       RRETURN(MATCH_NOMATCH);
2474       }
2475     GETCHARINCTEST(c, eptr);
2476     switch(c)
2477       {
2478       default: RRETURN(MATCH_NOMATCH);
2479       case 0x09:      /* HT */
2480       case 0x20:      /* SPACE */
2481       case 0xa0:      /* NBSP */
2482       case 0x1680:    /* OGHAM SPACE MARK */
2483       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2484       case 0x2000:    /* EN QUAD */
2485       case 0x2001:    /* EM QUAD */
2486       case 0x2002:    /* EN SPACE */
2487       case 0x2003:    /* EM SPACE */
2488       case 0x2004:    /* THREE-PER-EM SPACE */
2489       case 0x2005:    /* FOUR-PER-EM SPACE */
2490       case 0x2006:    /* SIX-PER-EM SPACE */
2491       case 0x2007:    /* FIGURE SPACE */
2492       case 0x2008:    /* PUNCTUATION SPACE */
2493       case 0x2009:    /* THIN SPACE */
2494       case 0x200A:    /* HAIR SPACE */
2495       case 0x202f:    /* NARROW NO-BREAK SPACE */
2496       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2497       case 0x3000:    /* IDEOGRAPHIC SPACE */
2498       break;
2499       }
2500     ecode++;
2501     break;
2502
2503     case OP_NOT_VSPACE:
2504     if (eptr >= md->end_subject)
2505       {
2506       SCHECK_PARTIAL();
2507       RRETURN(MATCH_NOMATCH);
2508       }
2509     GETCHARINCTEST(c, eptr);
2510     switch(c)
2511       {
2512       default: break;
2513       case 0x0a:      /* LF */
2514       case 0x0b:      /* VT */
2515       case 0x0c:      /* FF */
2516       case 0x0d:      /* CR */
2517       case 0x85:      /* NEL */
2518       case 0x2028:    /* LINE SEPARATOR */
2519       case 0x2029:    /* PARAGRAPH SEPARATOR */
2520       RRETURN(MATCH_NOMATCH);
2521       }
2522     ecode++;
2523     break;
2524
2525     case OP_VSPACE:
2526     if (eptr >= md->end_subject)
2527       {
2528       SCHECK_PARTIAL();
2529       RRETURN(MATCH_NOMATCH);
2530       }
2531     GETCHARINCTEST(c, eptr);
2532     switch(c)
2533       {
2534       default: RRETURN(MATCH_NOMATCH);
2535       case 0x0a:      /* LF */
2536       case 0x0b:      /* VT */
2537       case 0x0c:      /* FF */
2538       case 0x0d:      /* CR */
2539       case 0x85:      /* NEL */
2540       case 0x2028:    /* LINE SEPARATOR */
2541       case 0x2029:    /* PARAGRAPH SEPARATOR */
2542       break;
2543       }
2544     ecode++;
2545     break;
2546
2547 #ifdef SUPPORT_UCP
2548     /* Check the next character by Unicode property. We will get here only
2549     if the support is in the binary; otherwise a compile-time error occurs. */
2550
2551     case OP_PROP:
2552     case OP_NOTPROP:
2553     if (eptr >= md->end_subject)
2554       {
2555       SCHECK_PARTIAL();
2556       RRETURN(MATCH_NOMATCH);
2557       }
2558     GETCHARINCTEST(c, eptr);
2559       {
2560       const pcre_uint8 chartype = UCD_CHARTYPE(c);
2561
2562       switch(ecode[1])
2563         {
2564         case PT_ANY:
2565         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2566         break;
2567
2568         case PT_LAMP:
2569         if ((chartype == ucp_Lu ||
2570              chartype == ucp_Ll ||
2571              chartype == ucp_Lt) == (op == OP_NOTPROP))
2572           RRETURN(MATCH_NOMATCH);
2573         break;
2574
2575         case PT_GC:
2576         if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
2577           RRETURN(MATCH_NOMATCH);
2578         break;
2579
2580         case PT_PC:
2581         if ((ecode[2] != chartype) == (op == OP_PROP))
2582           RRETURN(MATCH_NOMATCH);
2583         break;
2584
2585         case PT_SC:
2586         if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
2587           RRETURN(MATCH_NOMATCH);
2588         break;
2589
2590         /* These are specials */
2591
2592         case PT_ALNUM:
2593         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2594              PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
2595           RRETURN(MATCH_NOMATCH);
2596         break;
2597
2598         case PT_SPACE:    /* Perl space */
2599         if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2600              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2601                == (op == OP_NOTPROP))
2602           RRETURN(MATCH_NOMATCH);
2603         break;
2604
2605         case PT_PXSPACE:  /* POSIX space */
2606         if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2607              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2608              c == CHAR_FF || c == CHAR_CR)
2609                == (op == OP_NOTPROP))
2610           RRETURN(MATCH_NOMATCH);
2611         break;
2612
2613         case PT_WORD:
2614         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2615              PRIV(ucp_gentype)[chartype] == ucp_N ||
2616              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2617           RRETURN(MATCH_NOMATCH);
2618         break;
2619
2620         /* This should never occur */
2621
2622         default:
2623         RRETURN(PCRE_ERROR_INTERNAL);
2624         }
2625
2626       ecode += 3;
2627       }
2628     break;
2629
2630     /* Match an extended Unicode sequence. We will get here only if the support
2631     is in the binary; otherwise a compile-time error occurs. */
2632
2633     case OP_EXTUNI:
2634     if (eptr >= md->end_subject)
2635       {
2636       SCHECK_PARTIAL();
2637       RRETURN(MATCH_NOMATCH);
2638       }
2639     GETCHARINCTEST(c, eptr);
2640     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2641     while (eptr < md->end_subject)
2642       {
2643       int len = 1;
2644       if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2645       if (UCD_CATEGORY(c) != ucp_M) break;
2646       eptr += len;
2647       }
2648     CHECK_PARTIAL();
2649     ecode++;
2650     break;
2651 #endif
2652
2653
2654     /* Match a back reference, possibly repeatedly. Look past the end of the
2655     item to see if there is repeat information following. The code is similar
2656     to that for character classes, but repeated for efficiency. Then obey
2657     similar code to character type repeats - written out again for speed.
2658     However, if the referenced string is the empty string, always treat
2659     it as matched, any number of times (otherwise there could be infinite
2660     loops). */
2661
2662     case OP_REF:
2663     case OP_REFI:
2664     caseless = op == OP_REFI;
2665     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2666     ecode += 1 + IMM2_SIZE;
2667
2668     /* If the reference is unset, there are two possibilities:
2669
2670     (a) In the default, Perl-compatible state, set the length negative;
2671     this ensures that every attempt at a match fails. We can't just fail
2672     here, because of the possibility of quantifiers with zero minima.
2673
2674     (b) If the JavaScript compatibility flag is set, set the length to zero
2675     so that the back reference matches an empty string.
2676
2677     Otherwise, set the length to the length of what was matched by the
2678     referenced subpattern. */
2679
2680     if (offset >= offset_top || md->offset_vector[offset] < 0)
2681       length = (md->jscript_compat)? 0 : -1;
2682     else
2683       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2684
2685     /* Set up for repetition, or handle the non-repeated case */
2686
2687     switch (*ecode)
2688       {
2689       case OP_CRSTAR:
2690       case OP_CRMINSTAR:
2691       case OP_CRPLUS:
2692       case OP_CRMINPLUS:
2693       case OP_CRQUERY:
2694       case OP_CRMINQUERY:
2695       c = *ecode++ - OP_CRSTAR;
2696       minimize = (c & 1) != 0;
2697       min = rep_min[c];                 /* Pick up values from tables; */
2698       max = rep_max[c];                 /* zero for max => infinity */
2699       if (max == 0) max = INT_MAX;
2700       break;
2701
2702       case OP_CRRANGE:
2703       case OP_CRMINRANGE:
2704       minimize = (*ecode == OP_CRMINRANGE);
2705       min = GET2(ecode, 1);
2706       max = GET2(ecode, 1 + IMM2_SIZE);
2707       if (max == 0) max = INT_MAX;
2708       ecode += 1 + 2 * IMM2_SIZE;
2709       break;
2710
2711       default:               /* No repeat follows */
2712       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2713         {
2714         if (length == -2) eptr = md->end_subject;   /* Partial match */
2715         CHECK_PARTIAL();
2716         RRETURN(MATCH_NOMATCH);
2717         }
2718       eptr += length;
2719       continue;              /* With the main loop */
2720       }
2721
2722     /* Handle repeated back references. If the length of the reference is
2723     zero, just continue with the main loop. If the length is negative, it
2724     means the reference is unset in non-JavaScript-compatible mode. If the
2725     minimum is zero, we can continue at the same level without recursion. For
2726     any other minimum, carrying on will result in NOMATCH. */
2727
2728     if (length == 0) continue;
2729     if (length < 0 && min == 0) continue;
2730
2731     /* First, ensure the minimum number of matches are present. We get back
2732     the length of the reference string explicitly rather than passing the
2733     address of eptr, so that eptr can be a register variable. */
2734
2735     for (i = 1; i <= min; i++)
2736       {
2737       int slength;
2738       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2739         {
2740         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2741         CHECK_PARTIAL();
2742         RRETURN(MATCH_NOMATCH);
2743         }
2744       eptr += slength;
2745       }
2746
2747     /* If min = max, continue at the same level without recursion.
2748     They are not both allowed to be zero. */
2749
2750     if (min == max) continue;
2751
2752     /* If minimizing, keep trying and advancing the pointer */
2753
2754     if (minimize)
2755       {
2756       for (fi = min;; fi++)
2757         {
2758         int slength;
2759         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2760         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2761         if (fi >= max) RRETURN(MATCH_NOMATCH);
2762         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2763           {
2764           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2765           CHECK_PARTIAL();
2766           RRETURN(MATCH_NOMATCH);
2767           }
2768         eptr += slength;
2769         }
2770       /* Control never gets here */
2771       }
2772
2773     /* If maximizing, find the longest string and work backwards */
2774
2775     else
2776       {
2777       pp = eptr;
2778       for (i = min; i < max; i++)
2779         {
2780         int slength;
2781         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2782           {
2783           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2784           the soft partial matching case. */
2785
2786           if (slength == -2 && md->partial != 0 &&
2787               md->end_subject > md->start_used_ptr)
2788             {
2789             md->hitend = TRUE;
2790             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2791             }
2792           break;
2793           }
2794         eptr += slength;
2795         }
2796
2797       while (eptr >= pp)
2798         {
2799         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2800         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2801         eptr -= length;
2802         }
2803       RRETURN(MATCH_NOMATCH);
2804       }
2805     /* Control never gets here */
2806
2807     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2808     used when all the characters in the class have values in the range 0-255,
2809     and either the matching is caseful, or the characters are in the range
2810     0-127 when UTF-8 processing is enabled. The only difference between
2811     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2812     encountered.
2813
2814     First, look past the end of the item to see if there is repeat information
2815     following. Then obey similar code to character type repeats - written out
2816     again for speed. */
2817
2818     case OP_NCLASS:
2819     case OP_CLASS:
2820       {
2821       /* The data variable is saved across frames, so the byte map needs to
2822       be stored there. */
2823 #define BYTE_MAP ((pcre_uint8 *)data)
2824       data = ecode + 1;                /* Save for matching */
2825       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2826
2827       switch (*ecode)
2828         {
2829         case OP_CRSTAR:
2830         case OP_CRMINSTAR:
2831         case OP_CRPLUS:
2832         case OP_CRMINPLUS:
2833         case OP_CRQUERY:
2834         case OP_CRMINQUERY:
2835         c = *ecode++ - OP_CRSTAR;
2836         minimize = (c & 1) != 0;
2837         min = rep_min[c];                 /* Pick up values from tables; */
2838         max = rep_max[c];                 /* zero for max => infinity */
2839         if (max == 0) max = INT_MAX;
2840         break;
2841
2842         case OP_CRRANGE:
2843         case OP_CRMINRANGE:
2844         minimize = (*ecode == OP_CRMINRANGE);
2845         min = GET2(ecode, 1);
2846         max = GET2(ecode, 1 + IMM2_SIZE);
2847         if (max == 0) max = INT_MAX;
2848         ecode += 1 + 2 * IMM2_SIZE;
2849         break;
2850
2851         default:               /* No repeat follows */
2852         min = max = 1;
2853         break;
2854         }
2855
2856       /* First, ensure the minimum number of matches are present. */
2857
2858 #ifdef SUPPORT_UTF
2859       if (utf)
2860         {
2861         for (i = 1; i <= min; i++)
2862           {
2863           if (eptr >= md->end_subject)
2864             {
2865             SCHECK_PARTIAL();
2866             RRETURN(MATCH_NOMATCH);
2867             }
2868           GETCHARINC(c, eptr);
2869           if (c > 255)
2870             {
2871             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2872             }
2873           else
2874             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2875           }
2876         }
2877       else
2878 #endif
2879       /* Not UTF mode */
2880         {
2881         for (i = 1; i <= min; i++)
2882           {
2883           if (eptr >= md->end_subject)
2884             {
2885             SCHECK_PARTIAL();
2886             RRETURN(MATCH_NOMATCH);
2887             }
2888           c = *eptr++;
2889 #ifndef COMPILE_PCRE8
2890           if (c > 255)
2891             {
2892             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2893             }
2894           else
2895 #endif
2896             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2897           }
2898         }
2899
2900       /* If max == min we can continue with the main loop without the
2901       need to recurse. */
2902
2903       if (min == max) continue;
2904
2905       /* If minimizing, keep testing the rest of the expression and advancing
2906       the pointer while it matches the class. */
2907
2908       if (minimize)
2909         {
2910 #ifdef SUPPORT_UTF
2911         if (utf)
2912           {
2913           for (fi = min;; fi++)
2914             {
2915             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2916             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2917             if (fi >= max) RRETURN(MATCH_NOMATCH);
2918             if (eptr >= md->end_subject)
2919               {
2920               SCHECK_PARTIAL();
2921               RRETURN(MATCH_NOMATCH);
2922               }
2923             GETCHARINC(c, eptr);
2924             if (c > 255)
2925               {
2926               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2927               }
2928             else
2929               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2930             }
2931           }
2932         else
2933 #endif
2934         /* Not UTF mode */
2935           {
2936           for (fi = min;; fi++)
2937             {
2938             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2939             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940             if (fi >= max) RRETURN(MATCH_NOMATCH);
2941             if (eptr >= md->end_subject)
2942               {
2943               SCHECK_PARTIAL();
2944               RRETURN(MATCH_NOMATCH);
2945               }
2946             c = *eptr++;
2947 #ifndef COMPILE_PCRE8
2948             if (c > 255)
2949               {
2950               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2951               }
2952             else
2953 #endif
2954               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2955             }
2956           }
2957         /* Control never gets here */
2958         }
2959
2960       /* If maximizing, find the longest possible run, then work backwards. */
2961
2962       else
2963         {
2964         pp = eptr;
2965
2966 #ifdef SUPPORT_UTF
2967         if (utf)
2968           {
2969           for (i = min; i < max; i++)
2970             {
2971             int len = 1;
2972             if (eptr >= md->end_subject)
2973               {
2974               SCHECK_PARTIAL();
2975               break;
2976               }
2977             GETCHARLEN(c, eptr, len);
2978             if (c > 255)
2979               {
2980               if (op == OP_CLASS) break;
2981               }
2982             else
2983               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2984             eptr += len;
2985             }
2986           for (;;)
2987             {
2988             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2989             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2990             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2991             BACKCHAR(eptr);
2992             }
2993           }
2994         else
2995 #endif
2996           /* Not UTF mode */
2997           {
2998           for (i = min; i < max; i++)
2999             {
3000             if (eptr >= md->end_subject)
3001               {
3002               SCHECK_PARTIAL();
3003               break;
3004               }
3005             c = *eptr;
3006 #ifndef COMPILE_PCRE8
3007             if (c > 255)
3008               {
3009               if (op == OP_CLASS) break;
3010               }
3011             else
3012 #endif
3013               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3014             eptr++;
3015             }
3016           while (eptr >= pp)
3017             {
3018             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3020             eptr--;
3021             }
3022           }
3023
3024         RRETURN(MATCH_NOMATCH);
3025         }
3026 #undef BYTE_MAP
3027       }
3028     /* Control never gets here */
3029
3030
3031     /* Match an extended character class. This opcode is encountered only
3032     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3033     mode, because Unicode properties are supported in non-UTF-8 mode. */
3034
3035 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3036     case OP_XCLASS:
3037       {
3038       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3039       ecode += GET(ecode, 1);                      /* Advance past the item */
3040
3041       switch (*ecode)
3042         {
3043         case OP_CRSTAR:
3044         case OP_CRMINSTAR:
3045         case OP_CRPLUS:
3046         case OP_CRMINPLUS:
3047         case OP_CRQUERY:
3048         case OP_CRMINQUERY:
3049         c = *ecode++ - OP_CRSTAR;
3050         minimize = (c & 1) != 0;
3051         min = rep_min[c];                 /* Pick up values from tables; */
3052         max = rep_max[c];                 /* zero for max => infinity */
3053         if (max == 0) max = INT_MAX;
3054         break;
3055
3056         case OP_CRRANGE:
3057         case OP_CRMINRANGE:
3058         minimize = (*ecode == OP_CRMINRANGE);
3059         min = GET2(ecode, 1);
3060         max = GET2(ecode, 1 + IMM2_SIZE);
3061         if (max == 0) max = INT_MAX;
3062         ecode += 1 + 2 * IMM2_SIZE;
3063         break;
3064
3065         default:               /* No repeat follows */
3066         min = max = 1;
3067         break;
3068         }
3069
3070       /* First, ensure the minimum number of matches are present. */
3071
3072       for (i = 1; i <= min; i++)
3073         {
3074         if (eptr >= md->end_subject)
3075           {
3076           SCHECK_PARTIAL();
3077           RRETURN(MATCH_NOMATCH);
3078           }
3079         GETCHARINCTEST(c, eptr);
3080         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3081         }
3082
3083       /* If max == min we can continue with the main loop without the
3084       need to recurse. */
3085
3086       if (min == max) continue;
3087
3088       /* If minimizing, keep testing the rest of the expression and advancing
3089       the pointer while it matches the class. */
3090
3091       if (minimize)
3092         {
3093         for (fi = min;; fi++)
3094           {
3095           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3096           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097           if (fi >= max) RRETURN(MATCH_NOMATCH);
3098           if (eptr >= md->end_subject)
3099             {
3100             SCHECK_PARTIAL();
3101             RRETURN(MATCH_NOMATCH);
3102             }
3103           GETCHARINCTEST(c, eptr);
3104           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3105           }
3106         /* Control never gets here */
3107         }
3108
3109       /* If maximizing, find the longest possible run, then work backwards. */
3110
3111       else
3112         {
3113         pp = eptr;
3114         for (i = min; i < max; i++)
3115           {
3116           int len = 1;
3117           if (eptr >= md->end_subject)
3118             {
3119             SCHECK_PARTIAL();
3120             break;
3121             }
3122 #ifdef SUPPORT_UTF
3123           GETCHARLENTEST(c, eptr, len);
3124 #else
3125           c = *eptr;
3126 #endif
3127           if (!PRIV(xclass)(c, data, utf)) break;
3128           eptr += len;
3129           }
3130         for(;;)
3131           {
3132           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3133           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3134           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3135 #ifdef SUPPORT_UTF
3136           if (utf) BACKCHAR(eptr);
3137 #endif
3138           }
3139         RRETURN(MATCH_NOMATCH);
3140         }
3141
3142       /* Control never gets here */
3143       }
3144 #endif    /* End of XCLASS */
3145
3146     /* Match a single character, casefully */
3147
3148     case OP_CHAR:
3149 #ifdef SUPPORT_UTF
3150     if (utf)
3151       {
3152       length = 1;
3153       ecode++;
3154       GETCHARLEN(fc, ecode, length);
3155       if (length > md->end_subject - eptr)
3156         {
3157         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3158         RRETURN(MATCH_NOMATCH);
3159         }
3160       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3161       }
3162     else
3163 #endif
3164     /* Not UTF mode */
3165       {
3166       if (md->end_subject - eptr < 1)
3167         {
3168         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3169         RRETURN(MATCH_NOMATCH);
3170         }
3171       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3172       ecode += 2;
3173       }
3174     break;
3175
3176     /* Match a single character, caselessly. If we are at the end of the
3177     subject, give up immediately. */
3178
3179     case OP_CHARI:
3180     if (eptr >= md->end_subject)
3181       {
3182       SCHECK_PARTIAL();
3183       RRETURN(MATCH_NOMATCH);
3184       }
3185
3186 #ifdef SUPPORT_UTF
3187     if (utf)
3188       {
3189       length = 1;
3190       ecode++;
3191       GETCHARLEN(fc, ecode, length);
3192
3193       /* If the pattern character's value is < 128, we have only one byte, and
3194       we know that its other case must also be one byte long, so we can use the
3195       fast lookup table. We know that there is at least one byte left in the
3196       subject. */
3197
3198       if (fc < 128)
3199         {
3200         if (md->lcc[fc]
3201             != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3202         ecode++;
3203         eptr++;
3204         }
3205
3206       /* Otherwise we must pick up the subject character. Note that we cannot
3207       use the value of "length" to check for sufficient bytes left, because the
3208       other case of the character may have more or fewer bytes.  */
3209
3210       else
3211         {
3212         unsigned int dc;
3213         GETCHARINC(dc, eptr);
3214         ecode += length;
3215
3216         /* If we have Unicode property support, we can use it to test the other
3217         case of the character, if there is one. */
3218
3219         if (fc != dc)
3220           {
3221 #ifdef SUPPORT_UCP
3222           if (dc != UCD_OTHERCASE(fc))
3223 #endif
3224             RRETURN(MATCH_NOMATCH);
3225           }
3226         }
3227       }
3228     else
3229 #endif   /* SUPPORT_UTF */
3230
3231     /* Not UTF mode */
3232       {
3233       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3234           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3235       eptr++;
3236       ecode += 2;
3237       }
3238     break;
3239
3240     /* Match a single character repeatedly. */
3241
3242     case OP_EXACT:
3243     case OP_EXACTI:
3244     min = max = GET2(ecode, 1);
3245     ecode += 1 + IMM2_SIZE;
3246     goto REPEATCHAR;
3247
3248     case OP_POSUPTO:
3249     case OP_POSUPTOI:
3250     possessive = TRUE;
3251     /* Fall through */
3252
3253     case OP_UPTO:
3254     case OP_UPTOI:
3255     case OP_MINUPTO:
3256     case OP_MINUPTOI:
3257     min = 0;
3258     max = GET2(ecode, 1);
3259     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3260     ecode += 1 + IMM2_SIZE;
3261     goto REPEATCHAR;
3262
3263     case OP_POSSTAR:
3264     case OP_POSSTARI:
3265     possessive = TRUE;
3266     min = 0;
3267     max = INT_MAX;
3268     ecode++;
3269     goto REPEATCHAR;
3270
3271     case OP_POSPLUS:
3272     case OP_POSPLUSI:
3273     possessive = TRUE;
3274     min = 1;
3275     max = INT_MAX;
3276     ecode++;
3277     goto REPEATCHAR;
3278
3279     case OP_POSQUERY:
3280     case OP_POSQUERYI:
3281     possessive = TRUE;
3282     min = 0;
3283     max = 1;
3284     ecode++;
3285     goto REPEATCHAR;
3286
3287     case OP_STAR:
3288     case OP_STARI:
3289     case OP_MINSTAR:
3290     case OP_MINSTARI:
3291     case OP_PLUS:
3292     case OP_PLUSI:
3293     case OP_MINPLUS:
3294     case OP_MINPLUSI:
3295     case OP_QUERY:
3296     case OP_QUERYI:
3297     case OP_MINQUERY:
3298     case OP_MINQUERYI:
3299     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3300     minimize = (c & 1) != 0;
3301     min = rep_min[c];                 /* Pick up values from tables; */
3302     max = rep_max[c];                 /* zero for max => infinity */
3303     if (max == 0) max = INT_MAX;
3304
3305     /* Common code for all repeated single-character matches. */
3306
3307     REPEATCHAR:
3308 #ifdef SUPPORT_UTF
3309     if (utf)
3310       {
3311       length = 1;
3312       charptr = ecode;
3313       GETCHARLEN(fc, ecode, length);
3314       ecode += length;
3315
3316       /* Handle multibyte character matching specially here. There is
3317       support for caseless matching if UCP support is present. */
3318
3319       if (length > 1)
3320         {
3321 #ifdef SUPPORT_UCP
3322         unsigned int othercase;
3323         if (op >= OP_STARI &&     /* Caseless */
3324             (othercase = UCD_OTHERCASE(fc)) != fc)
3325           oclength = PRIV(ord2utf)(othercase, occhars);
3326         else oclength = 0;
3327 #endif  /* SUPPORT_UCP */
3328
3329         for (i = 1; i <= min; i++)
3330           {
3331           if (eptr <= md->end_subject - length &&
3332             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3333 #ifdef SUPPORT_UCP
3334           else if (oclength > 0 &&
3335                    eptr <= md->end_subject - oclength &&
3336                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3337 #endif  /* SUPPORT_UCP */
3338           else
3339             {
3340             CHECK_PARTIAL();
3341             RRETURN(MATCH_NOMATCH);
3342             }
3343           }
3344
3345         if (min == max) continue;
3346
3347         if (minimize)
3348           {
3349           for (fi = min;; fi++)
3350             {
3351             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3352             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3353             if (fi >= max) RRETURN(MATCH_NOMATCH);
3354             if (eptr <= md->end_subject - length &&
3355               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3356 #ifdef SUPPORT_UCP
3357             else if (oclength > 0 &&
3358                      eptr <= md->end_subject - oclength &&
3359                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3360 #endif  /* SUPPORT_UCP */
3361             else
3362               {
3363               CHECK_PARTIAL();
3364               RRETURN(MATCH_NOMATCH);
3365               }
3366             }
3367           /* Control never gets here */
3368           }
3369
3370         else  /* Maximize */
3371           {
3372           pp = eptr;
3373           for (i = min; i < max; i++)
3374             {
3375             if (eptr <= md->end_subject - length &&
3376                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3377 #ifdef SUPPORT_UCP
3378             else if (oclength > 0 &&
3379                      eptr <= md->end_subject - oclength &&
3380                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3381 #endif  /* SUPPORT_UCP */
3382             else
3383               {
3384               CHECK_PARTIAL();
3385               break;
3386               }
3387             }
3388
3389           if (possessive) continue;
3390
3391           for(;;)
3392             {
3393             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3394             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3396 #ifdef SUPPORT_UCP
3397             eptr--;
3398             BACKCHAR(eptr);
3399 #else   /* without SUPPORT_UCP */
3400             eptr -= length;
3401 #endif  /* SUPPORT_UCP */
3402             }
3403           }
3404         /* Control never gets here */
3405         }
3406
3407       /* If the length of a UTF-8 character is 1, we fall through here, and
3408       obey the code as for non-UTF-8 characters below, though in this case the
3409       value of fc will always be < 128. */
3410       }
3411     else
3412 #endif  /* SUPPORT_UTF */
3413       /* When not in UTF-8 mode, load a single-byte character. */
3414       fc = *ecode++;
3415
3416     /* The value of fc at this point is always one character, though we may
3417     or may not be in UTF mode. The code is duplicated for the caseless and
3418     caseful cases, for speed, since matching characters is likely to be quite
3419     common. First, ensure the minimum number of matches are present. If min =
3420     max, continue at the same level without recursing. Otherwise, if
3421     minimizing, keep trying the rest of the expression and advancing one
3422     matching character if failing, up to the maximum. Alternatively, if
3423     maximizing, find the maximum number of characters and work backwards. */
3424
3425     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3426       max, (char *)eptr));
3427
3428     if (op >= OP_STARI)  /* Caseless */
3429       {
3430 #ifdef COMPILE_PCRE8
3431       /* fc must be < 128 if UTF is enabled. */
3432       foc = md->fcc[fc];
3433 #else
3434 #ifdef SUPPORT_UTF
3435 #ifdef SUPPORT_UCP
3436       if (utf && fc > 127)
3437         foc = UCD_OTHERCASE(fc);
3438 #else
3439       if (utf && fc > 127)
3440         foc = fc;
3441 #endif /* SUPPORT_UCP */
3442       else
3443 #endif /* SUPPORT_UTF */
3444         foc = TABLE_GET(fc, md->fcc, fc);
3445 #endif /* COMPILE_PCRE8 */
3446
3447       for (i = 1; i <= min; i++)
3448         {
3449         if (eptr >= md->end_subject)
3450           {
3451           SCHECK_PARTIAL();
3452           RRETURN(MATCH_NOMATCH);
3453           }
3454         if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3455         eptr++;
3456         }
3457       if (min == max) continue;
3458       if (minimize)
3459         {
3460         for (fi = min;; fi++)
3461           {
3462           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3463           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3464           if (fi >= max) RRETURN(MATCH_NOMATCH);
3465           if (eptr >= md->end_subject)
3466             {
3467             SCHECK_PARTIAL();
3468             RRETURN(MATCH_NOMATCH);
3469             }
3470           if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3471           eptr++;
3472           }
3473         /* Control never gets here */
3474         }
3475       else  /* Maximize */
3476         {
3477         pp = eptr;
3478         for (i = min; i < max; i++)
3479           {
3480           if (eptr >= md->end_subject)
3481             {
3482             SCHECK_PARTIAL();
3483             break;
3484             }
3485           if (fc != *eptr && foc != *eptr) break;
3486           eptr++;
3487           }
3488
3489         if (possessive) continue;
3490
3491         while (eptr >= pp)
3492           {
3493           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3494           eptr--;
3495           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3496           }
3497         RRETURN(MATCH_NOMATCH);
3498         }
3499       /* Control never gets here */
3500       }
3501
3502     /* Caseful comparisons (includes all multi-byte characters) */
3503
3504     else
3505       {
3506       for (i = 1; i <= min; i++)
3507         {
3508         if (eptr >= md->end_subject)
3509           {
3510           SCHECK_PARTIAL();
3511           RRETURN(MATCH_NOMATCH);
3512           }
3513         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3514         }
3515
3516       if (min == max) continue;
3517
3518       if (minimize)
3519         {
3520         for (fi = min;; fi++)
3521           {
3522           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3523           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524           if (fi >= max) RRETURN(MATCH_NOMATCH);
3525           if (eptr >= md->end_subject)
3526             {
3527             SCHECK_PARTIAL();
3528             RRETURN(MATCH_NOMATCH);
3529             }
3530           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3531           }
3532         /* Control never gets here */
3533         }
3534       else  /* Maximize */
3535         {
3536         pp = eptr;
3537         for (i = min; i < max; i++)
3538           {
3539           if (eptr >= md->end_subject)
3540             {
3541             SCHECK_PARTIAL();
3542             break;
3543             }
3544           if (fc != *eptr) break;
3545           eptr++;
3546           }
3547         if (possessive) continue;
3548
3549         while (eptr >= pp)
3550           {
3551           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3552           eptr--;
3553           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3554           }
3555         RRETURN(MATCH_NOMATCH);
3556         }
3557       }
3558     /* Control never gets here */
3559
3560     /* Match a negated single character; OP_NOTI also rejects its other case.
3561     In UTF mode both the pattern and subject characters can be multibyte. */
3562
3563     case OP_NOT:
3564     case OP_NOTI:
3565     if (eptr >= md->end_subject)
3566       {
3567       SCHECK_PARTIAL();
3568       RRETURN(MATCH_NOMATCH);
3569       }
3570 #ifdef SUPPORT_UTF
3571     if (utf)
3572       {
3573       unsigned int ch, och;
3574
3575       ecode++;
3576       GETCHARINC(ch, ecode);
3577       GETCHARINC(c, eptr);
3578
3579       if (op == OP_NOT)
3580         {
3581         if (ch == c) RRETURN(MATCH_NOMATCH);
3582         }
3583       else
3584         {
3585 #ifdef SUPPORT_UCP
3586         if (ch > 127)
3587           och = UCD_OTHERCASE(ch);
3588 #else
3589         if (ch > 127)
3590           och = ch;
3591 #endif /* SUPPORT_UCP */
3592         else
3593           och = TABLE_GET(ch, md->fcc, ch);
3594         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3595         }
3596       }
3597     else
3598 #endif
3599       {
3600       unsigned int ch = ecode[1];
3601       c = *eptr++;
3602       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3603         RRETURN(MATCH_NOMATCH);
3604       ecode += 2;
3605       }
3606     break;
3607
3608     /* Match a negated single character repeatedly. This is almost a
3609     repeat of the code for a repeated single character, but I haven't found a
3610     nice way of sharing the two that doesn't require a test of the
3611     positive/negative option for each character match. Maybe that wouldn't add
3612     very much to the time taken, but character matching *is* what this is all
3613     about... */
3614
3615     case OP_NOTEXACT:
3616     case OP_NOTEXACTI:
3617     min = max = GET2(ecode, 1);
3618     ecode += 1 + IMM2_SIZE;
3619     goto REPEATNOTCHAR;
3620
3621     case OP_NOTUPTO:
3622     case OP_NOTUPTOI:
3623     case OP_NOTMINUPTO:
3624     case OP_NOTMINUPTOI:
3625     min = 0;
3626     max = GET2(ecode, 1);
3627     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3628     ecode += 1 + IMM2_SIZE;
3629     goto REPEATNOTCHAR;
3630
3631     case OP_NOTPOSSTAR:
3632     case OP_NOTPOSSTARI:
3633     possessive = TRUE;
3634     min = 0;
3635     max = INT_MAX;
3636     ecode++;
3637     goto REPEATNOTCHAR;
3638
3639     case OP_NOTPOSPLUS:
3640     case OP_NOTPOSPLUSI:
3641     possessive = TRUE;
3642     min = 1;
3643     max = INT_MAX;
3644     ecode++;
3645     goto REPEATNOTCHAR;
3646
3647     case OP_NOTPOSQUERY:
3648     case OP_NOTPOSQUERYI:
3649     possessive = TRUE;
3650     min = 0;
3651     max = 1;
3652     ecode++;
3653     goto REPEATNOTCHAR;
3654
3655     case OP_NOTPOSUPTO:
3656     case OP_NOTPOSUPTOI:
3657     possessive = TRUE;
3658     min = 0;
3659     max = GET2(ecode, 1);
3660     ecode += 1 + IMM2_SIZE;
3661     goto REPEATNOTCHAR;
3662
3663     case OP_NOTSTAR:
3664     case OP_NOTSTARI:
3665     case OP_NOTMINSTAR:
3666     case OP_NOTMINSTARI:
3667     case OP_NOTPLUS:
3668     case OP_NOTPLUSI:
3669     case OP_NOTMINPLUS:
3670     case OP_NOTMINPLUSI:
3671     case OP_NOTQUERY:
3672     case OP_NOTQUERYI:
3673     case OP_NOTMINQUERY:
3674     case OP_NOTMINQUERYI:
3675     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3676     minimize = (c & 1) != 0;
3677     min = rep_min[c];                 /* Pick up values from tables; */
3678     max = rep_max[c];                 /* zero for max => infinity */
3679     if (max == 0) max = INT_MAX;
3680
3681     /* Common code for all repeated negated single-character matches. */
3682
3683     REPEATNOTCHAR:
3684     GETCHARINCTEST(fc, ecode);
3685
3686     /* The code is duplicated for the caseless and caseful cases, for speed,
3687     since matching characters is likely to be quite common. First, ensure the
3688     minimum number of matches are present. If min = max, continue at the same
3689     level without recursing. Otherwise, if minimizing, keep trying the rest of
3690     the expression and advancing one matching character if failing, up to the
3691     maximum. Alternatively, if maximizing, find the maximum number of
3692     characters and work backwards. */
3693
3694     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3695       max, (char *)eptr));
3696
3697     if (op >= OP_NOTSTARI)     /* Caseless */
3698       {
3699 #ifdef SUPPORT_UTF
3700 #ifdef SUPPORT_UCP
3701       if (utf && fc > 127)
3702         foc = UCD_OTHERCASE(fc);
3703 #else
3704       if (utf && fc > 127)
3705         foc = fc;
3706 #endif /* SUPPORT_UCP */
3707       else
3708 #endif /* SUPPORT_UTF */
3709         foc = TABLE_GET(fc, md->fcc, fc);
3710
3711 #ifdef SUPPORT_UTF
3712       if (utf)
3713         {
3714         unsigned int d;
3715         for (i = 1; i <= min; i++)
3716           {
3717           if (eptr >= md->end_subject)
3718             {
3719             SCHECK_PARTIAL();
3720             RRETURN(MATCH_NOMATCH);
3721             }
3722           GETCHARINC(d, eptr);
3723           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3724           }
3725         }
3726       else
3727 #endif
3728       /* Not UTF mode */
3729         {
3730         for (i = 1; i <= min; i++)
3731           {
3732           if (eptr >= md->end_subject)
3733             {
3734             SCHECK_PARTIAL();
3735             RRETURN(MATCH_NOMATCH);
3736             }
3737           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3738           eptr++;
3739           }
3740         }
3741
3742       if (min == max) continue;
3743
3744       if (minimize)
3745         {
3746 #ifdef SUPPORT_UTF
3747         if (utf)
3748           {
3749           unsigned int d;
3750           for (fi = min;; fi++)
3751             {
3752             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3753             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3754             if (fi >= max) RRETURN(MATCH_NOMATCH);
3755             if (eptr >= md->end_subject)
3756               {
3757               SCHECK_PARTIAL();
3758               RRETURN(MATCH_NOMATCH);
3759               }
3760             GETCHARINC(d, eptr);
3761             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3762             }
3763           }
3764         else
3765 #endif
3766         /* Not UTF mode */
3767           {
3768           for (fi = min;; fi++)
3769             {
3770             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3771             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3772             if (fi >= max) RRETURN(MATCH_NOMATCH);
3773             if (eptr >= md->end_subject)
3774               {
3775               SCHECK_PARTIAL();
3776               RRETURN(MATCH_NOMATCH);
3777               }
3778             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3779             eptr++;
3780             }
3781           }
3782         /* Control never gets here */
3783         }
3784
3785       /* Maximize case */
3786
3787       else
3788         {
3789         pp = eptr;
3790
3791 #ifdef SUPPORT_UTF
3792         if (utf)
3793           {
3794           unsigned int d;
3795           for (i = min; i < max; i++)
3796             {
3797             int len = 1;
3798             if (eptr >= md->end_subject)
3799               {
3800               SCHECK_PARTIAL();
3801               break;
3802               }
3803             GETCHARLEN(d, eptr, len);
3804             if (fc == d || (unsigned int)foc == d) break;
3805             eptr += len;
3806             }
3807           if (possessive) continue;
3808           for(;;)
3809             {
3810             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3811             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3812             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3813             BACKCHAR(eptr);
3814             }
3815           }
3816         else
3817 #endif
3818         /* Not UTF mode */
3819           {
3820           for (i = min; i < max; i++)
3821             {
3822             if (eptr >= md->end_subject)
3823               {
3824               SCHECK_PARTIAL();
3825               break;
3826               }
3827             if (fc == *eptr || foc == *eptr) break;
3828             eptr++;
3829             }
3830           if (possessive) continue;
3831           while (eptr >= pp)
3832             {
3833             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3834             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3835             eptr--;
3836             }
3837           }
3838
3839         RRETURN(MATCH_NOMATCH);
3840         }
3841       /* Control never gets here */
3842       }
3843
3844     /* Caseful comparisons */
3845
3846     else
3847       {
3848 #ifdef SUPPORT_UTF
3849       if (utf)
3850         {
3851         unsigned int d;
3852         for (i = 1; i <= min; i++)
3853           {
3854           if (eptr >= md->end_subject)
3855             {
3856             SCHECK_PARTIAL();
3857             RRETURN(MATCH_NOMATCH);
3858             }
3859           GETCHARINC(d, eptr);
3860           if (fc == d) RRETURN(MATCH_NOMATCH);
3861           }
3862         }
3863       else
3864 #endif
3865       /* Not UTF mode */
3866         {
3867         for (i = 1; i <= min; i++)
3868           {
3869           if (eptr >= md->end_subject)
3870             {
3871             SCHECK_PARTIAL();
3872             RRETURN(MATCH_NOMATCH);
3873             }
3874           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3875           }
3876         }
3877
3878       if (min == max) continue;
3879
3880       if (minimize)
3881         {
3882 #ifdef SUPPORT_UTF
3883         if (utf)
3884           {
3885           unsigned int d;
3886           for (fi = min;; fi++)
3887             {
3888             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3889             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890             if (fi >= max) RRETURN(MATCH_NOMATCH);
3891             if (eptr >= md->end_subject)
3892               {
3893               SCHECK_PARTIAL();
3894               RRETURN(MATCH_NOMATCH);
3895               }
3896             GETCHARINC(d, eptr);
3897             if (fc == d) RRETURN(MATCH_NOMATCH);
3898             }
3899           }
3900         else
3901 #endif
3902         /* Not UTF mode */
3903           {
3904           for (fi = min;; fi++)
3905             {
3906             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3907             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3908             if (fi >= max) RRETURN(MATCH_NOMATCH);
3909             if (eptr >= md->end_subject)
3910               {
3911               SCHECK_PARTIAL();
3912               RRETURN(MATCH_NOMATCH);
3913               }
3914             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3915             }
3916           }
3917         /* Control never gets here */
3918         }
3919
3920       /* Maximize case */
3921
3922       else
3923         {
3924         pp = eptr;
3925
3926 #ifdef SUPPORT_UTF
3927         if (utf)
3928           {
3929           unsigned int d;
3930           for (i = min; i < max; i++)
3931             {
3932             int len = 1;
3933             if (eptr >= md->end_subject)
3934               {
3935               SCHECK_PARTIAL();
3936               break;
3937               }
3938             GETCHARLEN(d, eptr, len);
3939             if (fc == d) break;
3940             eptr += len;
3941             }
3942           if (possessive) continue;
3943           for(;;)
3944             {
3945             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3946             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3947             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3948             BACKCHAR(eptr);
3949             }
3950           }
3951         else
3952 #endif
3953         /* Not UTF mode */
3954           {
3955           for (i = min; i < max; i++)
3956             {
3957             if (eptr >= md->end_subject)
3958               {
3959               SCHECK_PARTIAL();
3960               break;
3961               }
3962             if (fc == *eptr) break;
3963             eptr++;
3964             }
3965           if (possessive) continue;
3966           while (eptr >= pp)
3967             {
3968             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3969             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3970             eptr--;
3971             }
3972           }
3973
3974         RRETURN(MATCH_NOMATCH);
3975         }
3976       }
3977     /* Control never gets here */
3978
3979     /* Match a single character type repeatedly; several different opcodes
3980     share code. This is very similar to the code for single characters, but we
3981     repeat it in the interests of efficiency. */
3982
3983     case OP_TYPEEXACT:
3984     min = max = GET2(ecode, 1);
3985     minimize = TRUE;
3986     ecode += 1 + IMM2_SIZE;
3987     goto REPEATTYPE;
3988
3989     case OP_TYPEUPTO:
3990     case OP_TYPEMINUPTO:
3991     min = 0;
3992     max = GET2(ecode, 1);
3993     minimize = *ecode == OP_TYPEMINUPTO;
3994     ecode += 1 + IMM2_SIZE;
3995     goto REPEATTYPE;
3996
3997     case OP_TYPEPOSSTAR:
3998     possessive = TRUE;
3999     min = 0;
4000     max = INT_MAX;
4001     ecode++;
4002     goto REPEATTYPE;
4003
4004     case OP_TYPEPOSPLUS:
4005     possessive = TRUE;
4006     min = 1;
4007     max = INT_MAX;
4008     ecode++;
4009     goto REPEATTYPE;
4010
4011     case OP_TYPEPOSQUERY:
4012     possessive = TRUE;
4013     min = 0;
4014     max = 1;
4015     ecode++;
4016     goto REPEATTYPE;
4017
4018     case OP_TYPEPOSUPTO:
4019     possessive = TRUE;
4020     min = 0;
4021     max = GET2(ecode, 1);
4022     ecode += 1 + IMM2_SIZE;
4023     goto REPEATTYPE;
4024
4025     case OP_TYPESTAR:
4026     case OP_TYPEMINSTAR:
4027     case OP_TYPEPLUS:
4028     case OP_TYPEMINPLUS:
4029     case OP_TYPEQUERY:
4030     case OP_TYPEMINQUERY:
4031     c = *ecode++ - OP_TYPESTAR;
4032     minimize = (c & 1) != 0;
4033     min = rep_min[c];                 /* Pick up values from tables; */
4034     max = rep_max[c];                 /* zero for max => infinity */
4035     if (max == 0) max = INT_MAX;
4036
4037     /* Common code for all repeated single character type matches. Note that
4038     in UTF mode '.' matches a character of any length, and only OP_DIGIT,
4039     OP_WHITESPACE and OP_WORDCHAR are limited to one-byte characters. */
4040
4041     REPEATTYPE:
4042     ctype = *ecode++;      /* Code for the character type */
4043
4044 #ifdef SUPPORT_UCP
4045     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4046       {
4047       prop_fail_result = ctype == OP_NOTPROP;
4048       prop_type = *ecode++;
4049       prop_value = *ecode++;
4050       }
4051     else prop_type = -1;
4052 #endif
4053
4054     /* First, ensure the minimum number of matches are present. Use inline
4055     code for maximizing the speed, and do the type test once at the start
4056     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4057     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4058     and single-bytes. */
4059
4060     if (min > 0)
4061       {
4062 #ifdef SUPPORT_UCP
4063       if (prop_type >= 0)
4064         {
4065         switch(prop_type)
4066           {
4067           case PT_ANY:
4068           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4069           for (i = 1; i <= min; i++)
4070             {
4071             if (eptr >= md->end_subject)
4072               {
4073               SCHECK_PARTIAL();
4074               RRETURN(MATCH_NOMATCH);
4075               }
4076             GETCHARINCTEST(c, eptr);
4077             }
4078           break;
4079
4080           case PT_LAMP:
4081           for (i = 1; i <= min; i++)
4082             {
4083             int chartype;
4084             if (eptr >= md->end_subject)
4085               {
4086               SCHECK_PARTIAL();
4087               RRETURN(MATCH_NOMATCH);
4088               }
4089             GETCHARINCTEST(c, eptr);
4090             chartype = UCD_CHARTYPE(c);
4091             if ((chartype == ucp_Lu ||
4092                  chartype == ucp_Ll ||
4093                  chartype == ucp_Lt) == prop_fail_result)
4094               RRETURN(MATCH_NOMATCH);
4095             }
4096           break;
4097
4098           case PT_GC:
4099           for (i = 1; i <= min; i++)
4100             {
4101             if (eptr >= md->end_subject)
4102               {
4103               SCHECK_PARTIAL();
4104               RRETURN(MATCH_NOMATCH);
4105               }
4106             GETCHARINCTEST(c, eptr);
4107             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4108               RRETURN(MATCH_NOMATCH);
4109             }
4110           break;
4111
4112           case PT_PC:
4113           for (i = 1; i <= min; i++)
4114             {
4115             if (eptr >= md->end_subject)
4116               {
4117               SCHECK_PARTIAL();
4118               RRETURN(MATCH_NOMATCH);
4119               }
4120             GETCHARINCTEST(c, eptr);
4121             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4122               RRETURN(MATCH_NOMATCH);
4123             }
4124           break;
4125
4126           case PT_SC:
4127           for (i = 1; i <= min; i++)
4128             {
4129             if (eptr >= md->end_subject)
4130               {
4131               SCHECK_PARTIAL();
4132               RRETURN(MATCH_NOMATCH);
4133               }
4134             GETCHARINCTEST(c, eptr);
4135             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4136               RRETURN(MATCH_NOMATCH);
4137             }
4138           break;
4139
4140           case PT_ALNUM:
4141           for (i = 1; i <= min; i++)
4142             {
4143             int category;
4144             if (eptr >= md->end_subject)
4145               {
4146               SCHECK_PARTIAL();
4147               RRETURN(MATCH_NOMATCH);
4148               }
4149             GETCHARINCTEST(c, eptr);
4150             category = UCD_CATEGORY(c);
4151             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4152               RRETURN(MATCH_NOMATCH);
4153             }
4154           break;
4155
4156           case PT_SPACE:    /* Perl space */
4157           for (i = 1; i <= min; i++)
4158             {
4159             if (eptr >= md->end_subject)
4160               {
4161               SCHECK_PARTIAL();
4162               RRETURN(MATCH_NOMATCH);
4163               }
4164             GETCHARINCTEST(c, eptr);
4165             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4166                  c == CHAR_FF || c == CHAR_CR)
4167                    == prop_fail_result)
4168               RRETURN(MATCH_NOMATCH);
4169             }
4170           break;
4171
4172           case PT_PXSPACE:  /* POSIX space */
4173           for (i = 1; i <= min; i++)
4174             {
4175             if (eptr >= md->end_subject)
4176               {
4177               SCHECK_PARTIAL();
4178               RRETURN(MATCH_NOMATCH);
4179               }
4180             GETCHARINCTEST(c, eptr);
4181             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4182                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4183                    == prop_fail_result)
4184               RRETURN(MATCH_NOMATCH);
4185             }
4186           break;
4187
4188           case PT_WORD:
4189           for (i = 1; i <= min; i++)
4190             {
4191             int category;
4192             if (eptr >= md->end_subject)
4193               {
4194               SCHECK_PARTIAL();
4195               RRETURN(MATCH_NOMATCH);
4196               }
4197             GETCHARINCTEST(c, eptr);
4198             category = UCD_CATEGORY(c);
4199             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4200                    == prop_fail_result)
4201               RRETURN(MATCH_NOMATCH);
4202             }
4203           break;
4204
4205           /* This should not occur */
4206
4207           default:
4208           RRETURN(PCRE_ERROR_INTERNAL);
4209           }
4210         }
4211
4212       /* Match extended Unicode sequences. We will get here only if the
4213       support is in the binary; otherwise a compile-time error occurs. */
4214
4215       else if (ctype == OP_EXTUNI)
4216         {
4217         for (i = 1; i <= min; i++)
4218           {
4219           if (eptr >= md->end_subject)
4220             {
4221             SCHECK_PARTIAL();
4222             RRETURN(MATCH_NOMATCH);
4223             }
4224           GETCHARINCTEST(c, eptr);
4225           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4226           while (eptr < md->end_subject)
4227             {
4228             int len = 1;
4229             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4230             if (UCD_CATEGORY(c) != ucp_M) break;
4231             eptr += len;
4232             }
4233           CHECK_PARTIAL();
4234           }
4235         }
4236
4237       else
4238 #endif     /* SUPPORT_UCP */
4239
4240 /* Handle all other cases when the coding is UTF-8 */
4241
4242 #ifdef SUPPORT_UTF
4243       if (utf) switch(ctype)
4244         {
4245         case OP_ANY:
4246         for (i = 1; i <= min; i++)
4247           {
4248           if (eptr >= md->end_subject)
4249             {
4250             SCHECK_PARTIAL();
4251             RRETURN(MATCH_NOMATCH);
4252             }
4253           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4254           if (md->partial != 0 &&
4255               eptr + 1 >= md->end_subject &&
4256               NLBLOCK->nltype == NLTYPE_FIXED &&
4257               NLBLOCK->nllen == 2 &&
4258               *eptr == NLBLOCK->nl[0])
4259             {
4260             md->hitend = TRUE;
4261             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4262             }
4263           eptr++;
4264           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4265           }
4266         break;
4267
4268         case OP_ALLANY:
4269         for (i = 1; i <= min; i++)
4270           {
4271           if (eptr >= md->end_subject)
4272             {
4273             SCHECK_PARTIAL();
4274             RRETURN(MATCH_NOMATCH);
4275             }
4276           eptr++;
4277           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4278           }
4279         break;
4280
4281         case OP_ANYBYTE:
4282         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4283         eptr += min;
4284         break;
4285
4286         case OP_ANYNL:
4287         for (i = 1; i <= min; i++)
4288           {
4289           if (eptr >= md->end_subject)
4290             {
4291             SCHECK_PARTIAL();
4292             RRETURN(MATCH_NOMATCH);
4293             }
4294           GETCHARINC(c, eptr);
4295           switch(c)
4296             {
4297             default: RRETURN(MATCH_NOMATCH);
4298
4299             case 0x000d:
4300             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4301             break;
4302
4303             case 0x000a:
4304             break;
4305
4306             case 0x000b:
4307             case 0x000c:
4308             case 0x0085:
4309             case 0x2028:
4310             case 0x2029:
4311             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4312             break;
4313             }
4314           }
4315         break;
4316
4317         case OP_NOT_HSPACE:
4318         for (i = 1; i <= min; i++)
4319           {
4320           if (eptr >= md->end_subject)
4321             {
4322             SCHECK_PARTIAL();
4323             RRETURN(MATCH_NOMATCH);
4324             }
4325           GETCHARINC(c, eptr);
4326           switch(c)
4327             {
4328             default: break;
4329             case 0x09:      /* HT */
4330             case 0x20:      /* SPACE */
4331             case 0xa0:      /* NBSP */
4332             case 0x1680:    /* OGHAM SPACE MARK */
4333             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4334             case 0x2000:    /* EN QUAD */
4335             case 0x2001:    /* EM QUAD */
4336             case 0x2002:    /* EN SPACE */
4337             case 0x2003:    /* EM SPACE */
4338             case 0x2004:    /* THREE-PER-EM SPACE */
4339             case 0x2005:    /* FOUR-PER-EM SPACE */
4340             case 0x2006:    /* SIX-PER-EM SPACE */
4341             case 0x2007:    /* FIGURE SPACE */
4342             case 0x2008:    /* PUNCTUATION SPACE */
4343             case 0x2009:    /* THIN SPACE */
4344             case 0x200A:    /* HAIR SPACE */
4345             case 0x202f:    /* NARROW NO-BREAK SPACE */
4346             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4347             case 0x3000:    /* IDEOGRAPHIC SPACE */
4348             RRETURN(MATCH_NOMATCH);
4349             }
4350           }
4351         break;
4352
4353         case OP_HSPACE:
4354         for (i = 1; i <= min; i++)
4355           {
4356           if (eptr >= md->end_subject)
4357             {
4358             SCHECK_PARTIAL();
4359             RRETURN(MATCH_NOMATCH);
4360             }
4361           GETCHARINC(c, eptr);
4362           switch(c)
4363             {
4364             default: RRETURN(MATCH_NOMATCH);
4365             case 0x09:      /* HT */
4366             case 0x20:      /* SPACE */
4367             case 0xa0:      /* NBSP */
4368             case 0x1680:    /* OGHAM SPACE MARK */
4369             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4370             case 0x2000:    /* EN QUAD */
4371             case 0x2001:    /* EM QUAD */
4372             case 0x2002:    /* EN SPACE */
4373             case 0x2003:    /* EM SPACE */
4374             case 0x2004:    /* THREE-PER-EM SPACE */
4375             case 0x2005:    /* FOUR-PER-EM SPACE */
4376             case 0x2006:    /* SIX-PER-EM SPACE */
4377             case 0x2007:    /* FIGURE SPACE */
4378             case 0x2008:    /* PUNCTUATION SPACE */
4379             case 0x2009:    /* THIN SPACE */
4380             case 0x200A:    /* HAIR SPACE */
4381             case 0x202f:    /* NARROW NO-BREAK SPACE */
4382             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4383             case 0x3000:    /* IDEOGRAPHIC SPACE */
4384             break;
4385             }
4386           }
4387         break;
4388
4389         case OP_NOT_VSPACE:
4390         for (i = 1; i <= min; i++)
4391           {
4392           if (eptr >= md->end_subject)
4393             {
4394             SCHECK_PARTIAL();
4395             RRETURN(MATCH_NOMATCH);
4396             }
4397           GETCHARINC(c, eptr);
4398           switch(c)
4399             {
4400             default: break;
4401             case 0x0a:      /* LF */
4402             case 0x0b:      /* VT */
4403             case 0x0c:      /* FF */
4404             case 0x0d:      /* CR */
4405             case 0x85:      /* NEL */
4406             case 0x2028:    /* LINE SEPARATOR */
4407             case 0x2029:    /* PARAGRAPH SEPARATOR */
4408             RRETURN(MATCH_NOMATCH);
4409             }
4410           }
4411         break;
4412
4413         case OP_VSPACE:
4414         for (i = 1; i <= min; i++)
4415           {
4416           if (eptr >= md->end_subject)
4417             {
4418             SCHECK_PARTIAL();
4419             RRETURN(MATCH_NOMATCH);
4420             }
4421           GETCHARINC(c, eptr);
4422           switch(c)
4423             {
4424             default: RRETURN(MATCH_NOMATCH);
4425             case 0x0a:      /* LF */
4426             case 0x0b:      /* VT */
4427             case 0x0c:      /* FF */
4428             case 0x0d:      /* CR */
4429             case 0x85:      /* NEL */
4430             case 0x2028:    /* LINE SEPARATOR */
4431             case 0x2029:    /* PARAGRAPH SEPARATOR */
4432             break;
4433             }
4434           }
4435         break;
4436
4437         case OP_NOT_DIGIT:
4438         for (i = 1; i <= min; i++)
4439           {
4440           if (eptr >= md->end_subject)
4441             {
4442             SCHECK_PARTIAL();
4443             RRETURN(MATCH_NOMATCH);
4444             }
4445           GETCHARINC(c, eptr);
4446           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4447             RRETURN(MATCH_NOMATCH);
4448           }
4449         break;
4450
4451         case OP_DIGIT:
4452         for (i = 1; i <= min; i++)
4453           {
4454           if (eptr >= md->end_subject)
4455             {
4456             SCHECK_PARTIAL();
4457             RRETURN(MATCH_NOMATCH);
4458             }
4459           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4460             RRETURN(MATCH_NOMATCH);
4461           eptr++;
4462           /* No need to skip more bytes - we know it's a 1-byte character */
4463           }
4464         break;
4465
4466         case OP_NOT_WHITESPACE:
4467         for (i = 1; i <= min; i++)
4468           {
4469           if (eptr >= md->end_subject)
4470             {
4471             SCHECK_PARTIAL();
4472             RRETURN(MATCH_NOMATCH);
4473             }
4474           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4475             RRETURN(MATCH_NOMATCH);
4476           eptr++;
4477           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4478           }
4479         break;
4480
4481         case OP_WHITESPACE:
4482         for (i = 1; i <= min; i++)
4483           {
4484           if (eptr >= md->end_subject)
4485             {
4486             SCHECK_PARTIAL();
4487             RRETURN(MATCH_NOMATCH);
4488             }
4489           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4490             RRETURN(MATCH_NOMATCH);
4491           eptr++;
4492           /* No need to skip more bytes - we know it's a 1-byte character */
4493           }
4494         break;
4495
4496         case OP_NOT_WORDCHAR:
4497         for (i = 1; i <= min; i++)
4498           {
4499           if (eptr >= md->end_subject)
4500             {
4501             SCHECK_PARTIAL();
4502             RRETURN(MATCH_NOMATCH);
4503             }
4504           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4505             RRETURN(MATCH_NOMATCH);
4506           eptr++;
4507           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4508           }
4509         break;
4510
4511         case OP_WORDCHAR:
4512         for (i = 1; i <= min; i++)
4513           {
4514           if (eptr >= md->end_subject)
4515             {
4516             SCHECK_PARTIAL();
4517             RRETURN(MATCH_NOMATCH);
4518             }
4519           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4520             RRETURN(MATCH_NOMATCH);
4521           eptr++;
4522           /* No need to skip more bytes - we know it's a 1-byte character */
4523           }
4524         break;
4525
4526         default:
4527         RRETURN(PCRE_ERROR_INTERNAL);
4528         }  /* End switch(ctype) */
4529
4530       else
4531 #endif     /* SUPPORT_UTF */
4532
4533       /* Code for the non-UTF-8 case for minimum matching of operators other
4534       than OP_PROP and OP_NOTPROP. */
4535
4536       switch(ctype)
4537         {
4538         case OP_ANY:
4539         for (i = 1; i <= min; i++)
4540           {
4541           if (eptr >= md->end_subject)
4542             {
4543             SCHECK_PARTIAL();
4544             RRETURN(MATCH_NOMATCH);
4545             }
4546           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4547           if (md->partial != 0 &&
4548               eptr + 1 >= md->end_subject &&
4549               NLBLOCK->nltype == NLTYPE_FIXED &&
4550               NLBLOCK->nllen == 2 &&
4551               *eptr == NLBLOCK->nl[0])
4552             {
4553             md->hitend = TRUE;
4554             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4555             }
4556           eptr++;
4557           }
4558         break;
4559
4560         case OP_ALLANY:
4561         if (eptr > md->end_subject - min)
4562           {
4563           SCHECK_PARTIAL();
4564           RRETURN(MATCH_NOMATCH);
4565           }
4566         eptr += min;
4567         break;
4568
4569         case OP_ANYBYTE:
4570         if (eptr > md->end_subject - min)
4571           {
4572           SCHECK_PARTIAL();
4573           RRETURN(MATCH_NOMATCH);
4574           }
4575         eptr += min;
4576         break;
4577
4578         case OP_ANYNL:
4579         for (i = 1; i <= min; i++)
4580           {
4581           if (eptr >= md->end_subject)
4582             {
4583             SCHECK_PARTIAL();
4584             RRETURN(MATCH_NOMATCH);
4585             }
4586           switch(*eptr++)
4587             {
4588             default: RRETURN(MATCH_NOMATCH);
4589
4590             case 0x000d:
4591             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4592             break;
4593
4594             case 0x000a:
4595             break;
4596
4597             case 0x000b:
4598             case 0x000c:
4599             case 0x0085:
4600 #ifdef COMPILE_PCRE16
4601             case 0x2028:
4602             case 0x2029:
4603 #endif
4604             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4605             break;
4606             }
4607           }
4608         break;
4609
4610         case OP_NOT_HSPACE:
4611         for (i = 1; i <= min; i++)
4612           {
4613           if (eptr >= md->end_subject)
4614             {
4615             SCHECK_PARTIAL();
4616             RRETURN(MATCH_NOMATCH);
4617             }
4618           switch(*eptr++)
4619             {
4620             default: break;
4621             case 0x09:      /* HT */
4622             case 0x20:      /* SPACE */
4623             case 0xa0:      /* NBSP */
4624 #ifdef COMPILE_PCRE16
4625             case 0x1680:    /* OGHAM SPACE MARK */
4626             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4627             case 0x2000:    /* EN QUAD */
4628             case 0x2001:    /* EM QUAD */
4629             case 0x2002:    /* EN SPACE */
4630             case 0x2003:    /* EM SPACE */
4631             case 0x2004:    /* THREE-PER-EM SPACE */
4632             case 0x2005:    /* FOUR-PER-EM SPACE */
4633             case 0x2006:    /* SIX-PER-EM SPACE */
4634             case 0x2007:    /* FIGURE SPACE */
4635             case 0x2008:    /* PUNCTUATION SPACE */
4636             case 0x2009:    /* THIN SPACE */
4637             case 0x200A:    /* HAIR SPACE */
4638             case 0x202f:    /* NARROW NO-BREAK SPACE */
4639             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4640             case 0x3000:    /* IDEOGRAPHIC SPACE */
4641 #endif
4642             RRETURN(MATCH_NOMATCH);
4643             }
4644           }
4645         break;
4646
4647         case OP_HSPACE:
4648         for (i = 1; i <= min; i++)
4649           {
4650           if (eptr >= md->end_subject)
4651             {
4652             SCHECK_PARTIAL();
4653             RRETURN(MATCH_NOMATCH);
4654             }
4655           switch(*eptr++)
4656             {
4657             default: RRETURN(MATCH_NOMATCH);
4658             case 0x09:      /* HT */
4659             case 0x20:      /* SPACE */
4660             case 0xa0:      /* NBSP */
4661 #ifdef COMPILE_PCRE16
4662             case 0x1680:    /* OGHAM SPACE MARK */
4663             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4664             case 0x2000:    /* EN QUAD */
4665             case 0x2001:    /* EM QUAD */
4666             case 0x2002:    /* EN SPACE */
4667             case 0x2003:    /* EM SPACE */
4668             case 0x2004:    /* THREE-PER-EM SPACE */
4669             case 0x2005:    /* FOUR-PER-EM SPACE */
4670             case 0x2006:    /* SIX-PER-EM SPACE */
4671             case 0x2007:    /* FIGURE SPACE */
4672             case 0x2008:    /* PUNCTUATION SPACE */
4673             case 0x2009:    /* THIN SPACE */
4674             case 0x200A:    /* HAIR SPACE */
4675             case 0x202f:    /* NARROW NO-BREAK SPACE */
4676             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4677             case 0x3000:    /* IDEOGRAPHIC SPACE */
4678 #endif
4679             break;
4680             }
4681           }
4682         break;
4683
4684         case OP_NOT_VSPACE:
4685         for (i = 1; i <= min; i++)
4686           {
4687           if (eptr >= md->end_subject)
4688             {
4689             SCHECK_PARTIAL();
4690             RRETURN(MATCH_NOMATCH);
4691             }
4692           switch(*eptr++)
4693             {
4694             default: break;
4695             case 0x0a:      /* LF */
4696             case 0x0b:      /* VT */
4697             case 0x0c:      /* FF */
4698             case 0x0d:      /* CR */
4699             case 0x85:      /* NEL */
4700 #ifdef COMPILE_PCRE16
4701             case 0x2028:    /* LINE SEPARATOR */
4702             case 0x2029:    /* PARAGRAPH SEPARATOR */
4703 #endif
4704             RRETURN(MATCH_NOMATCH);
4705             }
4706           }
4707         break;
4708
4709         case OP_VSPACE:
4710         for (i = 1; i <= min; i++)
4711           {
4712           if (eptr >= md->end_subject)
4713             {
4714             SCHECK_PARTIAL();
4715             RRETURN(MATCH_NOMATCH);
4716             }
4717           switch(*eptr++)
4718             {
4719             default: RRETURN(MATCH_NOMATCH);
4720             case 0x0a:      /* LF */
4721             case 0x0b:      /* VT */
4722             case 0x0c:      /* FF */
4723             case 0x0d:      /* CR */
4724             case 0x85:      /* NEL */
4725 #ifdef COMPILE_PCRE16
4726             case 0x2028:    /* LINE SEPARATOR */
4727             case 0x2029:    /* PARAGRAPH SEPARATOR */
4728 #endif
4729             break;
4730             }
4731           }
4732         break;
4733
4734         case OP_NOT_DIGIT:
4735         for (i = 1; i <= min; i++)
4736           {
4737           if (eptr >= md->end_subject)
4738             {
4739             SCHECK_PARTIAL();
4740             RRETURN(MATCH_NOMATCH);
4741             }
4742           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4743             RRETURN(MATCH_NOMATCH);
4744           eptr++;
4745           }
4746         break;
4747
4748         case OP_DIGIT:
4749         for (i = 1; i <= min; i++)
4750           {
4751           if (eptr >= md->end_subject)
4752             {
4753             SCHECK_PARTIAL();
4754             RRETURN(MATCH_NOMATCH);
4755             }
4756           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4757             RRETURN(MATCH_NOMATCH);
4758           eptr++;
4759           }
4760         break;
4761
4762         case OP_NOT_WHITESPACE:
4763         for (i = 1; i <= min; i++)
4764           {
4765           if (eptr >= md->end_subject)
4766             {
4767             SCHECK_PARTIAL();
4768             RRETURN(MATCH_NOMATCH);
4769             }
4770           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4771             RRETURN(MATCH_NOMATCH);
4772           eptr++;
4773           }
4774         break;
4775
4776         case OP_WHITESPACE:
4777         for (i = 1; i <= min; i++)
4778           {
4779           if (eptr >= md->end_subject)
4780             {
4781             SCHECK_PARTIAL();
4782             RRETURN(MATCH_NOMATCH);
4783             }
4784           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4785             RRETURN(MATCH_NOMATCH);
4786           eptr++;
4787           }
4788         break;
4789
4790         case OP_NOT_WORDCHAR:
4791         for (i = 1; i <= min; i++)
4792           {
4793           if (eptr >= md->end_subject)
4794             {
4795             SCHECK_PARTIAL();
4796             RRETURN(MATCH_NOMATCH);
4797             }
4798           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4799             RRETURN(MATCH_NOMATCH);
4800           eptr++;
4801           }
4802         break;
4803
4804         case OP_WORDCHAR:
4805         for (i = 1; i <= min; i++)
4806           {
4807           if (eptr >= md->end_subject)
4808             {
4809             SCHECK_PARTIAL();
4810             RRETURN(MATCH_NOMATCH);
4811             }
4812           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4813             RRETURN(MATCH_NOMATCH);
4814           eptr++;
4815           }
4816         break;
4817
4818         default:
4819         RRETURN(PCRE_ERROR_INTERNAL);
4820         }
4821       }
4822
4823     /* If min = max, continue at the same level without recursing */
4824
4825     if (min == max) continue;
4826
4827     /* If minimizing, we have to test the rest of the pattern before each
4828     subsequent match. Again, separate the UTF-8 case for speed, and also
4829     separate the UCP cases. */
4830
4831     if (minimize)
4832       {
4833 #ifdef SUPPORT_UCP
4834       if (prop_type >= 0)
4835         {
4836         switch(prop_type)
4837           {
4838           case PT_ANY:
4839           for (fi = min;; fi++)
4840             {
4841             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4842             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4843             if (fi >= max) RRETURN(MATCH_NOMATCH);
4844             if (eptr >= md->end_subject)
4845               {
4846               SCHECK_PARTIAL();
4847               RRETURN(MATCH_NOMATCH);
4848               }
4849             GETCHARINCTEST(c, eptr);
4850             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4851             }
4852           /* Control never gets here */
4853
4854           case PT_LAMP:
4855           for (fi = min;; fi++)
4856             {
4857             int chartype;
4858             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4859             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4860             if (fi >= max) RRETURN(MATCH_NOMATCH);
4861             if (eptr >= md->end_subject)
4862               {
4863               SCHECK_PARTIAL();
4864               RRETURN(MATCH_NOMATCH);
4865               }
4866             GETCHARINCTEST(c, eptr);
4867             chartype = UCD_CHARTYPE(c);
4868             if ((chartype == ucp_Lu ||
4869                  chartype == ucp_Ll ||
4870                  chartype == ucp_Lt) == prop_fail_result)
4871               RRETURN(MATCH_NOMATCH);
4872             }
4873           /* Control never gets here */
4874
4875           case PT_GC:
4876           for (fi = min;; fi++)
4877             {
4878             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4879             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4880             if (fi >= max) RRETURN(MATCH_NOMATCH);
4881             if (eptr >= md->end_subject)
4882               {
4883               SCHECK_PARTIAL();
4884               RRETURN(MATCH_NOMATCH);
4885               }
4886             GETCHARINCTEST(c, eptr);
4887             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4888               RRETURN(MATCH_NOMATCH);
4889             }
4890           /* Control never gets here */
4891
4892           case PT_PC:
4893           for (fi = min;; fi++)
4894             {
4895             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4896             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897             if (fi >= max) RRETURN(MATCH_NOMATCH);
4898             if (eptr >= md->end_subject)
4899               {
4900               SCHECK_PARTIAL();
4901               RRETURN(MATCH_NOMATCH);
4902               }
4903             GETCHARINCTEST(c, eptr);
4904             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4905               RRETURN(MATCH_NOMATCH);
4906             }
4907           /* Control never gets here */
4908
4909           case PT_SC:
4910           for (fi = min;; fi++)
4911             {
4912             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4913             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914             if (fi >= max) RRETURN(MATCH_NOMATCH);
4915             if (eptr >= md->end_subject)
4916               {
4917               SCHECK_PARTIAL();
4918               RRETURN(MATCH_NOMATCH);
4919               }
4920             GETCHARINCTEST(c, eptr);
4921             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4922               RRETURN(MATCH_NOMATCH);
4923             }
4924           /* Control never gets here */
4925
4926           case PT_ALNUM:
4927           for (fi = min;; fi++)
4928             {
4929             int category;
4930             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4931             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4932             if (fi >= max) RRETURN(MATCH_NOMATCH);
4933             if (eptr >= md->end_subject)
4934               {
4935               SCHECK_PARTIAL();
4936               RRETURN(MATCH_NOMATCH);
4937               }
4938             GETCHARINCTEST(c, eptr);
4939             category = UCD_CATEGORY(c);
4940             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4941               RRETURN(MATCH_NOMATCH);
4942             }
4943           /* Control never gets here */
4944
4945           case PT_SPACE:    /* Perl space */
4946           for (fi = min;; fi++)
4947             {
4948             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4949             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4950             if (fi >= max) RRETURN(MATCH_NOMATCH);
4951             if (eptr >= md->end_subject)
4952               {
4953               SCHECK_PARTIAL();
4954               RRETURN(MATCH_NOMATCH);
4955               }
4956             GETCHARINCTEST(c, eptr);
4957             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4958                  c == CHAR_FF || c == CHAR_CR)
4959                    == prop_fail_result)
4960               RRETURN(MATCH_NOMATCH);
4961             }
4962           /* Control never gets here */
4963
4964           case PT_PXSPACE:  /* POSIX space */
4965           for (fi = min;; fi++)
4966             {
4967             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4968             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4969             if (fi >= max) RRETURN(MATCH_NOMATCH);
4970             if (eptr >= md->end_subject)
4971               {
4972               SCHECK_PARTIAL();
4973               RRETURN(MATCH_NOMATCH);
4974               }
4975             GETCHARINCTEST(c, eptr);
4976             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4977                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4978                    == prop_fail_result)
4979               RRETURN(MATCH_NOMATCH);
4980             }
4981           /* Control never gets here */
4982
4983           case PT_WORD:
4984           for (fi = min;; fi++)
4985             {
4986             int category;
4987             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4988             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4989             if (fi >= max) RRETURN(MATCH_NOMATCH);
4990             if (eptr >= md->end_subject)
4991               {
4992               SCHECK_PARTIAL();
4993               RRETURN(MATCH_NOMATCH);
4994               }
4995             GETCHARINCTEST(c, eptr);
4996             category = UCD_CATEGORY(c);
4997             if ((category == ucp_L ||
4998                  category == ucp_N ||
4999                  c == CHAR_UNDERSCORE)
5000                    == prop_fail_result)
5001               RRETURN(MATCH_NOMATCH);
5002             }
5003           /* Control never gets here */
5004
5005           /* This should never occur */
5006
5007           default:
5008           RRETURN(PCRE_ERROR_INTERNAL);
5009           }
5010         }
5011
5012       /* Match extended Unicode sequences. We will get here only if the
5013       support is in the binary; otherwise a compile-time error occurs. */
5014
5015       else if (ctype == OP_EXTUNI)
5016         {
5017         for (fi = min;; fi++)
5018           {
5019           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5020           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5021           if (fi >= max) RRETURN(MATCH_NOMATCH);
5022           if (eptr >= md->end_subject)
5023             {
5024             SCHECK_PARTIAL();
5025             RRETURN(MATCH_NOMATCH);
5026             }
5027           GETCHARINCTEST(c, eptr);
5028           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5029           while (eptr < md->end_subject)
5030             {
5031             int len = 1;
5032             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5033             if (UCD_CATEGORY(c) != ucp_M) break;
5034             eptr += len;
5035             }
5036           CHECK_PARTIAL();
5037           }
5038         }
5039       else
5040 #endif     /* SUPPORT_UCP */
5041
5042 #ifdef SUPPORT_UTF
5043       if (utf)
5044         {
5045         for (fi = min;; fi++)
5046           {
5047           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5048           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5049           if (fi >= max) RRETURN(MATCH_NOMATCH);
5050           if (eptr >= md->end_subject)
5051             {
5052             SCHECK_PARTIAL();
5053             RRETURN(MATCH_NOMATCH);
5054             }
5055           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5056             RRETURN(MATCH_NOMATCH);
5057           GETCHARINC(c, eptr);
5058           switch(ctype)
5059             {
5060             case OP_ANY:               /* This is the non-NL case */
5061             if (md->partial != 0 &&    /* Take care with CRLF partial */
5062                 eptr >= md->end_subject &&
5063                 NLBLOCK->nltype == NLTYPE_FIXED &&
5064                 NLBLOCK->nllen == 2 &&
5065                 c == NLBLOCK->nl[0])
5066               {
5067               md->hitend = TRUE;
5068               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5069               }
5070             break;
5071
5072             case OP_ALLANY:
5073             case OP_ANYBYTE:
5074             break;
5075
5076             case OP_ANYNL:
5077             switch(c)
5078               {
5079               default: RRETURN(MATCH_NOMATCH);
5080               case 0x000d:
5081               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5082               break;
5083               case 0x000a:
5084               break;
5085
5086               case 0x000b:
5087               case 0x000c:
5088               case 0x0085:
5089               case 0x2028:
5090               case 0x2029:
5091               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5092               break;
5093               }
5094             break;
5095
5096             case OP_NOT_HSPACE:
5097             switch(c)
5098               {
5099               default: break;
5100               case 0x09:      /* HT */
5101               case 0x20:      /* SPACE */
5102               case 0xa0:      /* NBSP */
5103               case 0x1680:    /* OGHAM SPACE MARK */
5104               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5105               case 0x2000:    /* EN QUAD */
5106               case 0x2001:    /* EM QUAD */
5107               case 0x2002:    /* EN SPACE */
5108               case 0x2003:    /* EM SPACE */
5109               case 0x2004:    /* THREE-PER-EM SPACE */
5110               case 0x2005:    /* FOUR-PER-EM SPACE */
5111               case 0x2006:    /* SIX-PER-EM SPACE */
5112               case 0x2007:    /* FIGURE SPACE */
5113               case 0x2008:    /* PUNCTUATION SPACE */
5114               case 0x2009:    /* THIN SPACE */
5115               case 0x200A:    /* HAIR SPACE */
5116               case 0x202f:    /* NARROW NO-BREAK SPACE */
5117               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5118               case 0x3000:    /* IDEOGRAPHIC SPACE */
5119               RRETURN(MATCH_NOMATCH);
5120               }
5121             break;
5122
5123             case OP_HSPACE:
5124             switch(c)
5125               {
5126               default: RRETURN(MATCH_NOMATCH);
5127               case 0x09:      /* HT */
5128               case 0x20:      /* SPACE */
5129               case 0xa0:      /* NBSP */
5130               case 0x1680:    /* OGHAM SPACE MARK */
5131               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5132               case 0x2000:    /* EN QUAD */
5133               case 0x2001:    /* EM QUAD */
5134               case 0x2002:    /* EN SPACE */
5135               case 0x2003:    /* EM SPACE */
5136               case 0x2004:    /* THREE-PER-EM SPACE */
5137               case 0x2005:    /* FOUR-PER-EM SPACE */
5138               case 0x2006:    /* SIX-PER-EM SPACE */
5139               case 0x2007:    /* FIGURE SPACE */
5140               case 0x2008:    /* PUNCTUATION SPACE */
5141               case 0x2009:    /* THIN SPACE */
5142               case 0x200A:    /* HAIR SPACE */
5143               case 0x202f:    /* NARROW NO-BREAK SPACE */
5144               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5145               case 0x3000:    /* IDEOGRAPHIC SPACE */
5146               break;
5147               }
5148             break;
5149
5150             case OP_NOT_VSPACE:
5151             switch(c)
5152               {
5153               default: break;
5154               case 0x0a:      /* LF */
5155               case 0x0b:      /* VT */
5156               case 0x0c:      /* FF */
5157               case 0x0d:      /* CR */
5158               case 0x85:      /* NEL */
5159               case 0x2028:    /* LINE SEPARATOR */
5160               case 0x2029:    /* PARAGRAPH SEPARATOR */
5161               RRETURN(MATCH_NOMATCH);
5162               }
5163             break;
5164
5165             case OP_VSPACE:
5166             switch(c)
5167               {
5168               default: RRETURN(MATCH_NOMATCH);
5169               case 0x0a:      /* LF */
5170               case 0x0b:      /* VT */
5171               case 0x0c:      /* FF */
5172               case 0x0d:      /* CR */
5173               case 0x85:      /* NEL */
5174               case 0x2028:    /* LINE SEPARATOR */
5175               case 0x2029:    /* PARAGRAPH SEPARATOR */
5176               break;
5177               }
5178             break;
5179
5180             case OP_NOT_DIGIT:
5181             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5182               RRETURN(MATCH_NOMATCH);
5183             break;
5184
5185             case OP_DIGIT:
5186             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5187               RRETURN(MATCH_NOMATCH);
5188             break;
5189
5190             case OP_NOT_WHITESPACE:
5191             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5192               RRETURN(MATCH_NOMATCH);
5193             break;
5194
5195             case OP_WHITESPACE:
5196             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5197               RRETURN(MATCH_NOMATCH);
5198             break;
5199
5200             case OP_NOT_WORDCHAR:
5201             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5202               RRETURN(MATCH_NOMATCH);
5203             break;
5204
5205             case OP_WORDCHAR:
5206             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5207               RRETURN(MATCH_NOMATCH);
5208             break;
5209
5210             default:
5211             RRETURN(PCRE_ERROR_INTERNAL);
5212             }
5213           }
5214         }
5215       else
5216 #endif
5217       /* Not UTF mode */
5218         {
5219         for (fi = min;; fi++)
5220           {
5221           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5222           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5223           if (fi >= max) RRETURN(MATCH_NOMATCH);
5224           if (eptr >= md->end_subject)
5225             {
5226             SCHECK_PARTIAL();
5227             RRETURN(MATCH_NOMATCH);
5228             }
5229           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5230             RRETURN(MATCH_NOMATCH);
5231           c = *eptr++;
5232           switch(ctype)
5233             {
5234             case OP_ANY:               /* This is the non-NL case */
5235             if (md->partial != 0 &&    /* Take care with CRLF partial */
5236                 eptr >= md->end_subject &&
5237                 NLBLOCK->nltype == NLTYPE_FIXED &&
5238                 NLBLOCK->nllen == 2 &&
5239                 c == NLBLOCK->nl[0])
5240               {
5241               md->hitend = TRUE;
5242               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5243               }
5244             break;
5245
5246             case OP_ALLANY:
5247             case OP_ANYBYTE:
5248             break;
5249
5250             case OP_ANYNL:
5251             switch(c)
5252               {
5253               default: RRETURN(MATCH_NOMATCH);
5254               case 0x000d:
5255               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5256               break;
5257
5258               case 0x000a:
5259               break;
5260
5261               case 0x000b:
5262               case 0x000c:
5263               case 0x0085:
5264 #ifdef COMPILE_PCRE16
5265               case 0x2028:
5266               case 0x2029:
5267 #endif
5268               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5269               break;
5270               }
5271             break;
5272
5273             case OP_NOT_HSPACE:
5274             switch(c)
5275               {
5276               default: break;
5277               case 0x09:      /* HT */
5278               case 0x20:      /* SPACE */
5279               case 0xa0:      /* NBSP */
5280 #ifdef COMPILE_PCRE16
5281               case 0x1680:    /* OGHAM SPACE MARK */
5282               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5283               case 0x2000:    /* EN QUAD */
5284               case 0x2001:    /* EM QUAD */
5285               case 0x2002:    /* EN SPACE */
5286               case 0x2003:    /* EM SPACE */
5287               case 0x2004:    /* THREE-PER-EM SPACE */
5288               case 0x2005:    /* FOUR-PER-EM SPACE */
5289               case 0x2006:    /* SIX-PER-EM SPACE */
5290               case 0x2007:    /* FIGURE SPACE */
5291               case 0x2008:    /* PUNCTUATION SPACE */
5292               case 0x2009:    /* THIN SPACE */
5293               case 0x200A:    /* HAIR SPACE */
5294               case 0x202f:    /* NARROW NO-BREAK SPACE */
5295               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5296               case 0x3000:    /* IDEOGRAPHIC SPACE */
5297 #endif
5298               RRETURN(MATCH_NOMATCH);
5299               }
5300             break;
5301
5302             case OP_HSPACE:
5303             switch(c)
5304               {
5305               default: RRETURN(MATCH_NOMATCH);
5306               case 0x09:      /* HT */
5307               case 0x20:      /* SPACE */
5308               case 0xa0:      /* NBSP */
5309 #ifdef COMPILE_PCRE16
5310               case 0x1680:    /* OGHAM SPACE MARK */
5311               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5312               case 0x2000:    /* EN QUAD */
5313               case 0x2001:    /* EM QUAD */
5314               case 0x2002:    /* EN SPACE */
5315               case 0x2003:    /* EM SPACE */
5316               case 0x2004:    /* THREE-PER-EM SPACE */
5317               case 0x2005:    /* FOUR-PER-EM SPACE */
5318               case 0x2006:    /* SIX-PER-EM SPACE */
5319               case 0x2007:    /* FIGURE SPACE */
5320               case 0x2008:    /* PUNCTUATION SPACE */
5321               case 0x2009:    /* THIN SPACE */
5322               case 0x200A:    /* HAIR SPACE */
5323               case 0x202f:    /* NARROW NO-BREAK SPACE */
5324               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5325               case 0x3000:    /* IDEOGRAPHIC SPACE */
5326 #endif
5327               break;
5328               }
5329             break;
5330
5331             case OP_NOT_VSPACE:
5332             switch(c)
5333               {
5334               default: break;
5335               case 0x0a:      /* LF */
5336               case 0x0b:      /* VT */
5337               case 0x0c:      /* FF */
5338               case 0x0d:      /* CR */
5339               case 0x85:      /* NEL */
5340 #ifdef COMPILE_PCRE16
5341               case 0x2028:    /* LINE SEPARATOR */
5342               case 0x2029:    /* PARAGRAPH SEPARATOR */
5343 #endif
5344               RRETURN(MATCH_NOMATCH);
5345               }
5346             break;
5347
5348             case OP_VSPACE:
5349             switch(c)
5350               {
5351               default: RRETURN(MATCH_NOMATCH);
5352               case 0x0a:      /* LF */
5353               case 0x0b:      /* VT */
5354               case 0x0c:      /* FF */
5355               case 0x0d:      /* CR */
5356               case 0x85:      /* NEL */
5357 #ifdef COMPILE_PCRE16
5358               case 0x2028:    /* LINE SEPARATOR */
5359               case 0x2029:    /* PARAGRAPH SEPARATOR */
5360 #endif
5361               break;
5362               }
5363             break;
5364
5365             case OP_NOT_DIGIT:
5366             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5367             break;
5368
5369             case OP_DIGIT:
5370             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5371             break;
5372
5373             case OP_NOT_WHITESPACE:
5374             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5375             break;
5376
5377             case OP_WHITESPACE:
5378             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5379             break;
5380
5381             case OP_NOT_WORDCHAR:
5382             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5383             break;
5384
5385             case OP_WORDCHAR:
5386             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5387             break;
5388
5389             default:
5390             RRETURN(PCRE_ERROR_INTERNAL);
5391             }
5392           }
5393         }
5394       /* Control never gets here */
5395       }
5396
5397     /* If maximizing, it is worth using inline code for speed, doing the type
5398     test once at the start (i.e. keep it out of the loop). Again, keep the
5399     UTF-8 and UCP stuff separate. */
5400
5401     else
5402       {
5403       pp = eptr;  /* Remember where we started */
5404
5405 #ifdef SUPPORT_UCP
5406       if (prop_type >= 0)
5407         {
5408         switch(prop_type)
5409           {
5410           case PT_ANY:
5411           for (i = min; i < max; i++)
5412             {
5413             int len = 1;
5414             if (eptr >= md->end_subject)
5415               {
5416               SCHECK_PARTIAL();
5417               break;
5418               }
5419             GETCHARLENTEST(c, eptr, len);
5420             if (prop_fail_result) break;
5421             eptr+= len;
5422             }
5423           break;
5424
5425           case PT_LAMP:
5426           for (i = min; i < max; i++)
5427             {
5428             int chartype;
5429             int len = 1;
5430             if (eptr >= md->end_subject)
5431               {
5432               SCHECK_PARTIAL();
5433               break;
5434               }
5435             GETCHARLENTEST(c, eptr, len);
5436             chartype = UCD_CHARTYPE(c);
5437             if ((chartype == ucp_Lu ||
5438                  chartype == ucp_Ll ||
5439                  chartype == ucp_Lt) == prop_fail_result)
5440               break;
5441             eptr+= len;
5442             }
5443           break;
5444
5445           case PT_GC:
5446           for (i = min; i < max; i++)
5447             {
5448             int len = 1;
5449             if (eptr >= md->end_subject)
5450               {
5451               SCHECK_PARTIAL();
5452               break;
5453               }
5454             GETCHARLENTEST(c, eptr, len);
5455             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5456             eptr+= len;
5457             }
5458           break;
5459
5460           case PT_PC:
5461           for (i = min; i < max; i++)
5462             {
5463             int len = 1;
5464             if (eptr >= md->end_subject)
5465               {
5466               SCHECK_PARTIAL();
5467               break;
5468               }
5469             GETCHARLENTEST(c, eptr, len);
5470             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5471             eptr+= len;
5472             }
5473           break;
5474
5475           case PT_SC:
5476           for (i = min; i < max; i++)
5477             {
5478             int len = 1;
5479             if (eptr >= md->end_subject)
5480               {
5481               SCHECK_PARTIAL();
5482               break;
5483               }
5484             GETCHARLENTEST(c, eptr, len);
5485             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5486             eptr+= len;
5487             }
5488           break;
5489
5490           case PT_ALNUM:
5491           for (i = min; i < max; i++)
5492             {
5493             int category;
5494             int len = 1;
5495             if (eptr >= md->end_subject)
5496               {
5497               SCHECK_PARTIAL();
5498               break;
5499               }
5500             GETCHARLENTEST(c, eptr, len);
5501             category = UCD_CATEGORY(c);
5502             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5503               break;
5504             eptr+= len;
5505             }
5506           break;
5507
5508           case PT_SPACE:    /* Perl space */
5509           for (i = min; i < max; i++)
5510             {
5511             int len = 1;
5512             if (eptr >= md->end_subject)
5513               {
5514               SCHECK_PARTIAL();
5515               break;
5516               }
5517             GETCHARLENTEST(c, eptr, len);
5518             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5519                  c == CHAR_FF || c == CHAR_CR)
5520                  == prop_fail_result)
5521               break;
5522             eptr+= len;
5523             }
5524           break;
5525
5526           case PT_PXSPACE:  /* POSIX space */
5527           for (i = min; i < max; i++)
5528             {
5529             int len = 1;
5530             if (eptr >= md->end_subject)
5531               {
5532               SCHECK_PARTIAL();
5533               break;
5534               }
5535             GETCHARLENTEST(c, eptr, len);
5536             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5537                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5538                  == prop_fail_result)
5539               break;
5540             eptr+= len;
5541             }
5542           break;
5543
5544           case PT_WORD:
5545           for (i = min; i < max; i++)
5546             {
5547             int category;
5548             int len = 1;
5549             if (eptr >= md->end_subject)
5550               {
5551               SCHECK_PARTIAL();
5552               break;
5553               }
5554             GETCHARLENTEST(c, eptr, len);
5555             category = UCD_CATEGORY(c);
5556             if ((category == ucp_L || category == ucp_N ||
5557                  c == CHAR_UNDERSCORE) == prop_fail_result)
5558               break;
5559             eptr+= len;
5560             }
5561           break;
5562
5563           default:
5564           RRETURN(PCRE_ERROR_INTERNAL);
5565           }
5566
5567         /* eptr is now past the end of the maximum run */
5568
5569         if (possessive) continue;
5570         for(;;)
5571           {
5572           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5573           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5574           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5575           if (utf) BACKCHAR(eptr);
5576           }
5577         }
5578
5579       /* Match extended Unicode sequences. We will get here only if the
5580       support is in the binary; otherwise a compile-time error occurs. */
5581
5582       else if (ctype == OP_EXTUNI)
5583         {
5584         for (i = min; i < max; i++)
5585           {
5586           int len = 1;
5587           if (eptr >= md->end_subject)
5588             {
5589             SCHECK_PARTIAL();
5590             break;
5591             }
5592           if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5593           if (UCD_CATEGORY(c) == ucp_M) break;
5594           eptr += len;
5595           while (eptr < md->end_subject)
5596             {
5597             len = 1;
5598             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5599             if (UCD_CATEGORY(c) != ucp_M) break;
5600             eptr += len;
5601             }
5602           CHECK_PARTIAL();
5603           }
5604
5605         /* eptr is now past the end of the maximum run */
5606
5607         if (possessive) continue;
5608
5609         for(;;)
5610           {
5611           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5612           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5613           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5614           for (;;)                        /* Move back over one extended */
5615             {
5616             if (!utf) c = *eptr; else
5617               {
5618               BACKCHAR(eptr);
5619               GETCHAR(c, eptr);
5620               }
5621             if (UCD_CATEGORY(c) != ucp_M) break;
5622             eptr--;
5623             }
5624           }
5625         }
5626
5627       else
5628 #endif   /* SUPPORT_UCP */
5629
5630 #ifdef SUPPORT_UTF
5631       if (utf)
5632         {
5633         switch(ctype)
5634           {
5635           case OP_ANY:
5636           if (max < INT_MAX)
5637             {
5638             for (i = min; i < max; i++)
5639               {
5640               if (eptr >= md->end_subject)
5641                 {
5642                 SCHECK_PARTIAL();
5643                 break;
5644                 }
5645               if (IS_NEWLINE(eptr)) break;
5646               if (md->partial != 0 &&    /* Take care with CRLF partial */
5647                   eptr + 1 >= md->end_subject &&
5648                   NLBLOCK->nltype == NLTYPE_FIXED &&
5649                   NLBLOCK->nllen == 2 &&
5650                   *eptr == NLBLOCK->nl[0])
5651                 {
5652                 md->hitend = TRUE;
5653                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5654                 }
5655               eptr++;
5656               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5657               }
5658             }
5659
5660           /* Handle unlimited UTF-8 repeat */
5661
5662           else
5663             {
5664             for (i = min; i < max; i++)
5665               {
5666               if (eptr >= md->end_subject)
5667                 {
5668                 SCHECK_PARTIAL();
5669                 break;
5670                 }
5671               if (IS_NEWLINE(eptr)) break;
5672               if (md->partial != 0 &&    /* Take care with CRLF partial */
5673                   eptr + 1 >= md->end_subject &&
5674                   NLBLOCK->nltype == NLTYPE_FIXED &&
5675                   NLBLOCK->nllen == 2 &&
5676                   *eptr == NLBLOCK->nl[0])
5677                 {
5678                 md->hitend = TRUE;
5679                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5680                 }
5681               eptr++;
5682               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5683               }
5684             }
5685           break;
5686
5687           case OP_ALLANY:
5688           if (max < INT_MAX)
5689             {
5690             for (i = min; i < max; i++)
5691               {
5692               if (eptr >= md->end_subject)
5693                 {
5694                 SCHECK_PARTIAL();
5695                 break;
5696                 }
5697               eptr++;
5698               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5699               }
5700             }
5701           else
5702             {
5703             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5704             SCHECK_PARTIAL();
5705             }
5706           break;
5707
5708           /* The byte case is the same as non-UTF8 */
5709
5710           case OP_ANYBYTE:
5711           c = max - min;
5712           if (c > (unsigned int)(md->end_subject - eptr))
5713             {
5714             eptr = md->end_subject;
5715             SCHECK_PARTIAL();
5716             }
5717           else eptr += c;
5718           break;
5719
5720           case OP_ANYNL:
5721           for (i = min; i < max; i++)
5722             {
5723             int len = 1;
5724             if (eptr >= md->end_subject)
5725               {
5726               SCHECK_PARTIAL();
5727               break;
5728               }
5729             GETCHARLEN(c, eptr, len);
5730             if (c == 0x000d)
5731               {
5732               if (++eptr >= md->end_subject) break;
5733               if (*eptr == 0x000a) eptr++;
5734               }
5735             else
5736               {
5737               if (c != 0x000a &&
5738                   (md->bsr_anycrlf ||
5739                    (c != 0x000b && c != 0x000c &&
5740                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
5741                 break;
5742               eptr += len;
5743               }
5744             }
5745           break;
5746
5747           case OP_NOT_HSPACE:
5748           case OP_HSPACE:
5749           for (i = min; i < max; i++)
5750             {
5751             BOOL gotspace;
5752             int len = 1;
5753             if (eptr >= md->end_subject)
5754               {
5755               SCHECK_PARTIAL();
5756               break;
5757               }
5758             GETCHARLEN(c, eptr, len);
5759             switch(c)
5760               {
5761               default: gotspace = FALSE; break;
5762               case 0x09:      /* HT */
5763               case 0x20:      /* SPACE */
5764               case 0xa0:      /* NBSP */
5765               case 0x1680:    /* OGHAM SPACE MARK */
5766               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5767               case 0x2000:    /* EN QUAD */
5768               case 0x2001:    /* EM QUAD */
5769               case 0x2002:    /* EN SPACE */
5770               case 0x2003:    /* EM SPACE */
5771               case 0x2004:    /* THREE-PER-EM SPACE */
5772               case 0x2005:    /* FOUR-PER-EM SPACE */
5773               case 0x2006:    /* SIX-PER-EM SPACE */
5774               case 0x2007:    /* FIGURE SPACE */
5775               case 0x2008:    /* PUNCTUATION SPACE */
5776               case 0x2009:    /* THIN SPACE */
5777               case 0x200A:    /* HAIR SPACE */
5778               case 0x202f:    /* NARROW NO-BREAK SPACE */
5779               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5780               case 0x3000:    /* IDEOGRAPHIC SPACE */
5781               gotspace = TRUE;
5782               break;
5783               }
5784             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5785             eptr += len;
5786             }
5787           break;
5788
5789           case OP_NOT_VSPACE:
5790           case OP_VSPACE:
5791           for (i = min; i < max; i++)
5792             {
5793             BOOL gotspace;
5794             int len = 1;
5795             if (eptr >= md->end_subject)
5796               {
5797               SCHECK_PARTIAL();
5798               break;
5799               }
5800             GETCHARLEN(c, eptr, len);
5801             switch(c)
5802               {
5803               default: gotspace = FALSE; break;
5804               case 0x0a:      /* LF */
5805               case 0x0b:      /* VT */
5806               case 0x0c:      /* FF */
5807               case 0x0d:      /* CR */
5808               case 0x85:      /* NEL */
5809               case 0x2028:    /* LINE SEPARATOR */
5810               case 0x2029:    /* PARAGRAPH SEPARATOR */
5811               gotspace = TRUE;
5812               break;
5813               }
5814             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5815             eptr += len;
5816             }
5817           break;
5818
5819           case OP_NOT_DIGIT:
5820           for (i = min; i < max; i++)
5821             {
5822             int len = 1;
5823             if (eptr >= md->end_subject)
5824               {
5825               SCHECK_PARTIAL();
5826               break;
5827               }
5828             GETCHARLEN(c, eptr, len);
5829             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5830             eptr+= len;
5831             }
5832           break;
5833
5834           case OP_DIGIT:
5835           for (i = min; i < max; i++)
5836             {
5837             int len = 1;
5838             if (eptr >= md->end_subject)
5839               {
5840               SCHECK_PARTIAL();
5841               break;
5842               }
5843             GETCHARLEN(c, eptr, len);
5844             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5845             eptr+= len;
5846             }
5847           break;
5848
5849           case OP_NOT_WHITESPACE:
5850           for (i = min; i < max; i++)
5851             {
5852             int len = 1;
5853             if (eptr >= md->end_subject)
5854               {
5855               SCHECK_PARTIAL();
5856               break;
5857               }
5858             GETCHARLEN(c, eptr, len);
5859             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5860             eptr+= len;
5861             }
5862           break;
5863
5864           case OP_WHITESPACE:
5865           for (i = min; i < max; i++)
5866             {
5867             int len = 1;
5868             if (eptr >= md->end_subject)
5869               {
5870               SCHECK_PARTIAL();
5871               break;
5872               }
5873             GETCHARLEN(c, eptr, len);
5874             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5875             eptr+= len;
5876             }
5877           break;
5878
5879           case OP_NOT_WORDCHAR:
5880           for (i = min; i < max; i++)
5881             {
5882             int len = 1;
5883             if (eptr >= md->end_subject)
5884               {
5885               SCHECK_PARTIAL();
5886               break;
5887               }
5888             GETCHARLEN(c, eptr, len);
5889             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5890             eptr+= len;
5891             }
5892           break;
5893
5894           case OP_WORDCHAR:
5895           for (i = min; i < max; i++)
5896             {
5897             int len = 1;
5898             if (eptr >= md->end_subject)
5899               {
5900               SCHECK_PARTIAL();
5901               break;
5902               }
5903             GETCHARLEN(c, eptr, len);
5904             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5905             eptr+= len;
5906             }
5907           break;
5908
5909           default:
5910           RRETURN(PCRE_ERROR_INTERNAL);
5911           }
5912
5913         /* eptr is now past the end of the maximum run. If possessive, we are
5914         done (no backing up). Otherwise, match at this position; anything other
5915         than no match is immediately returned. For nomatch, back up one
5916         character, unless we are matching \R and the last thing matched was
5917         \r\n, in which case, back up two bytes. */
5918
5919         if (possessive) continue;
5920         for(;;)
5921           {
5922           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5923           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5924           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5925           BACKCHAR(eptr);
5926           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5927               eptr[-1] == '\r') eptr--;
5928           }
5929         }
5930       else
5931 #endif  /* SUPPORT_UTF */
5932       /* Not UTF mode */
5933         {
5934         switch(ctype)
5935           {
5936           case OP_ANY:
5937           for (i = min; i < max; i++)
5938             {
5939             if (eptr >= md->end_subject)
5940               {
5941               SCHECK_PARTIAL();
5942               break;
5943               }
5944             if (IS_NEWLINE(eptr)) break;
5945             if (md->partial != 0 &&    /* Take care with CRLF partial */
5946                 eptr + 1 >= md->end_subject &&
5947                 NLBLOCK->nltype == NLTYPE_FIXED &&
5948                 NLBLOCK->nllen == 2 &&
5949                 *eptr == NLBLOCK->nl[0])
5950               {
5951               md->hitend = TRUE;
5952               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5953               }
5954             eptr++;
5955             }
5956           break;
5957
5958           case OP_ALLANY:
5959           case OP_ANYBYTE:
5960           c = max - min;
5961           if (c > (unsigned int)(md->end_subject - eptr))
5962             {
5963             eptr = md->end_subject;
5964             SCHECK_PARTIAL();
5965             }
5966           else eptr += c;
5967           break;
5968
5969           case OP_ANYNL:
5970           for (i = min; i < max; i++)
5971             {
5972             if (eptr >= md->end_subject)
5973               {
5974               SCHECK_PARTIAL();
5975               break;
5976               }
5977             c = *eptr;
5978             if (c == 0x000d)
5979               {
5980               if (++eptr >= md->end_subject) break;
5981               if (*eptr == 0x000a) eptr++;
5982               }
5983             else
5984               {
5985               if (c != 0x000a && (md->bsr_anycrlf ||
5986                 (c != 0x000b && c != 0x000c && c != 0x0085
5987 #ifdef COMPILE_PCRE16
5988                 && c != 0x2028 && c != 0x2029
5989 #endif
5990                 ))) break;
5991               eptr++;
5992               }
5993             }
5994           break;
5995
5996           case OP_NOT_HSPACE:
5997           for (i = min; i < max; i++)
5998             {
5999             if (eptr >= md->end_subject)
6000               {
6001               SCHECK_PARTIAL();
6002               break;
6003               }
6004             c = *eptr;
6005             if (c == 0x09 || c == 0x20 || c == 0xa0
6006 #ifdef COMPILE_PCRE16
6007               || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6008               || c == 0x202f || c == 0x205f || c == 0x3000
6009 #endif
6010               ) break;
6011             eptr++;
6012             }
6013           break;
6014
6015           case OP_HSPACE:
6016           for (i = min; i < max; i++)
6017             {
6018             if (eptr >= md->end_subject)
6019               {
6020               SCHECK_PARTIAL();
6021               break;
6022               }
6023             c = *eptr;
6024             if (c != 0x09 && c != 0x20 && c != 0xa0
6025 #ifdef COMPILE_PCRE16
6026               && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6027               && c != 0x202f && c != 0x205f && c != 0x3000
6028 #endif
6029               ) break;
6030             eptr++;
6031             }
6032           break;
6033
6034           case OP_NOT_VSPACE:
6035           for (i = min; i < max; i++)
6036             {
6037             if (eptr >= md->end_subject)
6038               {
6039               SCHECK_PARTIAL();
6040               break;
6041               }
6042             c = *eptr;
6043             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6044 #ifdef COMPILE_PCRE16
6045               || c == 0x2028 || c == 0x2029
6046 #endif
6047               ) break;
6048             eptr++;
6049             }
6050           break;
6051
6052           case OP_VSPACE:
6053           for (i = min; i < max; i++)
6054             {
6055             if (eptr >= md->end_subject)
6056               {
6057               SCHECK_PARTIAL();
6058               break;
6059               }
6060             c = *eptr;
6061             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6062 #ifdef COMPILE_PCRE16
6063               && c != 0x2028 && c != 0x2029
6064 #endif
6065               ) break;
6066             eptr++;
6067             }
6068           break;
6069
6070           case OP_NOT_DIGIT:
6071           for (i = min; i < max; i++)
6072             {
6073             if (eptr >= md->end_subject)
6074               {
6075               SCHECK_PARTIAL();
6076               break;
6077               }
6078             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6079             eptr++;
6080             }
6081           break;
6082
6083           case OP_DIGIT:
6084           for (i = min; i < max; i++)
6085             {
6086             if (eptr >= md->end_subject)
6087               {
6088               SCHECK_PARTIAL();
6089               break;
6090               }
6091             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6092             eptr++;
6093             }
6094           break;
6095
6096           case OP_NOT_WHITESPACE:
6097           for (i = min; i < max; i++)
6098             {
6099             if (eptr >= md->end_subject)
6100               {
6101               SCHECK_PARTIAL();
6102               break;
6103               }
6104             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6105             eptr++;
6106             }
6107           break;
6108
6109           case OP_WHITESPACE:
6110           for (i = min; i < max; i++)
6111             {
6112             if (eptr >= md->end_subject)
6113               {
6114               SCHECK_PARTIAL();
6115               break;
6116               }
6117             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6118             eptr++;
6119             }
6120           break;
6121
6122           case OP_NOT_WORDCHAR:
6123           for (i = min; i < max; i++)
6124             {
6125             if (eptr >= md->end_subject)
6126               {
6127               SCHECK_PARTIAL();
6128               break;
6129               }
6130             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6131             eptr++;
6132             }
6133           break;
6134
6135           case OP_WORDCHAR:
6136           for (i = min; i < max; i++)
6137             {
6138             if (eptr >= md->end_subject)
6139               {
6140               SCHECK_PARTIAL();
6141               break;
6142               }
6143             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6144             eptr++;
6145             }
6146           break;
6147
6148           default:
6149           RRETURN(PCRE_ERROR_INTERNAL);
6150           }
6151
6152         /* eptr is now past the end of the maximum run. If possessive, we are
6153         done (no backing up). Otherwise, match at this position; anything other
6154         than no match is immediately returned. For nomatch, back up one
6155         character (byte), unless we are matching \R and the last thing matched
6156         was \r\n, in which case, back up two bytes. */
6157
6158         if (possessive) continue;
6159         while (eptr >= pp)
6160           {
6161           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6162           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6163           eptr--;
6164           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
6165               eptr[-1] == '\r') eptr--;
6166           }
6167         }
6168
6169       /* Get here if we can't make it match with any permitted repetitions */
6170
6171       RRETURN(MATCH_NOMATCH);
6172       }
6173     /* Control never gets here */
6174
6175     /* There's been some horrible disaster. Arrival here can only mean there is
6176     something seriously wrong in the code above or the OP_xxx definitions. */
6177
6178     default:
6179     DPRINTF(("Unknown opcode %d\n", *ecode));
6180     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6181     }
6182
6183   /* Do not stick any code in here without much thought; it is assumed
6184   that "continue" in the code above comes out to here to repeat the main
6185   loop. */
6186
6187   }             /* End of main loop */
6188 /* Control never reaches here */
6189
6190
6191 /* When compiling to use the heap rather than the stack for recursive calls to
6192 match(), the RRETURN() macro jumps here. The number that is saved in
6193 frame->Xwhere indicates which label we actually want to return to. */
6194
6195 #ifdef NO_RECURSE
6196 #define LBL(val) case val: goto L_RM##val;
6197 HEAP_RETURN:
6198 switch (frame->Xwhere)
6199   {
6200   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6201   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6202   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6203   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6204   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6205   LBL(65) LBL(66)
6206 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6207   LBL(21)
6208 #endif
6209 #ifdef SUPPORT_UTF
6210   LBL(16) LBL(18) LBL(20)
6211   LBL(22) LBL(23) LBL(28) LBL(30)
6212   LBL(32) LBL(34) LBL(42) LBL(46)
6213 #ifdef SUPPORT_UCP
6214   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6215   LBL(59) LBL(60) LBL(61) LBL(62)
6216 #endif  /* SUPPORT_UCP */
6217 #endif  /* SUPPORT_UTF */
6218   default:
6219   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6220
6221 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6222
6223   return PCRE_ERROR_INTERNAL;
6224   }
6225 #undef LBL
6226 #endif  /* NO_RECURSE */
6227 }
6228
6229
6230 /***************************************************************************
6231 ****************************************************************************
6232                    RECURSION IN THE match() FUNCTION
6233
6234 Undefine all the macros that were defined above to handle this. */
6235
6236 #ifdef NO_RECURSE
6237 #undef eptr
6238 #undef ecode
6239 #undef mstart
6240 #undef offset_top
6241 #undef eptrb
6242 #undef flags
6243
6244 #undef callpat
6245 #undef charptr
6246 #undef data
6247 #undef next
6248 #undef pp
6249 #undef prev
6250 #undef saved_eptr
6251
6252 #undef new_recursive
6253
6254 #undef cur_is_word
6255 #undef condition
6256 #undef prev_is_word
6257
6258 #undef ctype
6259 #undef length
6260 #undef max
6261 #undef min
6262 #undef number
6263 #undef offset
6264 #undef op
6265 #undef save_capture_last
6266 #undef save_offset1
6267 #undef save_offset2
6268 #undef save_offset3
6269 #undef stacksave
6270
6271 #undef newptrb
6272
6273 #endif
6274
6275 /* fc and fi are defined as macros whether or not NO_RECURSE is set */
6276
6277 #undef fc
6278 #undef fi
6279
6280 /***************************************************************************
6281 ***************************************************************************/
6282
6283
6284 #ifdef NO_RECURSE
6285 /*************************************************
6286 *          Release allocated heap frames         *
6287 *************************************************/
6288
6289 /* This function releases all the allocated frames. The base frame is on the
6290 machine stack, and so must not be freed.
6291
6292 Argument: the address of the base frame
6293 Returns:  nothing
6294 */
6295
6296 static void
6297 release_match_heapframes (heapframe *frame_base)
6298 {
6299 heapframe *nextframe = frame_base->Xnextframe;
6300 while (nextframe != NULL)
6301   {
6302   heapframe *oldframe = nextframe;
6303   nextframe = nextframe->Xnextframe;
6304   (PUBL(stack_free))(oldframe);
6305   }
6306 }
6307 #endif
6308
6309
6310 /*************************************************
6311 *         Execute a Regular Expression           *
6312 *************************************************/
6313
6314 /* This function applies a compiled re to a subject string and picks out
6315 portions of the string if it matches. Two elements in the vector are set for
6316 each substring: the offsets to the start and end of the substring.
6317
6318 Arguments:
6319   argument_re     points to the compiled expression
6320   extra_data      points to extra data or is NULL
6321   subject         points to the subject string
6322   length          length of subject string (may contain binary zeros)
6323   start_offset    where to start in the subject string
6324   options         option bits
6325   offsets         points to a vector of ints to be filled in with offsets
6326   offsetcount     the number of elements in the vector
6327
6328 Returns:          > 0 => success; value is the number of elements filled in
6329                   = 0 => success, but offsets is not big enough
6330                    -1 => failed to match
6331                  < -1 => some kind of unexpected problem
6332 */
6333
6334 #ifdef COMPILE_PCRE8
6335 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6336 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6337   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6338   int offsetcount)
6339 #else
6340 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6341 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6342   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6343   int offsetcount)
6344 #endif
6345 {
     /* Overview: after validating the arguments (and, for UTF patterns, the
     subject), this function hands over to the JIT code when it is usable, or
     else unpacks the options into the match_data block and calls match() at
     successive start positions until there is a match, an error, or no further
     start position is permitted. */
6346 int rc, ocount, arg_offset_max;
6347 int newline;
6348 BOOL using_temporary_offsets = FALSE;
6349 BOOL anchored;
6350 BOOL startline;
6351 BOOL firstline;
6352 BOOL utf;
6353 BOOL has_first_char = FALSE;
6354 BOOL has_req_char = FALSE;
6355 pcre_uchar first_char = 0;
6356 pcre_uchar first_char2 = 0;
6357 pcre_uchar req_char = 0;
6358 pcre_uchar req_char2 = 0;
6359 match_data match_block;
6360 match_data *md = &match_block;
6361 const pcre_uint8 *tables;
6362 const pcre_uint8 *start_bits = NULL;
6363 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6364 PCRE_PUCHAR end_subject;
6365 PCRE_PUCHAR start_partial = NULL;
6366 PCRE_PUCHAR req_char_ptr = start_match - 1;
6367
6368 const pcre_study_data *study;
6369 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6370
6371 #ifdef NO_RECURSE
6372 heapframe frame_zero;
6373 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6374 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6375 md->match_frames_base = &frame_zero;
6376 #endif
6377
6378 /* Check for the special magic call that measures the size of the stack used
6379 per recursive call of match(). Without the funny casting for sizeof, a Windows
6380 compiler gave this error: "unary minus operator applied to unsigned type,
6381 result still unsigned". Hopefully the cast fixes that. */
6382
6383 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6384     start_offset == -999)
6385 #ifdef NO_RECURSE
6386   return -((int)sizeof(heapframe));
6387 #else
6388   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6389 #endif
6390
6391 /* Plausibility checks */
6392
6393 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6394 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6395   return PCRE_ERROR_NULL;
6396 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6397 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6398
6399 /* Check that the first field in the block is the magic number. If it is not,
6400 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6401 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6402 means that the pattern is likely compiled with different endianness. */
6403
6404 if (re->magic_number != MAGIC_NUMBER)
6405   return re->magic_number == REVERSED_MAGIC_NUMBER?
6406     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6407 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6408
6409 /* These two settings are used in the code for checking a UTF-8 string that
6410 follows immediately afterwards. Other values in the md block are used only
6411 during "normal" pcre_exec() processing, not when the JIT support is in use,
6412 so they are set up later. */
6413
6414 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6415 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6416 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6417               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6418
6419 /* Check a UTF-8 string if required. Pass back the character offset and error
6420 code for an invalid string if a results vector is available. */
6421
6422 #ifdef SUPPORT_UTF
6423 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6424   {
6425   int erroroffset;
6426   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6427   if (errorcode != 0)
6428     {
6429     if (offsetcount >= 2)
6430       {
6431       offsets[0] = erroroffset;
6432       offsets[1] = errorcode;
6433       }
6434 #ifdef COMPILE_PCRE16
6435     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6436       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6437 #else
6438     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6439       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6440 #endif
6441     }
6442
6443   /* Check that a start_offset points to the start of a UTF character. */
6444   if (start_offset > 0 && start_offset < length &&
6445       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6446     return PCRE_ERROR_BADUTF8_OFFSET;
6447   }
6448 #endif
6449
6450 /* If the pattern was successfully studied with JIT support, run the JIT
6451 executable instead of the rest of this function. Most options must be set at
6452 compile time for the JIT code to be usable. Fallback to the normal code path if
6453 an unsupported flag is set. */
6454
6455 #ifdef SUPPORT_JIT
6456 if (extra_data != NULL
6457     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6458                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6459     && extra_data->executable_jit != NULL
6460     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6461                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6462                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6463   {
6464   rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6465        start_offset, options, offsets, offsetcount);
6466
6467   /* PCRE_ERROR_NULL means that the selected normal or partial matching
6468   mode is not compiled. In this case we simply fallback to interpreter. */
6469
6470   if (rc != PCRE_ERROR_NULL) return rc;
6471   }
6472 #endif
6473
6474 /* Carry on with non-JIT matching. This information is for finding all the
6475 numbers associated with a given name, for condition testing. */
6476
6477 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6478 md->name_count = re->name_count;
6479 md->name_entry_size = re->name_entry_size;
6480
6481 /* Fish out the optional data from the extra_data structure, first setting
6482 the default values. */
6483
6484 study = NULL;
6485 md->match_limit = MATCH_LIMIT;
6486 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6487 md->callout_data = NULL;
6488
6489 /* The table pointer is always in native byte order. */
6490
6491 tables = re->tables;
6492
6493 if (extra_data != NULL)
6494   {
6495   unsigned int flags = extra_data->flags;
6496   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6497     study = (const pcre_study_data *)extra_data->study_data;
6498   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6499     md->match_limit = extra_data->match_limit;
6500   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6501     md->match_limit_recursion = extra_data->match_limit_recursion;
6502   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6503     md->callout_data = extra_data->callout_data;
6504   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6505   }
6506
6507 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6508 is a feature that makes it possible to save compiled regex and re-use them
6509 in other programs later. */
6510
6511 if (tables == NULL) tables = PRIV(default_tables);
6512
6513 /* Set up other data */
6514
6515 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6516 startline = (re->flags & PCRE_STARTLINE) != 0;
6517 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6518
6519 /* The code starts after the real_pcre block and the capture name table. */
6520
6521 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6522   re->name_count * re->name_entry_size;
6523
6524 md->start_subject = (PCRE_PUCHAR)subject;
6525 md->start_offset = start_offset;
6526 md->end_subject = md->start_subject + length;
6527 end_subject = md->end_subject;
6528
6529 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6530 md->use_ucp = (re->options & PCRE_UCP) != 0;
6531 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6532 md->ignore_skip_arg = FALSE;
6533
6534 /* Some options are unpacked into BOOL variables in the hope that testing
6535 them will be faster than individual option bits. */
6536
6537 md->notbol = (options & PCRE_NOTBOL) != 0;
6538 md->noteol = (options & PCRE_NOTEOL) != 0;
6539 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6540 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6541
6542 md->hitend = FALSE;
6543 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6544
6545 md->recursive = NULL;                   /* No recursion at top level */
6546 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6547
6548 md->lcc = tables + lcc_offset;
6549 md->fcc = tables + fcc_offset;
6550 md->ctypes = tables + ctypes_offset;
6551
6552 /* Handle different \R options. */
6553
6554 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6555   {
6556   case 0:
6557   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6558     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6559   else
6560 #ifdef BSR_ANYCRLF
6561   md->bsr_anycrlf = TRUE;
6562 #else
6563   md->bsr_anycrlf = FALSE;
6564 #endif
6565   break;
6566
6567   case PCRE_BSR_ANYCRLF:
6568   md->bsr_anycrlf = TRUE;
6569   break;
6570
6571   case PCRE_BSR_UNICODE:
6572   md->bsr_anycrlf = FALSE;
6573   break;
6574
6575   default: return PCRE_ERROR_BADNEWLINE;
6576   }
6577
6578 /* Handle different types of newline. The three bits give eight cases. If
6579 nothing is set at run time, whatever was used at compile time applies. */
6580
6581 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6582         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6583   {
6584   case 0: newline = NEWLINE; break;   /* Compile-time default */
6585   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6586   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6587   case PCRE_NEWLINE_CR+
6588        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6589   case PCRE_NEWLINE_ANY: newline = -1; break;
6590   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6591   default: return PCRE_ERROR_BADNEWLINE;
6592   }
6593
6594 if (newline == -2)
6595   {
6596   md->nltype = NLTYPE_ANYCRLF;
6597   }
6598 else if (newline < 0)
6599   {
6600   md->nltype = NLTYPE_ANY;
6601   }
6602 else
6603   {
6604   md->nltype = NLTYPE_FIXED;
6605   if (newline > 255)
6606     {
6607     md->nllen = 2;
6608     md->nl[0] = (newline >> 8) & 255;
6609     md->nl[1] = newline & 255;
6610     }
6611   else
6612     {
6613     md->nllen = 1;
6614     md->nl[0] = newline;
6615     }
6616   }
6617
6618 /* Partial matching was originally supported only for a restricted set of
6619 regexes; from release 8.00 there are no restrictions, but the bits are still
6620 defined (though never set). So there's no harm in leaving this code. */
6621
6622 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6623   return PCRE_ERROR_BADPARTIAL;
6624
6625 /* If the expression has got more back references than the offsets supplied can
6626 hold, we get a temporary chunk of working store to use during the matching.
6627 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6628 of 3. */
6629
6630 ocount = offsetcount - (offsetcount % 3);
6631 arg_offset_max = (2*ocount)/3;
6632
6633 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6634   {
6635   ocount = re->top_backref * 3 + 3;
6636   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6637   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6638   using_temporary_offsets = TRUE;
6639   DPRINTF(("Got memory to hold back references\n"));
6640   }
6641 else md->offset_vector = offsets;
6642
6643 md->offset_end = ocount;
6644 md->offset_max = (2*ocount)/3;
6645 md->offset_overflow = FALSE;
6646 md->capture_last = -1;
6647
6648 /* Reset the working variable associated with each extraction. These should
6649 never be used unless previously set, but they get saved and restored, and so we
6650 initialize them to avoid reading uninitialized locations. Also, unset the
6651 offsets for the matched string. This is really just for tidiness with callouts,
6652 in case they inspect these fields. When the caller's own vector is in use it
     may have fewer than two elements (offsetcount is 0 or 1 with a non-NULL
     offsets pointer), so each of the first two slots is written only if it
     really exists; the temporary vector always has at least six elements. */
6653
6654 if (md->offset_vector != NULL)
6655   {
6656   int *iptr = md->offset_vector + ocount;
6657   int *iend = iptr - re->top_bracket;
6658   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6659   while (--iptr >= iend) *iptr = -1;
6660   if (using_temporary_offsets || offsetcount > 0) md->offset_vector[0] = -1;
       if (using_temporary_offsets || offsetcount > 1) md->offset_vector[1] = -1;
6661   }
6662
6663 /* Set up the first character to match, if available. The first_char value is
6664 never set for an anchored regular expression, but the anchoring may be forced
6665 at run time, so we have to test for anchoring. The first char may be unset for
6666 an unanchored pattern, of course. If there's no first char and the pattern was
6667 studied, there may be a bitmap of possible first characters. */
6668
6669 if (!anchored)
6670   {
6671   if ((re->flags & PCRE_FIRSTSET) != 0)
6672     {
6673     has_first_char = TRUE;
6674     first_char = first_char2 = (pcre_uchar)(re->first_char);
6675     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6676       {
6677       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6678 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6679       if (utf && first_char > 127)
6680         first_char2 = UCD_OTHERCASE(first_char);
6681 #endif
6682       }
6683     }
6684   else
6685     if (!startline && study != NULL &&
6686       (study->flags & PCRE_STUDY_MAPPED) != 0)
6687         start_bits = study->start_bits;
6688   }
6689
6690 /* For anchored or unanchored matches, there may be a "last known required
6691 character" set. */
6692
6693 if ((re->flags & PCRE_REQCHSET) != 0)
6694   {
6695   has_req_char = TRUE;
6696   req_char = req_char2 = (pcre_uchar)(re->req_char);
6697   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6698     {
6699     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6700 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6701     if (utf && req_char > 127)
6702       req_char2 = UCD_OTHERCASE(req_char);
6703 #endif
6704     }
6705   }
6706
6707
6708 /* ==========================================================================*/
6709
6710 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6711 the loop runs just once. */
6712
6713 for(;;)
6714   {
6715   PCRE_PUCHAR save_end_subject = end_subject;
6716   PCRE_PUCHAR new_start_match;
6717
6718   /* If firstline is TRUE, the start of the match is constrained to the first
6719   line of a multiline string. That is, the match must be before or at the first
6720   newline. Implement this by temporarily adjusting end_subject so that we stop
6721   scanning at a newline. If the match fails at the newline, later code breaks
6722   this loop. */
6723
6724   if (firstline)
6725     {
6726     PCRE_PUCHAR t = start_match;
6727 #ifdef SUPPORT_UTF
6728     if (utf)
6729       {
6730       while (t < md->end_subject && !IS_NEWLINE(t))
6731         {
6732         t++;
6733         ACROSSCHAR(t < end_subject, *t, t++);
6734         }
6735       }
6736     else
6737 #endif
6738     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6739     end_subject = t;
6740     }
6741
6742   /* There are some optimizations that avoid running the match if a known
6743   starting point is not found, or if a known later character is not present.
6744   However, there is an option that disables these, for testing and for ensuring
6745   that all callouts do actually occur. The option can be set in the regex by
6746   (*NO_START_OPT) or passed in match-time options. */
6747
6748   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6749     {
6750     /* Advance to a unique first char if there is one. */
6751
6752     if (has_first_char)
6753       {
6754       if (first_char != first_char2)
6755         while (start_match < end_subject &&
6756             *start_match != first_char && *start_match != first_char2)
6757           start_match++;
6758       else
6759         while (start_match < end_subject && *start_match != first_char)
6760           start_match++;
6761       }
6762
6763     /* Or to just after a linebreak for a multiline match */
6764
6765     else if (startline)
6766       {
6767       if (start_match > md->start_subject + start_offset)
6768         {
6769 #ifdef SUPPORT_UTF
6770         if (utf)
6771           {
6772           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6773             {
6774             start_match++;
6775             ACROSSCHAR(start_match < end_subject, *start_match,
6776               start_match++);
6777             }
6778           }
6779         else
6780 #endif
6781         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6782           start_match++;
6783
6784         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6785         and we are now at a LF, advance the match position by one more character.
6786         */
6787
6788         if (start_match[-1] == CHAR_CR &&
6789              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6790              start_match < end_subject &&
6791              *start_match == CHAR_NL)
6792           start_match++;
6793         }
6794       }
6795
6796     /* Or to a non-unique first byte after study */
6797
6798     else if (start_bits != NULL)
6799       {
6800       while (start_match < end_subject)
6801         {
6802         unsigned int c = *start_match;
6803 #ifndef COMPILE_PCRE8
6804         if (c > 255) c = 255;
6805 #endif
6806         if ((start_bits[c/8] & (1 << (c&7))) == 0)
6807           {
6808           start_match++;
6809 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6810           /* In non 8-bit mode, the iteration will stop for
6811           characters > 255 at the beginning or not stop at all. */
6812           if (utf)
6813             ACROSSCHAR(start_match < end_subject, *start_match,
6814               start_match++);
6815 #endif
6816           }
6817         else break;
6818         }
6819       }
6820     }   /* Starting optimizations */
6821
6822   /* Restore fudged end_subject */
6823
6824   end_subject = save_end_subject;
6825
6826   /* The following two optimizations are disabled for partial matching or if
6827   disabling is explicitly requested. */
6828
6829   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6830     {
6831     /* If the pattern was studied, a minimum subject length may be set. This is
6832     a lower bound; no actual string of that length may actually match the
6833     pattern. Although the value is, strictly, in characters, we treat it as
6834     bytes to avoid spending too much time in this optimization. */
6835
6836     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6837         (pcre_uint32)(end_subject - start_match) < study->minlength)
6838       {
6839       rc = MATCH_NOMATCH;
6840       break;
6841       }
6842
6843     /* If req_char is set, we know that that character must appear in the
6844     subject for the match to succeed. If the first character is set, req_char
6845     must be later in the subject; otherwise the test starts at the match point.
6846     This optimization can save a huge amount of backtracking in patterns with
6847     nested unlimited repeats that aren't going to match. Writing separate code
6848     for cased/caseless versions makes it go faster, as does using an
6849     autoincrement and backing off on a match.
6850
6851     HOWEVER: when the subject string is very, very long, searching to its end
6852     can take a long time, and give bad performance on quite ordinary patterns.
6853     This showed up when somebody was matching something like /^\d+C/ on a
6854     32-megabyte string... so we don't do this when the string is sufficiently
6855     long. */
6856
6857     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6858       {
6859       PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6860
6861       /* We don't need to repeat the search if we haven't yet reached the
6862       place we found it at last time. */
6863
6864       if (p > req_char_ptr)
6865         {
6866         if (req_char != req_char2)
6867           {
6868           while (p < end_subject)
6869             {
6870             int pp = *p++;
6871             if (pp == req_char || pp == req_char2) { p--; break; }
6872             }
6873           }
6874         else
6875           {
6876           while (p < end_subject)
6877             {
6878             if (*p++ == req_char) { p--; break; }
6879             }
6880           }
6881
6882         /* If we can't find the required character, break the matching loop,
6883         forcing a match failure. */
6884
6885         if (p >= end_subject)
6886           {
6887           rc = MATCH_NOMATCH;
6888           break;
6889           }
6890
6891         /* If we have found the required character, save the point where we
6892         found it, so that we don't search again next time round the loop if
6893         the start hasn't passed this character yet. */
6894
6895         req_char_ptr = p;
6896         }
6897       }
6898     }
6899
6900 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6901   printf(">>>> Match against: ");
6902   pchars(start_match, end_subject - start_match, TRUE, md);
6903   printf("\n");
6904 #endif
6905
6906   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6907   first starting point for which a partial match was found. */
6908
6909   md->start_match_ptr = start_match;
6910   md->start_used_ptr = start_match;
6911   md->match_call_count = 0;
6912   md->match_function_type = 0;
6913   md->end_offset_top = 0;
6914   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6915   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6916
6917   switch(rc)
6918     {
6919     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6920     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6921     entirely. The only way we can do that is to re-do the match at the same
6922     point, with a flag to force SKIP with an argument to be ignored. Just
6923     treating this case as NOMATCH does not work because it does not check other
6924     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6925
6926     case MATCH_SKIP_ARG:
6927     new_start_match = start_match;
6928     md->ignore_skip_arg = TRUE;
6929     break;
6930
6931     /* SKIP passes back the next starting point explicitly, but if it is the
6932     same as the match we have just done, treat it as NOMATCH. */
6933
6934     case MATCH_SKIP:
6935     if (md->start_match_ptr != start_match)
6936       {
6937       new_start_match = md->start_match_ptr;
6938       break;
6939       }
6940     /* Fall through */
6941
6942     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6943     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6944
6945     case MATCH_NOMATCH:
6946     case MATCH_PRUNE:
6947     case MATCH_THEN:
6948     md->ignore_skip_arg = FALSE;
6949     new_start_match = start_match + 1;
6950 #ifdef SUPPORT_UTF
6951     if (utf)
6952       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6953         new_start_match++);
6954 #endif
6955     break;
6956
6957     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6958
6959     case MATCH_COMMIT:
6960     rc = MATCH_NOMATCH;
6961     goto ENDLOOP;
6962
6963     /* Any other return is either a match, or some kind of error. */
6964
6965     default:
6966     goto ENDLOOP;
6967     }
6968
6969   /* Control reaches here for the various types of "no match at this point"
6970   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6971
6972   rc = MATCH_NOMATCH;
6973
6974   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6975   newline in the subject (though it may continue over the newline). Therefore,
6976   if we have just failed to match, starting at a newline, do not continue. */
6977
6978   if (firstline && IS_NEWLINE(start_match)) break;
6979
6980   /* Advance to new matching position */
6981
6982   start_match = new_start_match;
6983
6984   /* Break the loop if the pattern is anchored or if we have passed the end of
6985   the subject. */
6986
6987   if (anchored || start_match > end_subject) break;
6988
6989   /* If we have just passed a CR and we are now at a LF, and the pattern does
6990   not contain any explicit matches for \r or \n, and the newline option is CRLF
6991   or ANY or ANYCRLF, advance the match position by one more character. In
6992   normal matching start_match will always be greater than the first position at
6993   this stage, but a failed *SKIP can cause a return at the same point, which is
6994   why the first test exists. */
6995
6996   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6997       start_match[-1] == CHAR_CR &&
6998       start_match < end_subject &&
6999       *start_match == CHAR_NL &&
7000       (re->flags & PCRE_HASCRORLF) == 0 &&
7001         (md->nltype == NLTYPE_ANY ||
7002          md->nltype == NLTYPE_ANYCRLF ||
7003          md->nllen == 2))
7004     start_match++;
7005
7006   md->mark = NULL;   /* Reset for start of next match attempt */
7007   }                  /* End of for(;;) "bumpalong" loop */
7008
7009 /* ==========================================================================*/
7010
7011 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7012 conditions is true:
7013
7014 (1) The pattern is anchored or the match was failed by (*COMMIT);
7015
7016 (2) We are past the end of the subject;
7017
7018 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7019     this option requests that a match occur at or before the first newline in
7020     the subject.
7021
7022 When we have a match and the offset vector is big enough to deal with any
7023 backreferences, captured substring offsets will already be set up. In the case
7024 where we had to get some local store to hold offsets for backreference
7025 processing, copy those that we can. In this case there need not be overflow if
7026 certain parts of the pattern were not used, even though there are more
7027 capturing parentheses than vector slots. */
7028
7029 ENDLOOP:
7030
7031 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7032   {
7033   if (using_temporary_offsets)
7034     {
7035     if (arg_offset_max >= 4)
7036       {
7037       memcpy(offsets + 2, md->offset_vector + 2,
7038         (arg_offset_max - 2) * sizeof(int));
7039       DPRINTF(("Copied offsets from temporary memory\n"));
7040       }
7041     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
7042     DPRINTF(("Freeing temporary memory\n"));
7043     (PUBL(free))(md->offset_vector);
7044     }
7045
7046   /* Set the return code to the number of captured strings, or 0 if there were
7047   too many to fit into the vector. */
7048
7049   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
7050     0 : md->end_offset_top/2;
7051
7052   /* If there is space in the offset vector, set any unused pairs at the end of
7053   the pattern to -1 for backwards compatibility. It is documented that this
7054   happens. In earlier versions, the whole set of potential capturing offsets
7055   was set to -1 each time round the loop, but this is handled differently now.
7056   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7057   those at the end that need unsetting here. We can't just unset them all at
7058   the start of the whole thing because they may get set in one branch that is
7059   not the final matching branch. */
7060
7061   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7062     {
7063     int *iptr, *iend;
7064     int resetcount = 2 + re->top_bracket * 2;
7065     if (resetcount > offsetcount) resetcount = offsetcount;
7066     iptr = offsets + md->end_offset_top;
7067     iend = offsets + resetcount;
7068     while (iptr < iend) *iptr++ = -1;
7069     }
7070
7071   /* If there is space, set up the whole thing as substring 0. The value of
7072   md->start_match_ptr might be modified if \K was encountered on the success
7073   matching path. */
7074
7075   if (offsetcount < 2) rc = 0; else
7076     {
7077     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7078     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7079     }
7080
7081   /* Return MARK data if requested */
7082
7083   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7084     *(extra_data->mark) = (pcre_uchar *)md->mark;
7085   DPRINTF((">>>> returning %d\n", rc));
7086 #ifdef NO_RECURSE
7087   release_match_heapframes(&frame_zero);
7088 #endif
7089   return rc;
7090   }
7091
7092 /* Control gets here if there has been an error, or if the overall match
7093 attempt has failed at all permitted starting positions. */
7094
7095 if (using_temporary_offsets)
7096   {
7097   DPRINTF(("Freeing temporary memory\n"));
7098   (PUBL(free))(md->offset_vector);
7099   }
7100
7101 /* For anything other than nomatch or partial match, just return the code. */
7102
7103 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7104   {
7105   DPRINTF((">>>> error: returning %d\n", rc));
7106 #ifdef NO_RECURSE
7107   release_match_heapframes(&frame_zero);
7108 #endif
7109   return rc;
7110   }
7111
7112 /* Handle partial matches - disable any mark data */
7113
7114 if (start_partial != NULL)
7115   {
7116   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7117   md->mark = NULL;
7118   if (offsetcount > 1)
7119     {
7120     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7121     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7122     }
7123   rc = PCRE_ERROR_PARTIAL;
7124   }
7125
7126 /* This is the classic nomatch case */
7127
7128 else
7129   {
7130   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7131   rc = PCRE_ERROR_NOMATCH;
7132   }
7133
7134 /* Return the MARK data if it has been requested. */
7135
7136 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7137   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7138 #ifdef NO_RECURSE
7139   release_match_heapframes(&frame_zero);
7140 #endif
7141 return rc;
7142 }
7143
7144 /* End of pcre_exec.c */