glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2012 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40 /* This module contains pcre_exec(), the externally visible function that does
  41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  42 possible. There are also some static supporting functions. */
  43
  44 #ifdef HAVE_CONFIG_H
  45 #include "config.h"
  46 #endif
  47
  48 #define NLBLOCK md             /* Block containing newline information */
  49 #define PSSTART start_subject  /* Field containing processed string start */
  50 #define PSEND   end_subject    /* Field containing processed string end */
  51
  52 #include "pcre_internal.h"
  53
  54 /* Undefine some potentially clashing cpp symbols */
  55
  56 #undef min
  57 #undef max
  58
  59 /* Values for setting in md->match_function_type to indicate two special types
  60 of call to match(). We do it this way to save on using another stack variable,
  61 as stack usage is to be discouraged. */
  62
  63 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
  64 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
  65
  66 /* Non-error returns from the match() function. Error returns are externally
  67 defined PCRE_ERROR_xxx codes, which are all negative. */
  68
  69 #define MATCH_MATCH        1
  70 #define MATCH_NOMATCH      0
  71
  72 /* Special internal returns from the match() function. Make them sufficiently
  73 negative to avoid the external error codes. */
  74
  75 #define MATCH_ACCEPT       (-999)
  76 #define MATCH_COMMIT       (-998)
  77 #define MATCH_KETRPOS      (-997)
  78 #define MATCH_ONCE         (-996)
  79 #define MATCH_PRUNE        (-995)
  80 #define MATCH_SKIP         (-994)
  81 #define MATCH_SKIP_ARG     (-993)
  82 #define MATCH_THEN         (-992)
  83
  84 /* Maximum number of ints of offset to save on the stack for recursive calls.
  85 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  86 because the offset vector is always a multiple of 3 long. */
  87
  88 #define REC_STACK_SAVE_MAX 30
  89
  90 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  91 /* NOTE(review): entries look like greedy/lazy pairs for *, + and ? - confirm. */
  92 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* minimum counts */
  93 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* maxima; 0 = no limit */
  94
  95
  96
  97 #ifdef PCRE_DEBUG
  98 /*************************************************
  99 *        Debugging function to print chars       *
 100 *************************************************/
 101
 102 /* Debugging aid: write up to "length" characters, starting at p, to stdout.
 103 Printable characters appear as themselves, all others as \x plus hex digits.
 104
 105 Arguments:
 106   p           first character to print
 107   length      maximum number of characters to print
 108   is_subject  TRUE to stop the output at md->end_subject
 109   md          pointer to matching data block; read only if is_subject is TRUE
 110
 111 Returns:     nothing
 112 */
 113
 114 static void
 115 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
 116 {
 117 unsigned int ch;   /* current character, widened for isprint()/printf() */
 118 if (is_subject && md->end_subject - p < length) length = md->end_subject - p;
 119 for (; length > 0; length--, p++)
 120   if (isprint(ch = *p)) printf("%c", ch); else printf("\\x%02x", ch);
 121 }
 122 #endif
 123
 124
 125
 126 /*************************************************
 127 *          Match a back-reference                *
 128 *************************************************/
 129
 130 /* Normally, if a back reference hasn't been set, the length that is passed is
 131 negative, so the match always fails. However, in JavaScript compatibility mode,
 132 the length passed is zero. Note that in caseless UTF-8 mode, the number of
 133 subject bytes matched may be different to the number of reference bytes.
 134
 135 Arguments:
 136   offset      index into the offset vector
 137   eptr        pointer into the subject
 138   length      length of reference to be matched (number of bytes)
 139   md          points to match data block
 140   caseless    TRUE if caseless
 141
 142 Returns:      >= 0 the number of subject bytes matched
 143               -1 no match
 144               -2 partial match; always given if at end of subject
 145 */
 146
 147 static int
 148 match_ref(int offset, PCRE_PUCHAR eptr, int length, match_data *md,
 149   BOOL caseless)
 150 {
 151 PCRE_PUCHAR eptr_start = eptr;   /* kept to compute how much subject was used */
 152 PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];   /* reference */
 153
 154 #ifdef PCRE_DEBUG   /* trace what is about to be compared */
 155 if (eptr >= md->end_subject)
 156   printf("matching subject <null>");
 157 else
 158   {
 159   printf("matching subject ");
 160   pchars(eptr, length, TRUE, md);
 161   }
 162 printf(" against backref ");
 163 pchars(p, length, FALSE, md);   /* FALSE: not clipped at md->end_subject */
 164 printf("\n");
 165 #endif
 166
 167 /* Always fail if reference not set (and not JavaScript compatible - in that
 168 case the length is passed as zero). */
 169
 170 if (length < 0) return -1;
 171
 172 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
 173 properly if Unicode properties are supported. Otherwise, we can check only
 174 ASCII characters. */
 175
 176 if (caseless)
 177   {
 178 #ifdef SUPPORT_UTF
 179 #ifdef SUPPORT_UCP
 180   if (md->utf)
 181     {
 182     /* Match characters up to the end of the reference. NOTE: the number of
 183     bytes matched may differ, because there are some characters whose upper and
 184     lower case versions code as different numbers of bytes. For example, U+023A
 185     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
 186     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
 187     the latter. It is important, therefore, to check the length along the
 188     reference, not along the subject (earlier code did this wrong). */
 189
 190     PCRE_PUCHAR endptr = p + length;   /* loop is bounded by the reference */
 191     while (p < endptr)
 192       {
 193       int c, d;
 194       if (eptr >= md->end_subject) return -2;   /* Partial match */
 195       GETCHARINC(c, eptr);   /* c comes from the subject */
 196       GETCHARINC(d, p);      /* d comes from the reference */
 197       if (c != d && c != UCD_OTHERCASE(d)) return -1;   /* d or its other case */
 198       }
 199     }
 200   else   /* governs the braced block that follows the two #endif lines */
 201 #endif
 202 #endif
 203
 204   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
 205   is no UCP support. */
 206     {
 207     while (length-- > 0)   /* p and eptr advance in step, one unit per pass */
 208       {
 209       if (eptr >= md->end_subject) return -2;   /* Partial match */
 210       if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
 211       p++;
 212       eptr++;
 213       }
 214     }
 215   }
 216
 217 /* In the caseful case, we can just compare the bytes, whether or not we
 218 are in UTF-8 mode. */
 219
 220 else
 221   {
 222   while (length-- > 0)
 223     {
 224     if (eptr >= md->end_subject) return -2;   /* Partial match */
 225     if (*p++ != *eptr++) return -1;
 226     }
 227   }
 228
 229 return (int)(eptr - eptr_start);   /* subject used; may differ from length */
 230 }
 231
 232
 233
 234 /***************************************************************************
 235 ****************************************************************************
 236                    RECURSION IN THE match() FUNCTION
 237
 238 The match() function is highly recursive, though not every recursive call
 239 increases the recursive depth. Nevertheless, some regular expressions can cause
 240 it to recurse to a great depth. I was writing for Unix, so I just let it call
 241 itself recursively. This uses the stack for saving everything that has to be
 242 saved for a recursive call. On Unix, the stack can be large, and this works
 243 fine.
 244
 245 It turns out that on some non-Unix-like systems there are problems with
 246 programs that use a lot of stack. (This despite the fact that every last chip
 247 has oodles of memory these days, and techniques for extending the stack have
 248 been known for decades.) So....
 249
 250 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 251 calls by keeping local variables that need to be preserved in blocks of memory
 252 obtained from malloc() instead of on the stack. Macros are used to
 253 achieve this so that the actual code doesn't look very different to what it
 254 always used to.
 255
 256 The original heap-recursive code used longjmp(). However, it seems that this
 257 can be very slow on some operating systems. Following a suggestion from Stan
 258 Switzer, the use of longjmp() has been abolished, at the cost of having to
 259 provide a unique number for each call to RMATCH. There is no way of generating
 260 a sequence of numbers at compile time in C. I have given them names, to make
 261 them stand out more clearly.
 262
 263 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 264 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 265 tests. Furthermore, not using longjmp() means that local dynamic variables
 266 don't have indeterminate values; this has meant that the frame size can be
 267 reduced because the result can be "passed back" by straight setting of the
 268 variable instead of being passed in the frame.
 269 ****************************************************************************
 270 ***************************************************************************/
 271
 272 /* Numbers for RMATCH calls (its "rw" argument). When this list is changed, the
 273 code at HEAP_RETURN below must be updated in sync.  */
 274 /* The heap-frame RMATCH stores the value in Xwhere and names a label L_RMn. */
 275 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 276        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 277        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 278        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 279        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 280        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
 281        RM61,  RM62, RM63, RM64, RM65, RM66 };
 282
 283 /* These versions of the macros use the stack, as normal. There are debugging
 284 versions and production versions. Note that the "rw" argument of RMATCH isn't
 285 actually used in this definition. */
 286
 287 #ifndef NO_RECURSE
 288
 289 #ifdef PCRE_DEBUG   /* debugging versions: trace every call and return */
 290 #define RMATCH(ra,rb,rc,rd,re,rw) \
 291   { \
 292   printf("match() called in line %d\n", __LINE__); \
 293   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* result lands in rrc */ \
 294   printf("to line %d\n", __LINE__); \
 295   }
 296 #define RRETURN(ra) \
 297   { \
 298   printf("match() returned %d from line %d ", ra, __LINE__); \
 299   return ra; \
 300   }
 301 #else   /* production versions: a plain recursive call and a plain return */
 302 #define RMATCH(ra,rb,rc,rd,re,rw) \
 303   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) /* mstart, rdepth: caller's */
 304 #define RRETURN(ra) return ra
 305 #endif
 306
 307 #else
 308
 309
 310 /* These versions of the macros manage a private stack on the heap. Note that
 311 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 312 argument of match(), which never changes. */
 313
 314 #define RMATCH(ra,rb,rc,rd,re,rw)\
 315   {\
 316   heapframe *newframe = frame->Xnextframe; /* reuse a linked frame if any */\
 317   if (newframe == NULL)\
 318     { /* none yet: allocate one and append it to the chain */\
 319     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
 320     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
 321     newframe->Xnextframe = NULL;\
 322     frame->Xnextframe = newframe;\
 323     }\
 324   frame->Xwhere = rw; /* remember which call site to resume */\
 325   newframe->Xeptr = ra; /* the "arguments" of the simulated call */\
 326   newframe->Xecode = rb;\
 327   newframe->Xmstart = mstart;\
 328   newframe->Xoffset_top = rc;\
 329   newframe->Xeptrb = re;\
 330   newframe->Xrdepth = frame->Xrdepth + 1;\
 331   newframe->Xprevframe = frame;\
 332   frame = newframe; /* the callee's frame becomes current */\
 333   DPRINTF(("restarting from line %d\n", __LINE__));\
 334   goto HEAP_RECURSE; /* jump to the HEAP_RECURSE label in match() */\
 335   L_##rw: /* resume label for this call site */\
 336   DPRINTF(("jumped back to line %d\n", __LINE__));\
 337   }
 338
 339 #define RRETURN(ra)\
 340   {\
 341   heapframe *oldframe = frame; /* the frame being left */\
 342   frame = oldframe->Xprevframe; /* pop back to the caller's frame */\
 343   if (frame != NULL) /* NULL means this was the outermost frame */\
 344     {\
 345     rrc = ra; /* evaluated after the pop: ra must not use frame variables */\
 346     goto HEAP_RETURN; /* not in view; presumably dispatches on Xwhere */\
 347     }\
 348   return ra; /* outermost frame: really return from match() */\
 349   }
 350
 351
 352 /* Structure for remembering the local variables in a private frame */
 353
 354 typedef struct heapframe {
 355   struct heapframe *Xprevframe;   /* frame RRETURN pops back to; NULL at top */
 356   struct heapframe *Xnextframe;   /* next frame; allocated on demand by RMATCH */
 357
 358   /* Function arguments that may change (md never changes, so is not saved) */
 359
 360   PCRE_PUCHAR Xeptr;
 361   const pcre_uchar *Xecode;
 362   PCRE_PUCHAR Xmstart;
 363   int Xoffset_top;
 364   eptrblock *Xeptrb;
 365   unsigned int Xrdepth;           /* RMATCH sets this to caller's depth + 1 */
 366
 367   /* Function local variables (match() reaches them via same-named macros) */
 368
 369   PCRE_PUCHAR Xcallpat;
 370 #ifdef SUPPORT_UTF
 371   PCRE_PUCHAR Xcharptr;
 372 #endif
 373   PCRE_PUCHAR Xdata;
 374   PCRE_PUCHAR Xnext;
 375   PCRE_PUCHAR Xpp;
 376   PCRE_PUCHAR Xprev;
 377   PCRE_PUCHAR Xsaved_eptr;
 378
 379   recursion_info Xnew_recursive;
 380
 381   BOOL Xcur_is_word;
 382   BOOL Xcondition;
 383   BOOL Xprev_is_word;
 384
 385 #ifdef SUPPORT_UCP
 386   int Xprop_type;
 387   int Xprop_value;
 388   int Xprop_fail_result;
 389   int Xoclength;
 390   pcre_uchar Xocchars[6];
 391 #endif
 392
 393   int Xcodelink;
 394   int Xctype;
 395   unsigned int Xfc;
 396   int Xfi;
 397   int Xlength;
 398   int Xmax;
 399   int Xmin;
 400   int Xnumber;
 401   int Xoffset;
 402   int Xop;
 403   int Xsave_capture_last;
 404   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 405   int Xstacksave[REC_STACK_SAVE_MAX];
 406
 407   eptrblock Xnewptrb;
 408
 409   /* Where to jump back to (the RMn value that RMATCH stored before descending) */
 410
 411   int Xwhere;
 412
 413 } heapframe;
 414
 415 #endif
 416
 417
 418 /***************************************************************************
 419 ***************************************************************************/
 420
 421
 422
 423 /*************************************************
 424 *         Match from current position            *
 425 *************************************************/
 426
 427 /* This function is called recursively in many circumstances. Whenever it
 428 returns a negative (error) response, the outer incarnation must also return the
 429 same response. */
 430
 431 /* These macros pack up tests that are used for partial matching, and which
 432 appear several times in the code. We set the "hit end" flag if the pointer is
 433 at the end of the subject and also past the start of the subject (i.e.
 434 something has been matched). For hard partial matching, we then return
 435 immediately. The second one is used when we already know we are past the end of
 436 the subject. */
 437
 438 #define CHECK_PARTIAL() /* needs md, eptr and RRETURN at the expansion site */\
 439   if (md->partial != 0 && eptr >= md->end_subject && \
 440       eptr > md->start_used_ptr) \
 441     { \
 442     md->hitend = TRUE; /* record that the end of the subject was reached */ \
 443     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* hard partial */ \
 444     } /* expands to a bare if: never place an "else" right after it */
 445 /* SCHECK_PARTIAL omits the end-of-subject test; the caller has already made it. */
 446 #define SCHECK_PARTIAL() /* needs md, eptr and RRETURN at the expansion site */\
 447   if (md->partial != 0 && eptr > md->start_used_ptr) \
 448     { \
 449     md->hitend = TRUE; /* record that the end of the subject was reached */ \
 450     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* hard partial */ \
 451     } /* expands to a bare if: never place an "else" right after it */
 452
 453
 454 /* Performance note: It might be tempting to extract commonly used fields from
 455 the md structure (e.g. utf, end_subject) into individual variables to improve
 456 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 457 made performance worse.
 458
 459 Arguments:
 460    eptr        pointer to current character in subject
 461    ecode       pointer to current position in compiled code
 462    mstart      pointer to the current match start position (can be modified
 463                  by encountering \K)
 464    offset_top  current top pointer
 465    md          pointer to "static" info for the match
 466    eptrb       pointer to chain of blocks containing eptr at start of
 467                  brackets - for testing for empty matches
 468    rdepth      the recursion depth
 469
 470 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 471                MATCH_NOMATCH if failed to match  )
 472                a negative MATCH_xxx value for PRUNE, SKIP, etc
 473                a negative PCRE_ERROR_xxx value if aborted by an error condition
 474                  (e.g. stopped by repeated call or recursion limit)
 475 */
 476
 477 static int
 478 match(PCRE_PUCHAR eptr, const pcre_uchar *ecode,
 479   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
 480   unsigned int rdepth)
 481 {
 482 /* These variables do not need to be preserved over recursion in this function,
 483 so they can be ordinary variables in all cases. None of them is declared
 484 "register", although several are used a lot in loops. */
 485
 486 int  rrc;         /* Returns from recursive calls */
 487 int  i;           /* Used for loops not involving calls to RMATCH() */
 488 unsigned int c;   /* Character values not kept over RMATCH() calls */
 489 BOOL utf;         /* Local copy of UTF flag for speed */
 490
 491 BOOL minimize, possessive; /* Quantifier options */
 492 BOOL caseless;
 493 int condcode;
 494
 495 /* When recursion is not being used, all "local" variables that have to be
 496 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
 497 frame on the stack here; subsequent instantiations are obtained from the heap
 498 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
 499 the top-level on the stack rather than malloc-ing them all gives a performance
 500 boost in many cases where there is not much "recursion". */
 501
 502 #ifdef NO_RECURSE
 503 heapframe *frame = (heapframe *)md->match_frames_base;
 504
 505 /* Copy in the original argument variables */
 506
 507 frame->Xeptr = eptr;
 508 frame->Xecode = ecode;
 509 frame->Xmstart = mstart;
 510 frame->Xoffset_top = offset_top;
 511 frame->Xeptrb = eptrb;
 512 frame->Xrdepth = rdepth;
 513
 514 /* This is where control jumps back to to effect "recursion" */
 515
 516 HEAP_RECURSE:
 517
 518 /* Macros make the argument variables come from the current frame */
 519
 520 #define eptr               frame->Xeptr
 521 #define ecode              frame->Xecode
 522 #define mstart             frame->Xmstart
 523 #define offset_top         frame->Xoffset_top
 524 #define eptrb              frame->Xeptrb
 525 #define rdepth             frame->Xrdepth
 526
 527 /* Ditto for the local variables */
 528
 529 #ifdef SUPPORT_UTF
 530 #define charptr            frame->Xcharptr
 531 #endif
 532 #define callpat            frame->Xcallpat
 533 #define codelink           frame->Xcodelink
 534 #define data               frame->Xdata
 535 #define next               frame->Xnext
 536 #define pp                 frame->Xpp
 537 #define prev               frame->Xprev
 538 #define saved_eptr         frame->Xsaved_eptr
 539
 540 #define new_recursive      frame->Xnew_recursive
 541
 542 #define cur_is_word        frame->Xcur_is_word
 543 #define condition          frame->Xcondition
 544 #define prev_is_word       frame->Xprev_is_word
 545
 546 #ifdef SUPPORT_UCP
 547 #define prop_type          frame->Xprop_type
 548 #define prop_value         frame->Xprop_value
 549 #define prop_fail_result   frame->Xprop_fail_result
 550 #define oclength           frame->Xoclength
 551 #define occhars            frame->Xocchars
 552 #endif
 553
 554 #define ctype              frame->Xctype
 555 #define fc                 frame->Xfc
 556 #define fi                 frame->Xfi
 557 #define length             frame->Xlength
 558 #define max                frame->Xmax
 559 #define min                frame->Xmin
 560 #define number             frame->Xnumber
 561 #define offset             frame->Xoffset
 562 #define op                 frame->Xop
 563 #define save_capture_last  frame->Xsave_capture_last
 564 #define save_offset1       frame->Xsave_offset1
 565 #define save_offset2       frame->Xsave_offset2
 566 #define save_offset3       frame->Xsave_offset3
 567 #define stacksave          frame->Xstacksave
 568
 569 #define newptrb            frame->Xnewptrb
 570
 571 /* When recursion is being used, local variables are allocated on the stack and
 572 get preserved during recursion in the normal way. In this environment, fi and
 573 i, and fc and c, can be the same variables. */
 574
 575 #else         /* NO_RECURSE not defined */
 576 #define fi i
 577 #define fc c
 578
 579 /* Many of the following variables are used only in small blocks of the code.
 580 My normal style of coding would have declared them within each of those blocks.
 581 However, in order to accommodate the version of this code that uses an external
 582 "stack" implemented on the heap, it is easier to declare them all here, so the
 583 declarations can be cut out in a block. The only declarations within blocks
 584 below are for variables that do not have to be preserved over a recursive call
 585 to RMATCH(). */
 586
 587 #ifdef SUPPORT_UTF
 588 const pcre_uchar *charptr;
 589 #endif
 590 const pcre_uchar *callpat;
 591 const pcre_uchar *data;
 592 const pcre_uchar *next;
 593 PCRE_PUCHAR       pp;
 594 const pcre_uchar *prev;
 595 PCRE_PUCHAR       saved_eptr;
 596
 597 recursion_info new_recursive;
 598
 599 BOOL cur_is_word;
 600 BOOL condition;
 601 BOOL prev_is_word;
 602
 603 #ifdef SUPPORT_UCP
 604 int prop_type;
 605 int prop_value;
 606 int prop_fail_result;
 607 int oclength;
 608 pcre_uchar occhars[6];
 609 #endif
 610
 611 int codelink;
 612 int ctype;
 613 int length;
 614 int max;
 615 int min;
 616 int number;
 617 int offset;
 618 int op;
 619 int save_capture_last;
 620 int save_offset1, save_offset2, save_offset3;
 621 int stacksave[REC_STACK_SAVE_MAX];
 622
 623 eptrblock newptrb;
 624
 625 /* There is a special fudge for calling match() in a way that causes it to
 626 measure the size of its basic stack frame when the stack is being used for
 627 recursion. The second argument (ecode) being NULL triggers this behaviour. It
 628 cannot normally ever be NULL. The return is the negated value of the frame
 629 size. */
 630
 631 if (ecode == NULL)
 632   {
 633   if (rdepth == 0)
 634     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
 635   else
 636     {
 637     int len = (char *)&rdepth - (char *)eptr;
 638     return (len > 0)? -len : len;
 639     }
 640   }
 641 #endif     /* NO_RECURSE */
 642
 643 /* To save space on the stack and in the heap frame, I have doubled up on some
 644 of the local variables that are used only in localised parts of the code, but
 645 still need to be preserved over recursive calls of match(). These macros define
 646 the alternative names that are used. */
 647
 648 #define allow_zero    cur_is_word
 649 #define cbegroup      condition
 650 #define code_offset   codelink
 651 #define condassert    condition
 652 #define matched_once  prev_is_word
 653 #define foc           number
 654 #define save_mark     data
 655
 656 /* These statements are here to stop the compiler complaining about uninitialized
 657 variables. */
 658
 659 #ifdef SUPPORT_UCP
 660 prop_value = 0;
 661 prop_fail_result = 0;
 662 #endif
 663
 664
 665 /* This label is used for tail recursion, which is used in a few cases even
 666 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 667 used. Thanks to Ian Taylor for noticing this possibility and sending the
 668 original patch. */
 669
 670 TAIL_RECURSE:
 671
 672 /* OK, now we can get on with the real code of the function. Recursive calls
 673 are specified by the macro RMATCH and RRETURN is used to return. When
 674 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 675 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 676 defined). However, RMATCH isn't like a function call because it's quite a
 677 complicated macro. It has to be used in one particular way. This shouldn't,
 678 however, impact performance when true recursion is being used. */
 679
 680 #ifdef SUPPORT_UTF
 681 utf = md->utf;       /* Local copy of the flag */
 682 #else
 683 utf = FALSE;
 684 #endif
 685
 686 /* First check that we haven't called match() too many times, or that we
 687 haven't exceeded the recursive call limit. */
 688
 689 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 690 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 691
 692 /* At the start of a group with an unlimited repeat that may match an empty
 693 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
 694 done this way to save having to use another function argument, which would take
 695 up space on the stack. See also MATCH_CONDASSERT below.
 696
 697 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
 698 such remembered pointers, to be checked when we hit the closing ket, in order
 699 to break infinite loops that match no characters. When match() is called in
 700 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
 701 NOT be used with tail recursion, because the memory block that is used is on
 702 the stack, so a new one may be required for each match(). */
 703
 704 if (md->match_function_type == MATCH_CBEGROUP)
 705   {
 706   newptrb.epb_saved_eptr = eptr;
 707   newptrb.epb_prev = eptrb;
 708   eptrb = &newptrb;
 709   md->match_function_type = 0;
 710   }
 711
 712 /* Now start processing the opcodes. */
 713
 714 for (;;)
 715   {
 716   minimize = possessive = FALSE;
 717   op = *ecode;
 718
 719   switch(op)
 720     {
 721     case OP_MARK:
 722     md->nomatch_mark = ecode + 2;
 723     md->mark = NULL;    /* In case previously set by assertion */
 724     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 725       eptrb, RM55);
 726     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 727          md->mark == NULL) md->mark = ecode + 2;
 728
 729     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
 730     argument, and we must check whether that argument matches this MARK's
 731     argument. It is passed back in md->start_match_ptr (an overloading of that
 732     variable). If it does match, we reset that variable to the current subject
 733     position and return MATCH_SKIP. Otherwise, pass back the return code
 734     unaltered. */
 735
 736     else if (rrc == MATCH_SKIP_ARG &&
 737         STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
 738       {
 739       md->start_match_ptr = eptr;
 740       RRETURN(MATCH_SKIP);
 741       }
 742     RRETURN(rrc);
 743
 744     case OP_FAIL:
 745     RRETURN(MATCH_NOMATCH);
 746
 747     /* COMMIT overrides PRUNE, SKIP, and THEN */
 748
 749     case OP_COMMIT:
 750     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 751       eptrb, RM52);
 752     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
 753         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
 754         rrc != MATCH_THEN)
 755       RRETURN(rrc);
 756     RRETURN(MATCH_COMMIT);
 757
 758     /* PRUNE overrides THEN */
 759
 760     case OP_PRUNE:
 761     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 762       eptrb, RM51);
 763     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 764     RRETURN(MATCH_PRUNE);
 765
 766     case OP_PRUNE_ARG:
 767     md->nomatch_mark = ecode + 2;
 768     md->mark = NULL;    /* In case previously set by assertion */
 769     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 770       eptrb, RM56);
 771     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 772          md->mark == NULL) md->mark = ecode + 2;
 773     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 774     RRETURN(MATCH_PRUNE);
 775
 776     /* SKIP overrides PRUNE and THEN */
 777
 778     case OP_SKIP:
 779     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 780       eptrb, RM53);
 781     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 782       RRETURN(rrc);
 783     md->start_match_ptr = eptr;   /* Pass back current position */
 784     RRETURN(MATCH_SKIP);
 785
 786     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
 787     nomatch_mark. There is a flag that disables this opcode when re-matching a
 788     pattern that ended with a SKIP for which there was not a matching MARK. */
 789
 790     case OP_SKIP_ARG:
 791     if (md->ignore_skip_arg)
 792       {
 793       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
 794       break;
 795       }
 796     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 797       eptrb, RM57);
 798     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 799       RRETURN(rrc);
 800
 801     /* Pass back the current skip name by overloading md->start_match_ptr and
 802     returning the special MATCH_SKIP_ARG return code. This will either be
 803     caught by a matching MARK, or get to the top, where it causes a rematch
 804     with the md->ignore_skip_arg flag set. */
 805
 806     md->start_match_ptr = ecode + 2;
 807     RRETURN(MATCH_SKIP_ARG);
 808
 809     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
 810     the branch in which it occurs can be determined. Overload the start of
 811     match pointer to do this. */
 812
 813     case OP_THEN:
 814     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 815       eptrb, RM54);
 816     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 817     md->start_match_ptr = ecode;
 818     RRETURN(MATCH_THEN);
 819
 820     case OP_THEN_ARG:
 821     md->nomatch_mark = ecode + 2;
 822     md->mark = NULL;    /* In case previously set by assertion */
 823     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
 824       md, eptrb, RM58);
 825     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 826          md->mark == NULL) md->mark = ecode + 2;
 827     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 828     md->start_match_ptr = ecode;
 829     RRETURN(MATCH_THEN);
 830
 831     /* Handle an atomic group that does not contain any capturing parentheses.
 832     This can be handled like an assertion. Prior to 8.13, all atomic groups
 833     were handled this way. In 8.13, the code was changed as below for ONCE, so
 834     that backups pass through the group and thereby reset captured values.
 835     However, this uses a lot more stack, so in 8.20, atomic groups that do not
 836     contain any captures generate OP_ONCE_NC, which can be handled in the old,
 837     less stack intensive way.
 838
 839     Check the alternative branches in turn - the matching won't pass the KET
 840     for this kind of subpattern. If any one branch matches, we carry on as at
 841     the end of a normal bracket, leaving the subject pointer, but resetting
 842     the start-of-match value in case it was changed by \K. */
 843
 844     case OP_ONCE_NC:
 845     prev = ecode;
 846     saved_eptr = eptr;
 847     save_mark = md->mark;
 848     do
 849       {
 850       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
 851       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
 852         {
 853         mstart = md->start_match_ptr;
 854         break;
 855         }
 856       if (rrc == MATCH_THEN)
 857         {
 858         next = ecode + GET(ecode,1);
 859         if (md->start_match_ptr < next &&
 860             (*ecode == OP_ALT || *next == OP_ALT))
 861           rrc = MATCH_NOMATCH;
 862         }
 863
 864       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 865       ecode += GET(ecode,1);
 866       md->mark = save_mark;
 867       }
 868     while (*ecode == OP_ALT);
 869
 870     /* If hit the end of the group (which could be repeated), fail */
 871
 872     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
 873
 874     /* Continue as from after the group, updating the offsets high water
 875     mark, since extracts may have been taken. */
 876
 877     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
 878
 879     offset_top = md->end_offset_top;
 880     eptr = md->end_match_ptr;
 881
 882     /* For a non-repeating ket, just continue at this level. This also
 883     happens for a repeating ket if no characters were matched in the group.
 884     This is the forcible breaking of infinite loops as implemented in Perl
 885     5.005. */
 886
 887     if (*ecode == OP_KET || eptr == saved_eptr)
 888       {
 889       ecode += 1+LINK_SIZE;
 890       break;
 891       }
 892
 893     /* The repeating kets try the rest of the pattern or restart from the
 894     preceding bracket, in the appropriate order. The second "call" of match()
 895     uses tail recursion, to avoid using another stack frame. */
 896
 897     if (*ecode == OP_KETRMIN)
 898       {
 899       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
 900       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 901       ecode = prev;
 902       goto TAIL_RECURSE;
 903       }
 904     else  /* OP_KETRMAX */
 905       {
 906       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
 907       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 908       ecode += 1 + LINK_SIZE;
 909       goto TAIL_RECURSE;
 910       }
 911     /* Control never gets here */
 912
 913     /* Handle a capturing bracket, other than those that are possessive with an
 914     unlimited repeat. If there is space in the offset vector, save the current
 915     subject position in the working slot at the top of the vector. We mustn't
 916     change the current values of the data slot, because they may be set from a
 917     previous iteration of this group, and be referred to by a reference inside
 918     the group. A failure to match might occur after the group has succeeded,
 919     if something later on doesn't match. For this reason, we need to restore
 920     the working value and also the values of the final offsets, in case they
 921     were set by a previous iteration of the same bracket.
 922
 923     If there isn't enough space in the offset vector, treat this as if it were
 924     a non-capturing bracket. Don't worry about setting the flag for the error
 925     case here; that is handled in the code for KET. */
 926
 927     case OP_CBRA:
 928     case OP_SCBRA:
 929     number = GET2(ecode, 1+LINK_SIZE);
 930     offset = number << 1;
 931
 932 #ifdef PCRE_DEBUG
 933     printf("start bracket %d\n", number);
 934     printf("subject=");
 935     pchars(eptr, 16, TRUE, md);
 936     printf("\n");
 937 #endif
 938
 939     if (offset < md->offset_max)
 940       {
 941       save_offset1 = md->offset_vector[offset];
 942       save_offset2 = md->offset_vector[offset+1];
 943       save_offset3 = md->offset_vector[md->offset_end - number];
 944       save_capture_last = md->capture_last;
 945       save_mark = md->mark;
 946
 947       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 948       md->offset_vector[md->offset_end - number] =
 949         (int)(eptr - md->start_subject);
 950
 951       for (;;)
 952         {
 953         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
 954         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 955           eptrb, RM1);
 956         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
 957
 958         /* If we backed up to a THEN, check whether it is within the current
 959         branch by comparing the address of the THEN that is passed back with
 960         the end of the branch. If it is within the current branch, and the
 961         branch is one of two or more alternatives (it either starts or ends
 962         with OP_ALT), we have reached the limit of THEN's action, so convert
 963         the return code to NOMATCH, which will cause normal backtracking to
 964         happen from now on. Otherwise, THEN is passed back to an outer
 965         alternative. This implements Perl's treatment of parenthesized groups,
 966         where a group not containing | does not affect the current alternative,
 967         that is, (X) is NOT the same as (X|(*F)). */
 968
 969         if (rrc == MATCH_THEN)
 970           {
 971           next = ecode + GET(ecode,1);
 972           if (md->start_match_ptr < next &&
 973               (*ecode == OP_ALT || *next == OP_ALT))
 974             rrc = MATCH_NOMATCH;
 975           }
 976
 977         /* Anything other than NOMATCH is passed back. */
 978
 979         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 980         md->capture_last = save_capture_last;
 981         ecode += GET(ecode, 1);
 982         md->mark = save_mark;
 983         if (*ecode != OP_ALT) break;
 984         }
 985
 986       DPRINTF(("bracket %d failed\n", number));
 987       md->offset_vector[offset] = save_offset1;
 988       md->offset_vector[offset+1] = save_offset2;
 989       md->offset_vector[md->offset_end - number] = save_offset3;
 990
 991       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
 992
 993       RRETURN(rrc);
 994       }
 995
 996     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 997     as a non-capturing bracket. */
 998
 999     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1000     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1001
1002     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1003
1004     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1005     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1006
1007     /* Non-capturing or atomic group, except for possessive with unlimited
1008     repeat and ONCE group with no captures. Loop for all the alternatives.
1009
1010     When we get to the final alternative within the brackets, we used to return
1011     the result of a recursive call to match() whatever happened so it was
1012     possible to reduce stack usage by turning this into a tail recursion,
1013     except in the case of a possibly empty group. However, now that there is
1014     the possibility of (*THEN) occurring in the final alternative, this
1015     optimization is no longer always possible.
1016
1017     We can optimize if we know there are no (*THEN)s in the pattern; at present
1018     this is the best that can be done.
1019
1020     MATCH_ONCE is returned when the end of an atomic group is successfully
1021     reached, but subsequent matching fails. It passes back up the tree (causing
1022     captured values to be reset) until the original atomic group level is
1023     reached. This is tested by comparing md->once_target with the start of the
1024     group. At this point, the return is converted into MATCH_NOMATCH so that
1025     previous backup points can be taken. */
1026
1027     case OP_ONCE:
1028     case OP_BRA:
1029     case OP_SBRA:
1030     DPRINTF(("start non-capturing bracket\n"));
1031
1032     for (;;)
1033       {
1034       if (op >= OP_SBRA || op == OP_ONCE)
1035         md->match_function_type = MATCH_CBEGROUP;
1036
1037       /* If this is not a possibly empty group, and there are no (*THEN)s in
1038       the pattern, and this is the final alternative, optimize as described
1039       above. */
1040
1041       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1042         {
1043         ecode += PRIV(OP_lengths)[*ecode];
1044         goto TAIL_RECURSE;
1045         }
1046
1047       /* In all other cases, we have to make another call to match(). */
1048
1049       save_mark = md->mark;
1050       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051         RM2);
1052
1053       /* See comment in the code for capturing groups above about handling
1054       THEN. */
1055
1056       if (rrc == MATCH_THEN)
1057         {
1058         next = ecode + GET(ecode,1);
1059         if (md->start_match_ptr < next &&
1060             (*ecode == OP_ALT || *next == OP_ALT))
1061           rrc = MATCH_NOMATCH;
1062         }
1063
1064       if (rrc != MATCH_NOMATCH)
1065         {
1066         if (rrc == MATCH_ONCE)
1067           {
1068           const pcre_uchar *scode = ecode;
1069           if (*scode != OP_ONCE)           /* If not at start, find it */
1070             {
1071             while (*scode == OP_ALT) scode += GET(scode, 1);
1072             scode -= GET(scode, 1);
1073             }
1074           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1075           }
1076         RRETURN(rrc);
1077         }
1078       ecode += GET(ecode, 1);
1079       md->mark = save_mark;
1080       if (*ecode != OP_ALT) break;
1081       }
1082
1083     RRETURN(MATCH_NOMATCH);
1084
1085     /* Handle possessive capturing brackets with an unlimited repeat. We come
1086     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1087     handled similarly to the normal case above. However, the matching is
1088     different. The end of these brackets will always be OP_KETRPOS, which
1089     returns MATCH_KETRPOS without going further in the pattern. By this means
1090     we can handle the group by iteration rather than recursion, thereby
1091     reducing the amount of stack needed. */
1092
1093     case OP_CBRAPOS:
1094     case OP_SCBRAPOS:
1095     allow_zero = FALSE;
1096
1097     POSSESSIVE_CAPTURE:
1098     number = GET2(ecode, 1+LINK_SIZE);
1099     offset = number << 1;
1100
1101 #ifdef PCRE_DEBUG
1102     printf("start possessive bracket %d\n", number);
1103     printf("subject=");
1104     pchars(eptr, 16, TRUE, md);
1105     printf("\n");
1106 #endif
1107
1108     if (offset < md->offset_max)
1109       {
1110       matched_once = FALSE;
1111       code_offset = (int)(ecode - md->start_code);
1112
1113       save_offset1 = md->offset_vector[offset];
1114       save_offset2 = md->offset_vector[offset+1];
1115       save_offset3 = md->offset_vector[md->offset_end - number];
1116       save_capture_last = md->capture_last;
1117
1118       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1119
1120       /* Each time round the loop, save the current subject position for use
1121       when the group matches. For MATCH_MATCH, the group has matched, so we
1122       restart it with a new subject starting position, remembering that we had
1123       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1124       usual. If we haven't matched any alternatives in any iteration, check to
1125       see if a previous iteration matched. If so, the group has matched;
1126       continue from afterwards. Otherwise it has failed; restore the previous
1127       capture values before returning NOMATCH. */
1128
1129       for (;;)
1130         {
1131         md->offset_vector[md->offset_end - number] =
1132           (int)(eptr - md->start_subject);
1133         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1134         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1135           eptrb, RM63);
1136         if (rrc == MATCH_KETRPOS)
1137           {
1138           offset_top = md->end_offset_top;
1139           eptr = md->end_match_ptr;
1140           ecode = md->start_code + code_offset;
1141           save_capture_last = md->capture_last;
1142           matched_once = TRUE;
1143           continue;
1144           }
1145
1146         /* See comment in the code for capturing groups above about handling
1147         THEN. */
1148
1149         if (rrc == MATCH_THEN)
1150           {
1151           next = ecode + GET(ecode,1);
1152           if (md->start_match_ptr < next &&
1153               (*ecode == OP_ALT || *next == OP_ALT))
1154             rrc = MATCH_NOMATCH;
1155           }
1156
1157         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1158         md->capture_last = save_capture_last;
1159         ecode += GET(ecode, 1);
1160         if (*ecode != OP_ALT) break;
1161         }
1162
1163       if (!matched_once)
1164         {
1165         md->offset_vector[offset] = save_offset1;
1166         md->offset_vector[offset+1] = save_offset2;
1167         md->offset_vector[md->offset_end - number] = save_offset3;
1168         }
1169
1170       if (allow_zero || matched_once)
1171         {
1172         ecode += 1 + LINK_SIZE;
1173         break;
1174         }
1175
1176       RRETURN(MATCH_NOMATCH);
1177       }
1178
1179     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1180     as a non-capturing bracket. */
1181
1182     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1183     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1184
1185     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1186
1187     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1188     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1189
1190     /* Non-capturing possessive bracket with unlimited repeat. We come here
1191     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1192     without the capturing complication. It is written out separately for speed
1193     and cleanliness. */
1194
1195     case OP_BRAPOS:
1196     case OP_SBRAPOS:
1197     allow_zero = FALSE;
1198
1199     POSSESSIVE_NON_CAPTURE:
1200     matched_once = FALSE;
1201     code_offset = (int)(ecode - md->start_code);
1202
1203     for (;;)
1204       {
1205       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1206       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1207         eptrb, RM48);
1208       if (rrc == MATCH_KETRPOS)
1209         {
1210         offset_top = md->end_offset_top;
1211         eptr = md->end_match_ptr;
1212         ecode = md->start_code + code_offset;
1213         matched_once = TRUE;
1214         continue;
1215         }
1216
1217       /* See comment in the code for capturing groups above about handling
1218       THEN. */
1219
1220       if (rrc == MATCH_THEN)
1221         {
1222         next = ecode + GET(ecode,1);
1223         if (md->start_match_ptr < next &&
1224             (*ecode == OP_ALT || *next == OP_ALT))
1225           rrc = MATCH_NOMATCH;
1226         }
1227
1228       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229       ecode += GET(ecode, 1);
1230       if (*ecode != OP_ALT) break;
1231       }
1232
1233     if (matched_once || allow_zero)
1234       {
1235       ecode += 1 + LINK_SIZE;
1236       break;
1237       }
1238     RRETURN(MATCH_NOMATCH);
1239
1240     /* Control never reaches here. */
1241
1242     /* Conditional group: compilation checked that there are no more than
1243     two branches. If the condition is false, skipping the first branch takes us
1244     past the end if there is only one branch, but that's OK because that is
1245     exactly what going to the ket would do. */
1246
1247     case OP_COND:
1248     case OP_SCOND:
1249     codelink = GET(ecode, 1);
1250
1251     /* Because of the way auto-callout works during compile, a callout item is
1252     inserted between OP_COND and an assertion condition. */
1253
1254     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1255       {
1256       if (PUBL(callout) != NULL)
1257         {
1258         PUBL(callout_block) cb;
1259         cb.version          = 2;   /* Version 2 of the callout block */
1260         cb.callout_number   = ecode[LINK_SIZE+2];
1261         cb.offset_vector    = md->offset_vector;
1262 #ifdef COMPILE_PCRE8
1263         cb.subject          = (PCRE_SPTR)md->start_subject;
1264 #else
1265         cb.subject          = (PCRE_SPTR16)md->start_subject;
1266 #endif
1267         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1268         cb.start_match      = (int)(mstart - md->start_subject);
1269         cb.current_position = (int)(eptr - md->start_subject);
1270         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1271         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1272         cb.capture_top      = offset_top/2;
1273         cb.capture_last     = md->capture_last;
1274         cb.callout_data     = md->callout_data;
1275         cb.mark             = md->nomatch_mark;
1276         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1277         if (rrc < 0) RRETURN(rrc);
1278         }
1279       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1280       }
1281
1282     condcode = ecode[LINK_SIZE+1];
1283
1284     /* Now see what the actual condition is */
1285
1286     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1287       {
1288       if (md->recursive == NULL)                /* Not recursing => FALSE */
1289         {
1290         condition = FALSE;
1291         ecode += GET(ecode, 1);
1292         }
1293       else
1294         {
1295         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1296         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1297
1298         /* If the test is for recursion into a specific subpattern, and it is
1299         false, but the test was set up by name, scan the table to see if the
1300         name refers to any other numbers, and test them. The condition is true
1301         if any one is set. */
1302
1303         if (!condition && condcode == OP_NRREF)
1304           {
1305           pcre_uchar *slotA = md->name_table;
1306           for (i = 0; i < md->name_count; i++)
1307             {
1308             if (GET2(slotA, 0) == recno) break;
1309             slotA += md->name_entry_size;
1310             }
1311
1312           /* Found a name for the number - there can be only one; duplicate
1313           names for different numbers are allowed, but not vice versa. First
1314           scan down for duplicates. */
1315
1316           if (i < md->name_count)
1317             {
1318             pcre_uchar *slotB = slotA;
1319             while (slotB > md->name_table)
1320               {
1321               slotB -= md->name_entry_size;
1322               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1323                 {
1324                 condition = GET2(slotB, 0) == md->recursive->group_num;
1325                 if (condition) break;
1326                 }
1327               else break;
1328               }
1329
1330             /* Scan up for duplicates */
1331
1332             if (!condition)
1333               {
1334               slotB = slotA;
1335               for (i++; i < md->name_count; i++)
1336                 {
1337                 slotB += md->name_entry_size;
1338                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1339                   {
1340                   condition = GET2(slotB, 0) == md->recursive->group_num;
1341                   if (condition) break;
1342                   }
1343                 else break;
1344                 }
1345               }
1346             }
1347           }
1348
1349         /* Choose branch according to the condition */
1350
1351         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1352         }
1353       }
1354
1355     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1356       {
1357       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1358       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1359
1360       /* If the numbered capture is unset, but the reference was by name,
1361       scan the table to see if the name refers to any other numbers, and test
1362       them. The condition is true if any one is set. This is tediously similar
1363       to the code above, but not close enough to try to amalgamate. */
1364
1365       if (!condition && condcode == OP_NCREF)
1366         {
1367         int refno = offset >> 1;
1368         pcre_uchar *slotA = md->name_table;
1369
1370         for (i = 0; i < md->name_count; i++)
1371           {
1372           if (GET2(slotA, 0) == refno) break;
1373           slotA += md->name_entry_size;
1374           }
1375
1376         /* Found a name for the number - there can be only one; duplicate names
1377         for different numbers are allowed, but not vice versa. First scan down
1378         for duplicates. */
1379
1380         if (i < md->name_count)
1381           {
1382           pcre_uchar *slotB = slotA;
1383           while (slotB > md->name_table)
1384             {
1385             slotB -= md->name_entry_size;
1386             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1387               {
1388               offset = GET2(slotB, 0) << 1;
1389               condition = offset < offset_top &&
1390                 md->offset_vector[offset] >= 0;
1391               if (condition) break;
1392               }
1393             else break;
1394             }
1395
1396           /* Scan up for duplicates */
1397
1398           if (!condition)
1399             {
1400             slotB = slotA;
1401             for (i++; i < md->name_count; i++)
1402               {
1403               slotB += md->name_entry_size;
1404               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1405                 {
1406                 offset = GET2(slotB, 0) << 1;
1407                 condition = offset < offset_top &&
1408                   md->offset_vector[offset] >= 0;
1409                 if (condition) break;
1410                 }
1411               else break;
1412               }
1413             }
1414           }
1415         }
1416
1417       /* Choose branch according to the condition */
1418
1419       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1420       }
1421
1422     else if (condcode == OP_DEF)     /* DEFINE - always false */
1423       {
1424       condition = FALSE;
1425       ecode += GET(ecode, 1);
1426       }
1427
1428     /* The condition is an assertion. Call match() to evaluate it - setting
1429     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1430     an assertion. */
1431
1432     else
1433       {
1434       md->match_function_type = MATCH_CONDASSERT;
1435       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1436       if (rrc == MATCH_MATCH)
1437         {
1438         if (md->end_offset_top > offset_top)
1439           offset_top = md->end_offset_top;  /* Captures may have happened */
1440         condition = TRUE;
1441         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1442         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1443         }
1444
1445       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1446       assertion; it is therefore treated as NOMATCH. */
1447
1448       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1449         {
1450         RRETURN(rrc);         /* Need braces because of following else */
1451         }
1452       else
1453         {
1454         condition = FALSE;
1455         ecode += codelink;
1456         }
1457       }
1458
1459     /* We are now at the branch that is to be obeyed. As there is only one, can
1460     use tail recursion to avoid using another stack frame, except when there is
1461     unlimited repeat of a possibly empty group. In the latter case, a recursive
1462     call to match() is always required, unless the second alternative doesn't
1463     exist, in which case we can just plough on. Note that, for compatibility
1464     with Perl, the | in a conditional group is NOT treated as creating two
1465     alternatives. If a THEN is encountered in the branch, it propagates out to
1466     the enclosing alternative (unless nested in a deeper set of alternatives,
1467     of course). */
1468
1469     if (condition || *ecode == OP_ALT)
1470       {
1471       if (op != OP_SCOND)
1472         {
1473         ecode += 1 + LINK_SIZE;
1474         goto TAIL_RECURSE;
1475         }
1476
1477       md->match_function_type = MATCH_CBEGROUP;
1478       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1479       RRETURN(rrc);
1480       }
1481
1482      /* Condition false & no alternative; continue after the group. */
1483
1484     else
1485       {
1486       ecode += 1 + LINK_SIZE;
1487       }
1488     break;
1489
1490
1491     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1492     to close any currently open capturing brackets. */
1493
1494     case OP_CLOSE:
1495     number = GET2(ecode, 1);
1496     offset = number << 1;
1497
1498 #ifdef PCRE_DEBUG
1499       printf("end bracket %d at *ACCEPT", number);
1500       printf("\n");
1501 #endif
1502
1503     md->capture_last = number;
1504     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1505       {
1506       md->offset_vector[offset] =
1507         md->offset_vector[md->offset_end - number];
1508       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1509       if (offset_top <= offset) offset_top = offset + 2;
1510       }
1511     ecode += 1 + IMM2_SIZE;
1512     break;
1513
1514
1515     /* End of the pattern, either real or forced. */
1516
1517     case OP_END:
1518     case OP_ACCEPT:
1519     case OP_ASSERT_ACCEPT:
1520
1521     /* If we have matched an empty string, fail if not in an assertion and not
1522     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1523     is set and we have matched at the start of the subject. In both cases,
1524     backtracking will then try other alternatives, if any. */
1525
1526     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1527          md->recursive == NULL &&
1528          (md->notempty ||
1529            (md->notempty_atstart &&
1530              mstart == md->start_subject + md->start_offset)))
1531       RRETURN(MATCH_NOMATCH);
1532
1533     /* Otherwise, we have a match. */
1534
1535     md->end_match_ptr = eptr;           /* Record where we ended */
1536     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1537     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1538
1539     /* For some reason, the macros don't work properly if an expression is
1540     given as the argument to RRETURN when the heap is in use. */
1541
1542     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1543     RRETURN(rrc);
1544
1545     /* Assertion brackets. Check the alternative branches in turn - the
1546     matching won't pass the KET for an assertion. If any one branch matches,
1547     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1548     start of each branch to move the current point backwards, so the code at
1549     this level is identical to the lookahead case. When the assertion is part
1550     of a condition, we want to return immediately afterwards. The caller of
1551     this incarnation of the match() function will have set MATCH_CONDASSERT in
1552     md->match_function_type, and one of these opcodes will be the first opcode
1553     that is processed. We use a local variable that is preserved over calls to
1554     match() to remember this case. */
1555
1556     case OP_ASSERT:
1557     case OP_ASSERTBACK:
1558     save_mark = md->mark;
1559     if (md->match_function_type == MATCH_CONDASSERT)
1560       {
1561       condassert = TRUE;
1562       md->match_function_type = 0;
1563       }
1564     else condassert = FALSE;
1565
1566     do
1567       {
1568       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1569       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1570         {
1571         mstart = md->start_match_ptr;   /* In case \K reset it */
1572         break;
1573         }
1574       md->mark = save_mark;
1575
1576       /* A COMMIT failure must fail the entire assertion, without trying any
1577       subsequent branches. */
1578
1579       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1580
1581       /* PCRE does not allow THEN to escape beyond an assertion; it
1582       is treated as NOMATCH. */
1583
1584       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1585       ecode += GET(ecode, 1);
1586       }
1587     while (*ecode == OP_ALT);
1588
1589     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1590
1591     /* If checking an assertion for a condition, return MATCH_MATCH. */
1592
1593     if (condassert) RRETURN(MATCH_MATCH);
1594
1595     /* Continue from after the assertion, updating the offsets high water
1596     mark, since extracts may have been taken during the assertion. */
1597
1598     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1599     ecode += 1 + LINK_SIZE;
1600     offset_top = md->end_offset_top;
1601     continue;
1602
1603     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1604     PRUNE, or COMMIT means we must assume failure without checking subsequent
1605     branches. */
1606
1607     case OP_ASSERT_NOT:
1608     case OP_ASSERTBACK_NOT:
1609     save_mark = md->mark;
1610     if (md->match_function_type == MATCH_CONDASSERT)
1611       {
1612       condassert = TRUE;
1613       md->match_function_type = 0;
1614       }
1615     else condassert = FALSE;
1616
1617     do
1618       {
1619       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1620       md->mark = save_mark;
1621       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1622       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1623         {
1624         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1625         break;
1626         }
1627
1628       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1629       as NOMATCH. */
1630
1631       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1632       ecode += GET(ecode,1);
1633       }
1634     while (*ecode == OP_ALT);
1635
1636     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1637
1638     ecode += 1 + LINK_SIZE;
1639     continue;
1640
1641     /* Move the subject pointer back. This occurs only at the start of
1642     each branch of a lookbehind assertion. If we are too close to the start to
1643     move back, this match function fails. When working with UTF-8 we move
1644     back a number of characters, not bytes. */
1645
1646     case OP_REVERSE:
1647 #ifdef SUPPORT_UTF
1648     if (utf)
1649       {
1650       i = GET(ecode, 1);
1651       while (i-- > 0)
1652         {
1653         eptr--;
1654         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1655         BACKCHAR(eptr);
1656         }
1657       }
1658     else
1659 #endif
1660
1661     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1662
1663       {
1664       eptr -= GET(ecode, 1);
1665       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1666       }
1667
1668     /* Save the earliest consulted character, then skip to next op code */
1669
1670     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1671     ecode += 1 + LINK_SIZE;
1672     break;
1673
1674     /* The callout item calls an external function, if one is provided, passing
1675     details of the match so far. This is mainly for debugging, though the
1676     function is able to force a failure. */
1677
1678     case OP_CALLOUT:
1679     if (PUBL(callout) != NULL)
1680       {
1681       PUBL(callout_block) cb;
1682       cb.version          = 2;   /* Version 2 of the callout block */
1683       cb.callout_number   = ecode[1];
1684       cb.offset_vector    = md->offset_vector;
1685 #ifdef COMPILE_PCRE8
1686       cb.subject          = (PCRE_SPTR)md->start_subject;
1687 #else
1688       cb.subject          = (PCRE_SPTR16)md->start_subject;
1689 #endif
1690       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1691       cb.start_match      = (int)(mstart - md->start_subject);
1692       cb.current_position = (int)(eptr - md->start_subject);
1693       cb.pattern_position = GET(ecode, 2);
1694       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1695       cb.capture_top      = offset_top/2;
1696       cb.capture_last     = md->capture_last;
1697       cb.callout_data     = md->callout_data;
1698       cb.mark             = md->nomatch_mark;
1699       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1700       if (rrc < 0) RRETURN(rrc);
1701       }
1702     ecode += 2 + 2*LINK_SIZE;
1703     break;
1704
1705     /* Recursion either matches the current regex, or some subexpression. The
1706     offset data is the offset to the starting bracket from the start of the
1707     whole pattern. (This is so that it works from duplicated subpatterns.)
1708
1709     The state of the capturing groups is preserved over recursion, and
1710     re-instated afterwards. We don't know how many are started and not yet
1711     finished (offset_top records the completed total) so we just have to save
1712     all the potential data. There may be up to 65535 such values, which is too
1713     large to put on the stack, but using malloc for small numbers seems
1714     expensive. As a compromise, the stack is used when there are no more than
1715     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1716
1717     There are also other values that have to be saved. We use a chained
1718     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1719     for the original version of this logic. It has, however, been hacked around
1720     a lot, so he is not to blame for the current way it works. */
1721
1722     case OP_RECURSE:
1723       {
1724       recursion_info *ri;
1725       int recno;
1726
1727       callpat = md->start_code + GET(ecode, 1);
1728       recno = (callpat == md->start_code)? 0 :
1729         GET2(callpat, 1 + LINK_SIZE);
1730
1731       /* Check for repeating a recursion without advancing the subject pointer.
1732       This should catch convoluted mutual recursions. (Some simple cases are
1733       caught at compile time.) */
1734
1735       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1736         if (recno == ri->group_num && eptr == ri->subject_position)
1737           RRETURN(PCRE_ERROR_RECURSELOOP);
1738
1739       /* Add to "recursing stack" */
1740
1741       new_recursive.group_num = recno;
1742       new_recursive.subject_position = eptr;
1743       new_recursive.prevrec = md->recursive;
1744       md->recursive = &new_recursive;
1745
1746       /* Where to continue from afterwards */
1747
1748       ecode += 1 + LINK_SIZE;
1749
1750       /* Now save the offset data */
1751
1752       new_recursive.saved_max = md->offset_end;
1753       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1754         new_recursive.offset_save = stacksave;
1755       else
1756         {
1757         new_recursive.offset_save =
1758           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1759         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1760         }
1761       memcpy(new_recursive.offset_save, md->offset_vector,
1762             new_recursive.saved_max * sizeof(int));
1763
1764       /* OK, now we can do the recursion. After processing each alternative,
1765       restore the offset data. If there were nested recursions, md->recursive
1766       might be changed, so reset it before looping. */
1767
1768       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1769       cbegroup = (*callpat >= OP_SBRA);
1770       do
1771         {
1772         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1773         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1774           md, eptrb, RM6);
1775         memcpy(md->offset_vector, new_recursive.offset_save,
1776             new_recursive.saved_max * sizeof(int));
1777         md->recursive = new_recursive.prevrec;
1778         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1779           {
1780           DPRINTF(("Recursion matched\n"));
1781           if (new_recursive.offset_save != stacksave)
1782             (PUBL(free))(new_recursive.offset_save);
1783
1784           /* Set where we got to in the subject, and reset the start in case
1785           it was changed by \K. This *is* propagated back out of a recursion,
1786           for Perl compatibility. */
1787
1788           eptr = md->end_match_ptr;
1789           mstart = md->start_match_ptr;
1790           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1791           }
1792
1793         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1794         is treated as NOMATCH. */
1795
1796         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1797                  rrc != MATCH_COMMIT)
1798           {
1799           DPRINTF(("Recursion gave error %d\n", rrc));
1800           if (new_recursive.offset_save != stacksave)
1801             (PUBL(free))(new_recursive.offset_save);
1802           RRETURN(rrc);
1803           }
1804
1805         md->recursive = &new_recursive;
1806         callpat += GET(callpat, 1);
1807         }
1808       while (*callpat == OP_ALT);
1809
1810       DPRINTF(("Recursion didn't match\n"));
1811       md->recursive = new_recursive.prevrec;
1812       if (new_recursive.offset_save != stacksave)
1813         (PUBL(free))(new_recursive.offset_save);
1814       RRETURN(MATCH_NOMATCH);
1815       }
1816
1817     RECURSION_MATCHED:
1818     break;
1819
1820     /* An alternation is the end of a branch; scan along to find the end of the
1821     bracketed group and go to there. */
1822
1823     case OP_ALT:
1824     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1825     break;
1826
1827     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1828     indicating that it may occur zero times. It may repeat infinitely, or not
1829     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1830     with fixed upper repeat limits are compiled as a number of copies, with the
1831     optional ones preceded by BRAZERO or BRAMINZERO. */
1832
1833     case OP_BRAZERO:
1834     next = ecode + 1;
1835     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1836     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1837     do next += GET(next, 1); while (*next == OP_ALT);
1838     ecode = next + 1 + LINK_SIZE;
1839     break;
1840
1841     case OP_BRAMINZERO:
1842     next = ecode + 1;
1843     do next += GET(next, 1); while (*next == OP_ALT);
1844     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1845     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1846     ecode++;
1847     break;
1848
1849     case OP_SKIPZERO:
1850     next = ecode+1;
1851     do next += GET(next,1); while (*next == OP_ALT);
1852     ecode = next + 1 + LINK_SIZE;
1853     break;
1854
1855     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1856     here; just jump to the group, with allow_zero set TRUE. */
1857
1858     case OP_BRAPOSZERO:
1859     op = *(++ecode);
1860     allow_zero = TRUE;
1861     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1862       goto POSSESSIVE_NON_CAPTURE;
1863
1864     /* End of a group, repeated or non-repeating. */
1865
1866     case OP_KET:
1867     case OP_KETRMIN:
1868     case OP_KETRMAX:
1869     case OP_KETRPOS:
1870     prev = ecode - GET(ecode, 1);
1871
1872     /* If this was a group that remembered the subject start, in order to break
1873     infinite repeats of empty string matches, retrieve the subject start from
1874     the chain. Otherwise, set it NULL. */
1875
1876     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1877       {
1878       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1879       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1880       }
1881     else saved_eptr = NULL;
1882
1883     /* If we are at the end of an assertion group or a non-capturing atomic
1884     group, stop matching and return MATCH_MATCH, but record the current high
1885     water mark for use by positive assertions. We also need to record the match
1886     start in case it was changed by \K. */
1887
1888     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1889          *prev == OP_ONCE_NC)
1890       {
1891       md->end_match_ptr = eptr;      /* For ONCE_NC */
1892       md->end_offset_top = offset_top;
1893       md->start_match_ptr = mstart;
1894       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1895       }
1896
1897     /* For capturing groups we have to check the group number back at the start
1898     and if necessary complete handling an extraction by setting the offsets and
1899     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1900     into group 0, so it won't be picked up here. Instead, we catch it when the
1901     OP_END is reached. Other recursion is handled here. We just have to record
1902     the current subject position and start match pointer and give a MATCH
1903     return. */
1904
1905     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1906         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1907       {
1908       number = GET2(prev, 1+LINK_SIZE);
1909       offset = number << 1;
1910
1911 #ifdef PCRE_DEBUG
1912       printf("end bracket %d", number);
1913       printf("\n");
1914 #endif
1915
1916       /* Handle a recursively called group. */
1917
1918       if (md->recursive != NULL && md->recursive->group_num == number)
1919         {
1920         md->end_match_ptr = eptr;
1921         md->start_match_ptr = mstart;
1922         RRETURN(MATCH_MATCH);
1923         }
1924
1925       /* Deal with capturing */
1926
1927       md->capture_last = number;
1928       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1929         {
1930         /* If offset is greater than offset_top, it means that we are
1931         "skipping" a capturing group, and that group's offsets must be marked
1932         unset. In earlier versions of PCRE, all the offsets were unset at the
1933         start of matching, but this doesn't work because atomic groups and
1934         assertions can cause a value to be set that should later be unset.
1935         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1936         part of the atomic group, but this is not on the final matching path,
1937         so must be unset when 2 is set. (If there is no group 2, there is no
1938         problem, because offset_top will then be 2, indicating no capture.) */
1939
1940         if (offset > offset_top)
1941           {
1942           int *iptr = md->offset_vector + offset_top;
1943           int *iend = md->offset_vector + offset;
1944           while (iptr < iend) *iptr++ = -1;
1945           }
1946
1947         /* Now make the extraction */
1948
1949         md->offset_vector[offset] =
1950           md->offset_vector[md->offset_end - number];
1951         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1952         if (offset_top <= offset) offset_top = offset + 2;
1953         }
1954       }
1955
1956     /* For an ordinary non-repeating ket, just continue at this level. This
1957     also happens for a repeating ket if no characters were matched in the
1958     group. This is the forcible breaking of infinite loops as implemented in
1959     Perl 5.005. For a non-repeating atomic group that includes captures,
1960     establish a backup point by processing the rest of the pattern at a lower
1961     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1962     original OP_ONCE level, thereby bypassing intermediate backup points, but
1963     resetting any captures that happened along the way. */
1964
1965     if (*ecode == OP_KET || eptr == saved_eptr)
1966       {
1967       if (*prev == OP_ONCE)
1968         {
1969         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1970         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1971         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1972         RRETURN(MATCH_ONCE);
1973         }
1974       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1975       break;
1976       }
1977
1978     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1979     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1980     at a time from the outer level, thus saving stack. */
1981
1982     if (*ecode == OP_KETRPOS)
1983       {
1984       md->end_match_ptr = eptr;
1985       md->end_offset_top = offset_top;
1986       RRETURN(MATCH_KETRPOS);
1987       }
1988
1989     /* The normal repeating kets try the rest of the pattern or restart from
1990     the preceding bracket, in the appropriate order. In the second case, we can
1991     use tail recursion to avoid using another stack frame, unless we have
1992     an atomic group or an unlimited repeat of a group that can match an empty
1993     string. */
1994
1995     if (*ecode == OP_KETRMIN)
1996       {
1997       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1998       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1999       if (*prev == OP_ONCE)
2000         {
2001         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2002         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2003         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2004         RRETURN(MATCH_ONCE);
2005         }
2006       if (*prev >= OP_SBRA)    /* Could match an empty string */
2007         {
2008         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2009         RRETURN(rrc);
2010         }
2011       ecode = prev;
2012       goto TAIL_RECURSE;
2013       }
2014     else  /* OP_KETRMAX */
2015       {
2016       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2017       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2018       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2019       if (*prev == OP_ONCE)
2020         {
2021         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2022         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2023         md->once_target = prev;
2024         RRETURN(MATCH_ONCE);
2025         }
2026       ecode += 1 + LINK_SIZE;
2027       goto TAIL_RECURSE;
2028       }
2029     /* Control never gets here */
2030
2031     /* Not multiline mode: start of subject assertion, unless notbol. */
2032
2033     case OP_CIRC:
2034     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2035
2036     /* Start of subject assertion */
2037
2038     case OP_SOD:
2039     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2040     ecode++;
2041     break;
2042
2043     /* Multiline mode: start of subject unless notbol, or after any newline. */
2044
2045     case OP_CIRCM:
2046     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2047     if (eptr != md->start_subject &&
2048         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2049       RRETURN(MATCH_NOMATCH);
2050     ecode++;
2051     break;
2052
2053     /* Start of match assertion */
2054
2055     case OP_SOM:
2056     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2057     ecode++;
2058     break;
2059
2060     /* Reset the start of match point */
2061
2062     case OP_SET_SOM:
2063     mstart = eptr;
2064     ecode++;
2065     break;
2066
2067     /* Multiline mode: assert before any newline, or before end of subject
2068     unless noteol is set. */
2069
2070     case OP_DOLLM:
2071     if (eptr < md->end_subject)
2072       {
2073       if (!IS_NEWLINE(eptr))
2074         {
2075         if (md->partial != 0 &&
2076             eptr + 1 >= md->end_subject &&
2077             NLBLOCK->nltype == NLTYPE_FIXED &&
2078             NLBLOCK->nllen == 2 &&
2079             *eptr == NLBLOCK->nl[0])
2080           {
2081           md->hitend = TRUE;
2082           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2083           }
2084         RRETURN(MATCH_NOMATCH);
2085         }
2086       }
2087     else
2088       {
2089       if (md->noteol) RRETURN(MATCH_NOMATCH);
2090       SCHECK_PARTIAL();
2091       }
2092     ecode++;
2093     break;
2094
2095     /* Not multiline mode: assert before a terminating newline or before end of
2096     subject unless noteol is set. */
2097
2098     case OP_DOLL:
2099     if (md->noteol) RRETURN(MATCH_NOMATCH);
2100     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2101
2102     /* ... else fall through for endonly */
2103
2104     /* End of subject assertion (\z) */
2105
2106     case OP_EOD:
2107     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2108     SCHECK_PARTIAL();
2109     ecode++;
2110     break;
2111
2112     /* End of subject or ending \n assertion (\Z) */
2113
2114     case OP_EODN:
2115     ASSERT_NL_OR_EOS:
2116     if (eptr < md->end_subject &&
2117         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2118       {
2119       if (md->partial != 0 &&
2120           eptr + 1 >= md->end_subject &&
2121           NLBLOCK->nltype == NLTYPE_FIXED &&
2122           NLBLOCK->nllen == 2 &&
2123           *eptr == NLBLOCK->nl[0])
2124         {
2125         md->hitend = TRUE;
2126         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2127         }
2128       RRETURN(MATCH_NOMATCH);
2129       }
2130
2131     /* Either at end of string or \n before end. */
2132
2133     SCHECK_PARTIAL();
2134     ecode++;
2135     break;
2136
2137     /* Word boundary assertions */
2138
2139     case OP_NOT_WORD_BOUNDARY:
2140     case OP_WORD_BOUNDARY:
2141       {
2142
2143       /* Find out if the previous and current characters are "word" characters.
2144       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2145       be "non-word" characters. Remember the earliest consulted character for
2146       partial matching. */
2147
2148 #ifdef SUPPORT_UTF
2149       if (utf)
2150         {
2151         /* Get status of previous character */
2152
2153         if (eptr == md->start_subject) prev_is_word = FALSE; else
2154           {
2155           PCRE_PUCHAR lastptr = eptr - 1;
2156           BACKCHAR(lastptr);
2157           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2158           GETCHAR(c, lastptr);
2159 #ifdef SUPPORT_UCP
2160           if (md->use_ucp)
2161             {
2162             if (c == '_') prev_is_word = TRUE; else
2163               {
2164               int cat = UCD_CATEGORY(c);
2165               prev_is_word = (cat == ucp_L || cat == ucp_N);
2166               }
2167             }
2168           else
2169 #endif
2170           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2171           }
2172
2173         /* Get status of next character */
2174
2175         if (eptr >= md->end_subject)
2176           {
2177           SCHECK_PARTIAL();
2178           cur_is_word = FALSE;
2179           }
2180         else
2181           {
2182           GETCHAR(c, eptr);
2183 #ifdef SUPPORT_UCP
2184           if (md->use_ucp)
2185             {
2186             if (c == '_') cur_is_word = TRUE; else
2187               {
2188               int cat = UCD_CATEGORY(c);
2189               cur_is_word = (cat == ucp_L || cat == ucp_N);
2190               }
2191             }
2192           else
2193 #endif
2194           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2195           }
2196         }
2197       else
2198 #endif
2199
2200       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2201       consistency with the behaviour of \w we do use it in this case. */
2202
2203         {
2204         /* Get status of previous character */
2205
2206         if (eptr == md->start_subject) prev_is_word = FALSE; else
2207           {
2208           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2209 #ifdef SUPPORT_UCP
2210           if (md->use_ucp)
2211             {
2212             c = eptr[-1];
2213             if (c == '_') prev_is_word = TRUE; else
2214               {
2215               int cat = UCD_CATEGORY(c);
2216               prev_is_word = (cat == ucp_L || cat == ucp_N);
2217               }
2218             }
2219           else
2220 #endif
2221           prev_is_word = MAX_255(eptr[-1])
2222             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2223           }
2224
2225         /* Get status of next character */
2226
2227         if (eptr >= md->end_subject)
2228           {
2229           SCHECK_PARTIAL();
2230           cur_is_word = FALSE;
2231           }
2232         else
2233 #ifdef SUPPORT_UCP
2234         if (md->use_ucp)
2235           {
2236           c = *eptr;
2237           if (c == '_') cur_is_word = TRUE; else
2238             {
2239             int cat = UCD_CATEGORY(c);
2240             cur_is_word = (cat == ucp_L || cat == ucp_N);
2241             }
2242           }
2243         else
2244 #endif
2245         cur_is_word = MAX_255(*eptr)
2246           && ((md->ctypes[*eptr] & ctype_word) != 0);
2247         }
2248
2249       /* Now see if the situation is what we want */
2250
2251       if ((*ecode++ == OP_WORD_BOUNDARY)?
2252            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2253         RRETURN(MATCH_NOMATCH);
2254       }
2255     break;
2256
2257     /* Match any single character type except newline; have to take care with
2258     CRLF newlines and partial matching. */
2259
2260     case OP_ANY:
2261     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2262     if (md->partial != 0 &&
2263         eptr + 1 >= md->end_subject &&
2264         NLBLOCK->nltype == NLTYPE_FIXED &&
2265         NLBLOCK->nllen == 2 &&
2266         *eptr == NLBLOCK->nl[0])
2267       {
2268       md->hitend = TRUE;
2269       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2270       }
2271
2272     /* Fall through */
2273
2274     /* Match any single character whatsoever. */
2275
2276     case OP_ALLANY:
2277     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2278       {                            /* not be updated before SCHECK_PARTIAL. */
2279       SCHECK_PARTIAL();
2280       RRETURN(MATCH_NOMATCH);
2281       }
2282     eptr++;
2283 #ifdef SUPPORT_UTF
2284     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2285 #endif
2286     ecode++;
2287     break;
2288
2289     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2290     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2291
2292     case OP_ANYBYTE:
2293     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2294       {                            /* not be updated before SCHECK_PARTIAL. */
2295       SCHECK_PARTIAL();
2296       RRETURN(MATCH_NOMATCH);
2297       }
2298     eptr++;
2299     ecode++;
2300     break;
2301
2302     case OP_NOT_DIGIT:
2303     if (eptr >= md->end_subject)
2304       {
2305       SCHECK_PARTIAL();
2306       RRETURN(MATCH_NOMATCH);
2307       }
2308     GETCHARINCTEST(c, eptr);
2309     if (
2310 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2311        c < 256 &&
2312 #endif
2313        (md->ctypes[c] & ctype_digit) != 0
2314        )
2315       RRETURN(MATCH_NOMATCH);
2316     ecode++;
2317     break;
2318
2319     case OP_DIGIT:
2320     if (eptr >= md->end_subject)
2321       {
2322       SCHECK_PARTIAL();
2323       RRETURN(MATCH_NOMATCH);
2324       }
2325     GETCHARINCTEST(c, eptr);
2326     if (
2327 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2328        c > 255 ||
2329 #endif
2330        (md->ctypes[c] & ctype_digit) == 0
2331        )
2332       RRETURN(MATCH_NOMATCH);
2333     ecode++;
2334     break;
2335
2336     case OP_NOT_WHITESPACE:
2337     if (eptr >= md->end_subject)
2338       {
2339       SCHECK_PARTIAL();
2340       RRETURN(MATCH_NOMATCH);
2341       }
2342     GETCHARINCTEST(c, eptr);
2343     if (
2344 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2345        c < 256 &&
2346 #endif
2347        (md->ctypes[c] & ctype_space) != 0
2348        )
2349       RRETURN(MATCH_NOMATCH);
2350     ecode++;
2351     break;
2352
2353     case OP_WHITESPACE:
2354     if (eptr >= md->end_subject)
2355       {
2356       SCHECK_PARTIAL();
2357       RRETURN(MATCH_NOMATCH);
2358       }
2359     GETCHARINCTEST(c, eptr);
2360     if (
2361 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2362        c > 255 ||
2363 #endif
2364        (md->ctypes[c] & ctype_space) == 0
2365        )
2366       RRETURN(MATCH_NOMATCH);
2367     ecode++;
2368     break;
2369
2370     case OP_NOT_WORDCHAR:
2371     if (eptr >= md->end_subject)
2372       {
2373       SCHECK_PARTIAL();
2374       RRETURN(MATCH_NOMATCH);
2375       }
2376     GETCHARINCTEST(c, eptr);
2377     if (
2378 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2379        c < 256 &&
2380 #endif
2381        (md->ctypes[c] & ctype_word) != 0
2382        )
2383       RRETURN(MATCH_NOMATCH);
2384     ecode++;
2385     break;
2386
2387     case OP_WORDCHAR:
2388     if (eptr >= md->end_subject)
2389       {
2390       SCHECK_PARTIAL();
2391       RRETURN(MATCH_NOMATCH);
2392       }
2393     GETCHARINCTEST(c, eptr);
2394     if (
2395 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2396        c > 255 ||
2397 #endif
2398        (md->ctypes[c] & ctype_word) == 0
2399        )
2400       RRETURN(MATCH_NOMATCH);
2401     ecode++;
2402     break;
2403
2404     case OP_ANYNL:
2405     if (eptr >= md->end_subject)
2406       {
2407       SCHECK_PARTIAL();
2408       RRETURN(MATCH_NOMATCH);
2409       }
2410     GETCHARINCTEST(c, eptr);
2411     switch(c)
2412       {
2413       default: RRETURN(MATCH_NOMATCH);
2414
2415       case 0x000d:
2416       if (eptr >= md->end_subject)
2417         {
2418         SCHECK_PARTIAL();
2419         }
2420       else if (*eptr == 0x0a) eptr++;
2421       break;
2422
2423       case 0x000a:
2424       break;
2425
2426       case 0x000b:
2427       case 0x000c:
2428       case 0x0085:
2429       case 0x2028:
2430       case 0x2029:
2431       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2432       break;
2433       }
2434     ecode++;
2435     break;
2436
2437     case OP_NOT_HSPACE:
2438     if (eptr >= md->end_subject)
2439       {
2440       SCHECK_PARTIAL();
2441       RRETURN(MATCH_NOMATCH);
2442       }
2443     GETCHARINCTEST(c, eptr);
2444     switch(c)
2445       {
2446       default: break;
2447       case 0x09:      /* HT */
2448       case 0x20:      /* SPACE */
2449       case 0xa0:      /* NBSP */
2450       case 0x1680:    /* OGHAM SPACE MARK */
2451       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2452       case 0x2000:    /* EN QUAD */
2453       case 0x2001:    /* EM QUAD */
2454       case 0x2002:    /* EN SPACE */
2455       case 0x2003:    /* EM SPACE */
2456       case 0x2004:    /* THREE-PER-EM SPACE */
2457       case 0x2005:    /* FOUR-PER-EM SPACE */
2458       case 0x2006:    /* SIX-PER-EM SPACE */
2459       case 0x2007:    /* FIGURE SPACE */
2460       case 0x2008:    /* PUNCTUATION SPACE */
2461       case 0x2009:    /* THIN SPACE */
2462       case 0x200A:    /* HAIR SPACE */
2463       case 0x202f:    /* NARROW NO-BREAK SPACE */
2464       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2465       case 0x3000:    /* IDEOGRAPHIC SPACE */
2466       RRETURN(MATCH_NOMATCH);
2467       }
2468     ecode++;
2469     break;
2470
2471     case OP_HSPACE:
2472     if (eptr >= md->end_subject)
2473       {
2474       SCHECK_PARTIAL();
2475       RRETURN(MATCH_NOMATCH);
2476       }
2477     GETCHARINCTEST(c, eptr);
2478     switch(c)
2479       {
2480       default: RRETURN(MATCH_NOMATCH);
2481       case 0x09:      /* HT */
2482       case 0x20:      /* SPACE */
2483       case 0xa0:      /* NBSP */
2484       case 0x1680:    /* OGHAM SPACE MARK */
2485       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2486       case 0x2000:    /* EN QUAD */
2487       case 0x2001:    /* EM QUAD */
2488       case 0x2002:    /* EN SPACE */
2489       case 0x2003:    /* EM SPACE */
2490       case 0x2004:    /* THREE-PER-EM SPACE */
2491       case 0x2005:    /* FOUR-PER-EM SPACE */
2492       case 0x2006:    /* SIX-PER-EM SPACE */
2493       case 0x2007:    /* FIGURE SPACE */
2494       case 0x2008:    /* PUNCTUATION SPACE */
2495       case 0x2009:    /* THIN SPACE */
2496       case 0x200A:    /* HAIR SPACE */
2497       case 0x202f:    /* NARROW NO-BREAK SPACE */
2498       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2499       case 0x3000:    /* IDEOGRAPHIC SPACE */
2500       break;
2501       }
2502     ecode++;
2503     break;
2504
2505     case OP_NOT_VSPACE:
2506     if (eptr >= md->end_subject)
2507       {
2508       SCHECK_PARTIAL();
2509       RRETURN(MATCH_NOMATCH);
2510       }
2511     GETCHARINCTEST(c, eptr);
2512     switch(c)
2513       {
2514       default: break;
2515       case 0x0a:      /* LF */
2516       case 0x0b:      /* VT */
2517       case 0x0c:      /* FF */
2518       case 0x0d:      /* CR */
2519       case 0x85:      /* NEL */
2520       case 0x2028:    /* LINE SEPARATOR */
2521       case 0x2029:    /* PARAGRAPH SEPARATOR */
2522       RRETURN(MATCH_NOMATCH);
2523       }
2524     ecode++;
2525     break;
2526
2527     case OP_VSPACE:
2528     if (eptr >= md->end_subject)
2529       {
2530       SCHECK_PARTIAL();
2531       RRETURN(MATCH_NOMATCH);
2532       }
2533     GETCHARINCTEST(c, eptr);
2534     switch(c)
2535       {
2536       default: RRETURN(MATCH_NOMATCH);
2537       case 0x0a:      /* LF */
2538       case 0x0b:      /* VT */
2539       case 0x0c:      /* FF */
2540       case 0x0d:      /* CR */
2541       case 0x85:      /* NEL */
2542       case 0x2028:    /* LINE SEPARATOR */
2543       case 0x2029:    /* PARAGRAPH SEPARATOR */
2544       break;
2545       }
2546     ecode++;
2547     break;
2548
2549 #ifdef SUPPORT_UCP
2550     /* Check the next character by Unicode property. We will get here only
2551     if the support is in the binary; otherwise a compile-time error occurs. */
2552
2553     case OP_PROP:
2554     case OP_NOTPROP:
2555     if (eptr >= md->end_subject)
2556       {
2557       SCHECK_PARTIAL();
2558       RRETURN(MATCH_NOMATCH);
2559       }
2560     GETCHARINCTEST(c, eptr);
2561       {
2562       const pcre_uint8 chartype = UCD_CHARTYPE(c);
2563
2564       switch(ecode[1])
2565         {
2566         case PT_ANY:
2567         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2568         break;
2569
2570         case PT_LAMP:
2571         if ((chartype == ucp_Lu ||
2572              chartype == ucp_Ll ||
2573              chartype == ucp_Lt) == (op == OP_NOTPROP))
2574           RRETURN(MATCH_NOMATCH);
2575         break;
2576
2577         case PT_GC:
2578         if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
2579           RRETURN(MATCH_NOMATCH);
2580         break;
2581
2582         case PT_PC:
2583         if ((ecode[2] != chartype) == (op == OP_PROP))
2584           RRETURN(MATCH_NOMATCH);
2585         break;
2586
2587         case PT_SC:
2588         if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
2589           RRETURN(MATCH_NOMATCH);
2590         break;
2591
2592         /* These are specials */
2593
2594         case PT_ALNUM:
2595         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2596              PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
2597           RRETURN(MATCH_NOMATCH);
2598         break;
2599
2600         case PT_SPACE:    /* Perl space */
2601         if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2602              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2603                == (op == OP_NOTPROP))
2604           RRETURN(MATCH_NOMATCH);
2605         break;
2606
2607         case PT_PXSPACE:  /* POSIX space */
2608         if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2609              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2610              c == CHAR_FF || c == CHAR_CR)
2611                == (op == OP_NOTPROP))
2612           RRETURN(MATCH_NOMATCH);
2613         break;
2614
2615         case PT_WORD:
2616         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2617              PRIV(ucp_gentype)[chartype] == ucp_N ||
2618              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2619           RRETURN(MATCH_NOMATCH);
2620         break;
2621
2622         /* This should never occur */
2623
2624         default:
2625         RRETURN(PCRE_ERROR_INTERNAL);
2626         }
2627
2628       ecode += 3;
2629       }
2630     break;
2631
2632     /* Match an extended Unicode sequence. We will get here only if the support
2633     is in the binary; otherwise a compile-time error occurs. */
2634
2635     case OP_EXTUNI:
2636     if (eptr >= md->end_subject)
2637       {
2638       SCHECK_PARTIAL();
2639       RRETURN(MATCH_NOMATCH);
2640       }
2641     GETCHARINCTEST(c, eptr);
2642     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2643     while (eptr < md->end_subject)
2644       {
2645       int len = 1;
2646       if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2647       if (UCD_CATEGORY(c) != ucp_M) break;
2648       eptr += len;
2649       }
2650     CHECK_PARTIAL();
2651     ecode++;
2652     break;
2653 #endif
2654
2655
2656     /* Match a back reference, possibly repeatedly. Look past the end of the
2657     item to see if there is repeat information following. The code is similar
2658     to that for character classes, but repeated for efficiency. Then obey
2659     similar code to character type repeats - written out again for speed.
2660     However, if the referenced string is the empty string, always treat
2661     it as matched, any number of times (otherwise there could be infinite
2662     loops). */
2663
2664     case OP_REF:
2665     case OP_REFI:
2666     caseless = op == OP_REFI;
2667     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2668     ecode += 1 + IMM2_SIZE;
2669
2670     /* If the reference is unset, there are two possibilities:
2671
2672     (a) In the default, Perl-compatible state, set the length negative;
2673     this ensures that every attempt at a match fails. We can't just fail
2674     here, because of the possibility of quantifiers with zero minima.
2675
2676     (b) If the JavaScript compatibility flag is set, set the length to zero
2677     so that the back reference matches an empty string.
2678
2679     Otherwise, set the length to the length of what was matched by the
2680     referenced subpattern. */
2681
2682     if (offset >= offset_top || md->offset_vector[offset] < 0)
2683       length = (md->jscript_compat)? 0 : -1;
2684     else
2685       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2686
2687     /* Set up for repetition, or handle the non-repeated case */
2688
2689     switch (*ecode)
2690       {
2691       case OP_CRSTAR:
2692       case OP_CRMINSTAR:
2693       case OP_CRPLUS:
2694       case OP_CRMINPLUS:
2695       case OP_CRQUERY:
2696       case OP_CRMINQUERY:
2697       c = *ecode++ - OP_CRSTAR;
2698       minimize = (c & 1) != 0;
2699       min = rep_min[c];                 /* Pick up values from tables; */
2700       max = rep_max[c];                 /* zero for max => infinity */
2701       if (max == 0) max = INT_MAX;
2702       break;
2703
2704       case OP_CRRANGE:
2705       case OP_CRMINRANGE:
2706       minimize = (*ecode == OP_CRMINRANGE);
2707       min = GET2(ecode, 1);
2708       max = GET2(ecode, 1 + IMM2_SIZE);
2709       if (max == 0) max = INT_MAX;
2710       ecode += 1 + 2 * IMM2_SIZE;
2711       break;
2712
2713       default:               /* No repeat follows */
2714       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2715         {
2716         if (length == -2) eptr = md->end_subject;   /* Partial match */
2717         CHECK_PARTIAL();
2718         RRETURN(MATCH_NOMATCH);
2719         }
2720       eptr += length;
2721       continue;              /* With the main loop */
2722       }
2723
2724     /* Handle repeated back references. If the length of the reference is
2725     zero, just continue with the main loop. If the length is negative, it
2726     means the reference is unset in non-JavaScript-compatible mode. If the
2727     minimum is zero, we can continue at the same level without recursion. For
2728     any other minimum, carrying on will result in NOMATCH. */
2729
2730     if (length == 0) continue;
2731     if (length < 0 && min == 0) continue;
2732
2733     /* First, ensure the minimum number of matches are present. We get back
2734     the length of the reference string explicitly rather than passing the
2735     address of eptr, so that eptr can be a register variable. */
2736
2737     for (i = 1; i <= min; i++)
2738       {
2739       int slength;
2740       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2741         {
2742         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2743         CHECK_PARTIAL();
2744         RRETURN(MATCH_NOMATCH);
2745         }
2746       eptr += slength;
2747       }
2748
2749     /* If min = max, continue at the same level without recursion.
2750     They are not both allowed to be zero. */
2751
2752     if (min == max) continue;
2753
2754     /* If minimizing, keep trying and advancing the pointer */
2755
2756     if (minimize)
2757       {
2758       for (fi = min;; fi++)
2759         {
2760         int slength;
2761         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2762         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2763         if (fi >= max) RRETURN(MATCH_NOMATCH);
2764         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2765           {
2766           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2767           CHECK_PARTIAL();
2768           RRETURN(MATCH_NOMATCH);
2769           }
2770         eptr += slength;
2771         }
2772       /* Control never gets here */
2773       }
2774
2775     /* If maximizing, find the longest string and work backwards */
2776
2777     else
2778       {
2779       pp = eptr;
2780       for (i = min; i < max; i++)
2781         {
2782         int slength;
2783         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2784           {
2785           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2786           the soft partial matching case. */
2787
2788           if (slength == -2 && md->partial != 0 &&
2789               md->end_subject > md->start_used_ptr)
2790             {
2791             md->hitend = TRUE;
2792             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2793             }
2794           break;
2795           }
2796         eptr += slength;
2797         }
2798
2799       while (eptr >= pp)
2800         {
2801         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2802         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2803         eptr -= length;
2804         }
2805       RRETURN(MATCH_NOMATCH);
2806       }
2807     /* Control never gets here */
2808
2809     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2810     used when all the characters in the class have values in the range 0-255,
2811     and either the matching is caseful, or the characters are in the range
2812     0-127 when UTF-8 processing is enabled. The only difference between
2813     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2814     encountered.
2815
2816     First, look past the end of the item to see if there is repeat information
2817     following. Then obey similar code to character type repeats - written out
2818     again for speed. */
2819
2820     case OP_NCLASS:
2821     case OP_CLASS:
2822       {
2823       /* The data variable is saved across frames, so the byte map needs to
2824       be stored there. */
2825 #define BYTE_MAP ((pcre_uint8 *)data)
2826       data = ecode + 1;                /* Save for matching */
2827       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2828
2829       switch (*ecode)
2830         {
2831         case OP_CRSTAR:
2832         case OP_CRMINSTAR:
2833         case OP_CRPLUS:
2834         case OP_CRMINPLUS:
2835         case OP_CRQUERY:
2836         case OP_CRMINQUERY:
2837         c = *ecode++ - OP_CRSTAR;
2838         minimize = (c & 1) != 0;
2839         min = rep_min[c];                 /* Pick up values from tables; */
2840         max = rep_max[c];                 /* zero for max => infinity */
2841         if (max == 0) max = INT_MAX;
2842         break;
2843
2844         case OP_CRRANGE:
2845         case OP_CRMINRANGE:
2846         minimize = (*ecode == OP_CRMINRANGE);
2847         min = GET2(ecode, 1);
2848         max = GET2(ecode, 1 + IMM2_SIZE);
2849         if (max == 0) max = INT_MAX;
2850         ecode += 1 + 2 * IMM2_SIZE;
2851         break;
2852
2853         default:               /* No repeat follows */
2854         min = max = 1;
2855         break;
2856         }
2857
2858       /* First, ensure the minimum number of matches are present. */
2859
2860 #ifdef SUPPORT_UTF
2861       if (utf)
2862         {
2863         for (i = 1; i <= min; i++)
2864           {
2865           if (eptr >= md->end_subject)
2866             {
2867             SCHECK_PARTIAL();
2868             RRETURN(MATCH_NOMATCH);
2869             }
2870           GETCHARINC(c, eptr);
2871           if (c > 255)
2872             {
2873             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2874             }
2875           else
2876             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2877           }
2878         }
2879       else
2880 #endif
2881       /* Not UTF mode */
2882         {
2883         for (i = 1; i <= min; i++)
2884           {
2885           if (eptr >= md->end_subject)
2886             {
2887             SCHECK_PARTIAL();
2888             RRETURN(MATCH_NOMATCH);
2889             }
2890           c = *eptr++;
2891 #ifndef COMPILE_PCRE8
2892           if (c > 255)
2893             {
2894             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2895             }
2896           else
2897 #endif
2898             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2899           }
2900         }
2901
2902       /* If max == min we can continue with the main loop without the
2903       need to recurse. */
2904
2905       if (min == max) continue;
2906
2907       /* If minimizing, keep testing the rest of the expression and advancing
2908       the pointer while it matches the class. */
2909
2910       if (minimize)
2911         {
2912 #ifdef SUPPORT_UTF
2913         if (utf)
2914           {
2915           for (fi = min;; fi++)
2916             {
2917             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2918             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2919             if (fi >= max) RRETURN(MATCH_NOMATCH);
2920             if (eptr >= md->end_subject)
2921               {
2922               SCHECK_PARTIAL();
2923               RRETURN(MATCH_NOMATCH);
2924               }
2925             GETCHARINC(c, eptr);
2926             if (c > 255)
2927               {
2928               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2929               }
2930             else
2931               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2932             }
2933           }
2934         else
2935 #endif
2936         /* Not UTF mode */
2937           {
2938           for (fi = min;; fi++)
2939             {
2940             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2941             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942             if (fi >= max) RRETURN(MATCH_NOMATCH);
2943             if (eptr >= md->end_subject)
2944               {
2945               SCHECK_PARTIAL();
2946               RRETURN(MATCH_NOMATCH);
2947               }
2948             c = *eptr++;
2949 #ifndef COMPILE_PCRE8
2950             if (c > 255)
2951               {
2952               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2953               }
2954             else
2955 #endif
2956               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2957             }
2958           }
2959         /* Control never gets here */
2960         }
2961
2962       /* If maximizing, find the longest possible run, then work backwards. */
2963
2964       else
2965         {
2966         pp = eptr;
2967
2968 #ifdef SUPPORT_UTF
2969         if (utf)
2970           {
2971           for (i = min; i < max; i++)
2972             {
2973             int len = 1;
2974             if (eptr >= md->end_subject)
2975               {
2976               SCHECK_PARTIAL();
2977               break;
2978               }
2979             GETCHARLEN(c, eptr, len);
2980             if (c > 255)
2981               {
2982               if (op == OP_CLASS) break;
2983               }
2984             else
2985               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2986             eptr += len;
2987             }
2988           for (;;)
2989             {
2990             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2991             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2992             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2993             BACKCHAR(eptr);
2994             }
2995           }
2996         else
2997 #endif
2998           /* Not UTF mode */
2999           {
3000           for (i = min; i < max; i++)
3001             {
3002             if (eptr >= md->end_subject)
3003               {
3004               SCHECK_PARTIAL();
3005               break;
3006               }
3007             c = *eptr;
3008 #ifndef COMPILE_PCRE8
3009             if (c > 255)
3010               {
3011               if (op == OP_CLASS) break;
3012               }
3013             else
3014 #endif
3015               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3016             eptr++;
3017             }
3018           while (eptr >= pp)
3019             {
3020             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3021             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3022             eptr--;
3023             }
3024           }
3025
3026         RRETURN(MATCH_NOMATCH);
3027         }
3028 #undef BYTE_MAP
3029       }
3030     /* Control never gets here */
3031
3032
3033     /* Match an extended character class. This opcode is encountered only
3034     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3035     mode, because Unicode properties are supported in non-UTF-8 mode. */
3036
3037 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3038     case OP_XCLASS:
3039       {
3040       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3041       ecode += GET(ecode, 1);                      /* Advance past the item */
3042
3043       switch (*ecode)
3044         {
3045         case OP_CRSTAR:
3046         case OP_CRMINSTAR:
3047         case OP_CRPLUS:
3048         case OP_CRMINPLUS:
3049         case OP_CRQUERY:
3050         case OP_CRMINQUERY:
3051         c = *ecode++ - OP_CRSTAR;
3052         minimize = (c & 1) != 0;
3053         min = rep_min[c];                 /* Pick up values from tables; */
3054         max = rep_max[c];                 /* zero for max => infinity */
3055         if (max == 0) max = INT_MAX;
3056         break;
3057
3058         case OP_CRRANGE:
3059         case OP_CRMINRANGE:
3060         minimize = (*ecode == OP_CRMINRANGE);
3061         min = GET2(ecode, 1);
3062         max = GET2(ecode, 1 + IMM2_SIZE);
3063         if (max == 0) max = INT_MAX;
3064         ecode += 1 + 2 * IMM2_SIZE;
3065         break;
3066
3067         default:               /* No repeat follows */
3068         min = max = 1;
3069         break;
3070         }
3071
3072       /* First, ensure the minimum number of matches are present. */
3073
3074       for (i = 1; i <= min; i++)
3075         {
3076         if (eptr >= md->end_subject)
3077           {
3078           SCHECK_PARTIAL();
3079           RRETURN(MATCH_NOMATCH);
3080           }
3081         GETCHARINCTEST(c, eptr);
3082         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3083         }
3084
3085       /* If max == min we can continue with the main loop without the
3086       need to recurse. */
3087
3088       if (min == max) continue;
3089
3090       /* If minimizing, keep testing the rest of the expression and advancing
3091       the pointer while it matches the class. */
3092
3093       if (minimize)
3094         {
3095         for (fi = min;; fi++)
3096           {
3097           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3098           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3099           if (fi >= max) RRETURN(MATCH_NOMATCH);
3100           if (eptr >= md->end_subject)
3101             {
3102             SCHECK_PARTIAL();
3103             RRETURN(MATCH_NOMATCH);
3104             }
3105           GETCHARINCTEST(c, eptr);
3106           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3107           }
3108         /* Control never gets here */
3109         }
3110
3111       /* If maximizing, find the longest possible run, then work backwards. */
3112
3113       else
3114         {
3115         pp = eptr;
3116         for (i = min; i < max; i++)
3117           {
3118           int len = 1;
3119           if (eptr >= md->end_subject)
3120             {
3121             SCHECK_PARTIAL();
3122             break;
3123             }
3124 #ifdef SUPPORT_UTF
3125           GETCHARLENTEST(c, eptr, len);
3126 #else
3127           c = *eptr;
3128 #endif
3129           if (!PRIV(xclass)(c, data, utf)) break;
3130           eptr += len;
3131           }
3132         for(;;)
3133           {
3134           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3135           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3136           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3137 #ifdef SUPPORT_UTF
3138           if (utf) BACKCHAR(eptr);
3139 #endif
3140           }
3141         RRETURN(MATCH_NOMATCH);
3142         }
3143
3144       /* Control never gets here */
3145       }
3146 #endif    /* End of XCLASS */
3147
3148     /* Match a single character, casefully */
3149
3150     case OP_CHAR:
3151 #ifdef SUPPORT_UTF
3152     if (utf)
3153       {
3154       length = 1;
3155       ecode++;
3156       GETCHARLEN(fc, ecode, length);
3157       if (length > md->end_subject - eptr)
3158         {
3159         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3160         RRETURN(MATCH_NOMATCH);
3161         }
3162       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3163       }
3164     else
3165 #endif
3166     /* Not UTF mode */
3167       {
3168       if (md->end_subject - eptr < 1)
3169         {
3170         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3171         RRETURN(MATCH_NOMATCH);
3172         }
3173       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3174       ecode += 2;
3175       }
3176     break;
3177
3178     /* Match a single character, caselessly. If we are at the end of the
3179     subject, give up immediately. */
3180
3181     case OP_CHARI:
3182     if (eptr >= md->end_subject)
3183       {
3184       SCHECK_PARTIAL();
3185       RRETURN(MATCH_NOMATCH);
3186       }
3187
3188 #ifdef SUPPORT_UTF
3189     if (utf)
3190       {
3191       length = 1;
3192       ecode++;
3193       GETCHARLEN(fc, ecode, length);
3194
3195       /* If the pattern character's value is < 128, we have only one byte, and
3196       we know that its other case must also be one byte long, so we can use the
3197       fast lookup table. We know that there is at least one byte left in the
3198       subject. */
3199
3200       if (fc < 128)
3201         {
3202         if (md->lcc[fc]
3203             != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3204         ecode++;
3205         eptr++;
3206         }
3207
3208       /* Otherwise we must pick up the subject character. Note that we cannot
3209       use the value of "length" to check for sufficient bytes left, because the
3210       other case of the character may have more or fewer bytes.  */
3211
3212       else
3213         {
3214         unsigned int dc;
3215         GETCHARINC(dc, eptr);
3216         ecode += length;
3217
3218         /* If we have Unicode property support, we can use it to test the other
3219         case of the character, if there is one. */
3220
3221         if (fc != dc)
3222           {
3223 #ifdef SUPPORT_UCP
3224           if (dc != UCD_OTHERCASE(fc))
3225 #endif
3226             RRETURN(MATCH_NOMATCH);
3227           }
3228         }
3229       }
3230     else
3231 #endif   /* SUPPORT_UTF */
3232
3233     /* Not UTF mode */
3234       {
3235       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3236           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3237       eptr++;
3238       ecode += 2;
3239       }
3240     break;
3241
3242     /* Match a single character repeatedly. */
3243
3244     case OP_EXACT:
3245     case OP_EXACTI:
3246     min = max = GET2(ecode, 1);
3247     ecode += 1 + IMM2_SIZE;
3248     goto REPEATCHAR;
3249
3250     case OP_POSUPTO:
3251     case OP_POSUPTOI:
3252     possessive = TRUE;
3253     /* Fall through */
3254
3255     case OP_UPTO:
3256     case OP_UPTOI:
3257     case OP_MINUPTO:
3258     case OP_MINUPTOI:
3259     min = 0;
3260     max = GET2(ecode, 1);
3261     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3262     ecode += 1 + IMM2_SIZE;
3263     goto REPEATCHAR;
3264
3265     case OP_POSSTAR:
3266     case OP_POSSTARI:
3267     possessive = TRUE;
3268     min = 0;
3269     max = INT_MAX;
3270     ecode++;
3271     goto REPEATCHAR;
3272
3273     case OP_POSPLUS:
3274     case OP_POSPLUSI:
3275     possessive = TRUE;
3276     min = 1;
3277     max = INT_MAX;
3278     ecode++;
3279     goto REPEATCHAR;
3280
3281     case OP_POSQUERY:
3282     case OP_POSQUERYI:
3283     possessive = TRUE;
3284     min = 0;
3285     max = 1;
3286     ecode++;
3287     goto REPEATCHAR;
3288
3289     case OP_STAR:
3290     case OP_STARI:
3291     case OP_MINSTAR:
3292     case OP_MINSTARI:
3293     case OP_PLUS:
3294     case OP_PLUSI:
3295     case OP_MINPLUS:
3296     case OP_MINPLUSI:
3297     case OP_QUERY:
3298     case OP_QUERYI:
3299     case OP_MINQUERY:
3300     case OP_MINQUERYI:
3301     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3302     minimize = (c & 1) != 0;
3303     min = rep_min[c];                 /* Pick up values from tables; */
3304     max = rep_max[c];                 /* zero for max => infinity */
3305     if (max == 0) max = INT_MAX;
3306
3307     /* Common code for all repeated single-character matches. */
3308
3309     REPEATCHAR:
3310 #ifdef SUPPORT_UTF
3311     if (utf)
3312       {
3313       length = 1;
3314       charptr = ecode;
3315       GETCHARLEN(fc, ecode, length);
3316       ecode += length;
3317
3318       /* Handle multibyte character matching specially here. There is
3319       support for caseless matching if UCP support is present. */
3320
3321       if (length > 1)
3322         {
3323 #ifdef SUPPORT_UCP
3324         unsigned int othercase;
3325         if (op >= OP_STARI &&     /* Caseless */
3326             (othercase = UCD_OTHERCASE(fc)) != fc)
3327           oclength = PRIV(ord2utf)(othercase, occhars);
3328         else oclength = 0;
3329 #endif  /* SUPPORT_UCP */
3330
3331         for (i = 1; i <= min; i++)
3332           {
3333           if (eptr <= md->end_subject - length &&
3334             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3335 #ifdef SUPPORT_UCP
3336           else if (oclength > 0 &&
3337                    eptr <= md->end_subject - oclength &&
3338                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3339 #endif  /* SUPPORT_UCP */
3340           else
3341             {
3342             CHECK_PARTIAL();
3343             RRETURN(MATCH_NOMATCH);
3344             }
3345           }
3346
3347         if (min == max) continue;
3348
3349         if (minimize)
3350           {
3351           for (fi = min;; fi++)
3352             {
3353             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3354             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3355             if (fi >= max) RRETURN(MATCH_NOMATCH);
3356             if (eptr <= md->end_subject - length &&
3357               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3358 #ifdef SUPPORT_UCP
3359             else if (oclength > 0 &&
3360                      eptr <= md->end_subject - oclength &&
3361                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3362 #endif  /* SUPPORT_UCP */
3363             else
3364               {
3365               CHECK_PARTIAL();
3366               RRETURN(MATCH_NOMATCH);
3367               }
3368             }
3369           /* Control never gets here */
3370           }
3371
3372         else  /* Maximize */
3373           {
3374           pp = eptr;
3375           for (i = min; i < max; i++)
3376             {
3377             if (eptr <= md->end_subject - length &&
3378                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3379 #ifdef SUPPORT_UCP
3380             else if (oclength > 0 &&
3381                      eptr <= md->end_subject - oclength &&
3382                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3383 #endif  /* SUPPORT_UCP */
3384             else
3385               {
3386               CHECK_PARTIAL();
3387               break;
3388               }
3389             }
3390
3391           if (possessive) continue;
3392
3393           for(;;)
3394             {
3395             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3396             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3397             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3398 #ifdef SUPPORT_UCP
3399             eptr--;
3400             BACKCHAR(eptr);
3401 #else   /* without SUPPORT_UCP */
3402             eptr -= length;
3403 #endif  /* SUPPORT_UCP */
3404             }
3405           }
3406         /* Control never gets here */
3407         }
3408
3409       /* If the length of a UTF-8 character is 1, we fall through here, and
3410       obey the code as for non-UTF-8 characters below, though in this case the
3411       value of fc will always be < 128. */
3412       }
3413     else
3414 #endif  /* SUPPORT_UTF */
3415       /* When not in UTF-8 mode, load a single-byte character. */
3416       fc = *ecode++;
3417
3418     /* The value of fc at this point is always one character, though we may
3419     or may not be in UTF mode. The code is duplicated for the caseless and
3420     caseful cases, for speed, since matching characters is likely to be quite
3421     common. First, ensure the minimum number of matches are present. If min =
3422     max, continue at the same level without recursing. Otherwise, if
3423     minimizing, keep trying the rest of the expression and advancing one
3424     matching character if failing, up to the maximum. Alternatively, if
3425     maximizing, find the maximum number of characters and work backwards. */
3426
3427     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3428       max, (char *)eptr));
3429
3430     if (op >= OP_STARI)  /* Caseless */
3431       {
3432 #ifdef COMPILE_PCRE8
3433       /* fc must be < 128 if UTF is enabled. */
3434       foc = md->fcc[fc];
3435 #else
3436 #ifdef SUPPORT_UTF
3437 #ifdef SUPPORT_UCP
3438       if (utf && fc > 127)
3439         foc = UCD_OTHERCASE(fc);
3440 #else
3441       if (utf && fc > 127)
3442         foc = fc;
3443 #endif /* SUPPORT_UCP */
3444       else
3445 #endif /* SUPPORT_UTF */
3446         foc = TABLE_GET(fc, md->fcc, fc);
3447 #endif /* COMPILE_PCRE8 */
3448
3449       for (i = 1; i <= min; i++)
3450         {
3451         if (eptr >= md->end_subject)
3452           {
3453           SCHECK_PARTIAL();
3454           RRETURN(MATCH_NOMATCH);
3455           }
3456         if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3457         eptr++;
3458         }
3459       if (min == max) continue;
3460       if (minimize)
3461         {
3462         for (fi = min;; fi++)
3463           {
3464           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3465           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466           if (fi >= max) RRETURN(MATCH_NOMATCH);
3467           if (eptr >= md->end_subject)
3468             {
3469             SCHECK_PARTIAL();
3470             RRETURN(MATCH_NOMATCH);
3471             }
3472           if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3473           eptr++;
3474           }
3475         /* Control never gets here */
3476         }
3477       else  /* Maximize */
3478         {
3479         pp = eptr;
3480         for (i = min; i < max; i++)
3481           {
3482           if (eptr >= md->end_subject)
3483             {
3484             SCHECK_PARTIAL();
3485             break;
3486             }
3487           if (fc != *eptr && foc != *eptr) break;
3488           eptr++;
3489           }
3490
3491         if (possessive) continue;
3492
3493         while (eptr >= pp)
3494           {
3495           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3496           eptr--;
3497           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498           }
3499         RRETURN(MATCH_NOMATCH);
3500         }
3501       /* Control never gets here */
3502       }
3503
3504     /* Caseful comparisons (includes all multi-byte characters) */
3505
3506     else
3507       {
3508       for (i = 1; i <= min; i++)
3509         {
3510         if (eptr >= md->end_subject)
3511           {
3512           SCHECK_PARTIAL();
3513           RRETURN(MATCH_NOMATCH);
3514           }
3515         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3516         }
3517
3518       if (min == max) continue;
3519
3520       if (minimize)
3521         {
3522         for (fi = min;; fi++)
3523           {
3524           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526           if (fi >= max) RRETURN(MATCH_NOMATCH);
3527           if (eptr >= md->end_subject)
3528             {
3529             SCHECK_PARTIAL();
3530             RRETURN(MATCH_NOMATCH);
3531             }
3532           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3533           }
3534         /* Control never gets here */
3535         }
3536       else  /* Maximize */
3537         {
3538         pp = eptr;
3539         for (i = min; i < max; i++)
3540           {
3541           if (eptr >= md->end_subject)
3542             {
3543             SCHECK_PARTIAL();
3544             break;
3545             }
3546           if (fc != *eptr) break;
3547           eptr++;
3548           }
3549         if (possessive) continue;
3550
3551         while (eptr >= pp)
3552           {
3553           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3554           eptr--;
3555           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556           }
3557         RRETURN(MATCH_NOMATCH);
3558         }
3559       }
3560     /* Control never gets here */
3561
3562     /* Match a negated single character. In UTF mode, both the pattern
3563     character and the subject character being checked can be multibyte. */
3564
3565     case OP_NOT:
3566     case OP_NOTI:
3567     if (eptr >= md->end_subject)
3568       {
3569       SCHECK_PARTIAL();
3570       RRETURN(MATCH_NOMATCH);
3571       }
3572 #ifdef SUPPORT_UTF
3573     if (utf)
3574       {
3575       unsigned int ch, och;
3576
3577       ecode++;
3578       GETCHARINC(ch, ecode);
3579       GETCHARINC(c, eptr);
3580
3581       if (op == OP_NOT)
3582         {
3583         if (ch == c) RRETURN(MATCH_NOMATCH);
3584         }
3585       else
3586         {
3587 #ifdef SUPPORT_UCP
3588         if (ch > 127)
3589           och = UCD_OTHERCASE(ch);
3590 #else
3591         if (ch > 127)
3592           och = ch;
3593 #endif /* SUPPORT_UCP */
3594         else
3595           och = TABLE_GET(ch, md->fcc, ch);
3596         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597         }
3598       }
3599     else
3600 #endif
3601       {
3602       unsigned int ch = ecode[1];
3603       c = *eptr++;
3604       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605         RRETURN(MATCH_NOMATCH);
3606       ecode += 2;
3607       }
3608     break;
3609
3610     /* Match a negated single character repeatedly. This is almost a
3611     repeat of the code for a repeated single character, but I haven't found a
3612     nice way of commoning these up that doesn't require a test of the
3613     positive/negative option for each character match. Maybe that wouldn't add
3614     very much to the time taken, but character matching *is* what this is all
3615     about... */
3616
3617     case OP_NOTEXACT:
3618     case OP_NOTEXACTI:
3619     min = max = GET2(ecode, 1);
3620     ecode += 1 + IMM2_SIZE;
3621     goto REPEATNOTCHAR;
3622
3623     case OP_NOTUPTO:
3624     case OP_NOTUPTOI:
3625     case OP_NOTMINUPTO:
3626     case OP_NOTMINUPTOI:
3627     min = 0;
3628     max = GET2(ecode, 1);
3629     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630     ecode += 1 + IMM2_SIZE;
3631     goto REPEATNOTCHAR;
3632
3633     case OP_NOTPOSSTAR:
3634     case OP_NOTPOSSTARI:
3635     possessive = TRUE;
3636     min = 0;
3637     max = INT_MAX;
3638     ecode++;
3639     goto REPEATNOTCHAR;
3640
3641     case OP_NOTPOSPLUS:
3642     case OP_NOTPOSPLUSI:
3643     possessive = TRUE;
3644     min = 1;
3645     max = INT_MAX;
3646     ecode++;
3647     goto REPEATNOTCHAR;
3648
3649     case OP_NOTPOSQUERY:
3650     case OP_NOTPOSQUERYI:
3651     possessive = TRUE;
3652     min = 0;
3653     max = 1;
3654     ecode++;
3655     goto REPEATNOTCHAR;
3656
3657     case OP_NOTPOSUPTO:
3658     case OP_NOTPOSUPTOI:
3659     possessive = TRUE;
3660     min = 0;
3661     max = GET2(ecode, 1);
3662     ecode += 1 + IMM2_SIZE;
3663     goto REPEATNOTCHAR;
3664
3665     case OP_NOTSTAR:
3666     case OP_NOTSTARI:
3667     case OP_NOTMINSTAR:
3668     case OP_NOTMINSTARI:
3669     case OP_NOTPLUS:
3670     case OP_NOTPLUSI:
3671     case OP_NOTMINPLUS:
3672     case OP_NOTMINPLUSI:
3673     case OP_NOTQUERY:
3674     case OP_NOTQUERYI:
3675     case OP_NOTMINQUERY:
3676     case OP_NOTMINQUERYI:
3677     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3678     minimize = (c & 1) != 0;
3679     min = rep_min[c];                 /* Pick up values from tables; */
3680     max = rep_max[c];                 /* zero for max => infinity */
3681     if (max == 0) max = INT_MAX;
3682
3683     /* Common code for all repeated negated single-character matches. */
3684
3685     REPEATNOTCHAR:
3686     GETCHARINCTEST(fc, ecode);
3687
3688     /* The code is duplicated for the caseless and caseful cases, for speed,
3689     since matching characters is likely to be quite common. First, ensure the
3690     minimum number of matches are present. If min = max, continue at the same
3691     level without recursing. Otherwise, if minimizing, keep trying the rest of
3692     the expression and advancing one matching character if failing, up to the
3693     maximum. Alternatively, if maximizing, find the maximum number of
3694     characters and work backwards. */
3695
3696     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697       max, (char *)eptr));
3698
3699     if (op >= OP_NOTSTARI)     /* Caseless */
3700       {
3701 #ifdef SUPPORT_UTF
3702 #ifdef SUPPORT_UCP
3703       if (utf && fc > 127)
3704         foc = UCD_OTHERCASE(fc);
3705 #else
3706       if (utf && fc > 127)
3707         foc = fc;
3708 #endif /* SUPPORT_UCP */
3709       else
3710 #endif /* SUPPORT_UTF */
3711         foc = TABLE_GET(fc, md->fcc, fc);
3712
3713 #ifdef SUPPORT_UTF
3714       if (utf)
3715         {
3716         unsigned int d;
3717         for (i = 1; i <= min; i++)
3718           {
3719           if (eptr >= md->end_subject)
3720             {
3721             SCHECK_PARTIAL();
3722             RRETURN(MATCH_NOMATCH);
3723             }
3724           GETCHARINC(d, eptr);
3725           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3726           }
3727         }
3728       else
3729 #endif
3730       /* Not UTF mode */
3731         {
3732         for (i = 1; i <= min; i++)
3733           {
3734           if (eptr >= md->end_subject)
3735             {
3736             SCHECK_PARTIAL();
3737             RRETURN(MATCH_NOMATCH);
3738             }
3739           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740           eptr++;
3741           }
3742         }
3743
3744       if (min == max) continue;
3745
3746       if (minimize)
3747         {
3748 #ifdef SUPPORT_UTF
3749         if (utf)
3750           {
3751           unsigned int d;
3752           for (fi = min;; fi++)
3753             {
3754             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756             if (fi >= max) RRETURN(MATCH_NOMATCH);
3757             if (eptr >= md->end_subject)
3758               {
3759               SCHECK_PARTIAL();
3760               RRETURN(MATCH_NOMATCH);
3761               }
3762             GETCHARINC(d, eptr);
3763             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3764             }
3765           }
3766         else
3767 #endif
3768         /* Not UTF mode */
3769           {
3770           for (fi = min;; fi++)
3771             {
3772             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774             if (fi >= max) RRETURN(MATCH_NOMATCH);
3775             if (eptr >= md->end_subject)
3776               {
3777               SCHECK_PARTIAL();
3778               RRETURN(MATCH_NOMATCH);
3779               }
3780             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781             eptr++;
3782             }
3783           }
3784         /* Control never gets here */
3785         }
3786
3787       /* Maximize case */
3788
3789       else
3790         {
3791         pp = eptr;
3792
3793 #ifdef SUPPORT_UTF
3794         if (utf)
3795           {
3796           unsigned int d;
3797           for (i = min; i < max; i++)
3798             {
3799             int len = 1;
3800             if (eptr >= md->end_subject)
3801               {
3802               SCHECK_PARTIAL();
3803               break;
3804               }
3805             GETCHARLEN(d, eptr, len);
3806             if (fc == d || (unsigned int)foc == d) break;
3807             eptr += len;
3808             }
3809           if (possessive) continue;
3810           for(;;)
3811             {
3812             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3814             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3815             BACKCHAR(eptr);
3816             }
3817           }
3818         else
3819 #endif
3820         /* Not UTF mode */
3821           {
3822           for (i = min; i < max; i++)
3823             {
3824             if (eptr >= md->end_subject)
3825               {
3826               SCHECK_PARTIAL();
3827               break;
3828               }
3829             if (fc == *eptr || foc == *eptr) break;
3830             eptr++;
3831             }
3832           if (possessive) continue;
3833           while (eptr >= pp)
3834             {
3835             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3836             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3837             eptr--;
3838             }
3839           }
3840
3841         RRETURN(MATCH_NOMATCH);
3842         }
3843       /* Control never gets here */
3844       }
3845
3846     /* Caseful comparisons */
3847
3848     else
3849       {
3850 #ifdef SUPPORT_UTF
3851       if (utf)
3852         {
3853         unsigned int d;
3854         for (i = 1; i <= min; i++)
3855           {
3856           if (eptr >= md->end_subject)
3857             {
3858             SCHECK_PARTIAL();
3859             RRETURN(MATCH_NOMATCH);
3860             }
3861           GETCHARINC(d, eptr);
3862           if (fc == d) RRETURN(MATCH_NOMATCH);
3863           }
3864         }
3865       else
3866 #endif
3867       /* Not UTF mode */
3868         {
3869         for (i = 1; i <= min; i++)
3870           {
3871           if (eptr >= md->end_subject)
3872             {
3873             SCHECK_PARTIAL();
3874             RRETURN(MATCH_NOMATCH);
3875             }
3876           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877           }
3878         }
3879
3880       if (min == max) continue;
3881
3882       if (minimize)
3883         {
3884 #ifdef SUPPORT_UTF
3885         if (utf)
3886           {
3887           unsigned int d;
3888           for (fi = min;; fi++)
3889             {
3890             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892             if (fi >= max) RRETURN(MATCH_NOMATCH);
3893             if (eptr >= md->end_subject)
3894               {
3895               SCHECK_PARTIAL();
3896               RRETURN(MATCH_NOMATCH);
3897               }
3898             GETCHARINC(d, eptr);
3899             if (fc == d) RRETURN(MATCH_NOMATCH);
3900             }
3901           }
3902         else
3903 #endif
3904         /* Not UTF mode */
3905           {
3906           for (fi = min;; fi++)
3907             {
3908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910             if (fi >= max) RRETURN(MATCH_NOMATCH);
3911             if (eptr >= md->end_subject)
3912               {
3913               SCHECK_PARTIAL();
3914               RRETURN(MATCH_NOMATCH);
3915               }
3916             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917             }
3918           }
3919         /* Control never gets here */
3920         }
3921
3922       /* Maximize case */
3923
3924       else
3925         {
3926         pp = eptr;
3927
3928 #ifdef SUPPORT_UTF
3929         if (utf)
3930           {
3931           unsigned int d;
3932           for (i = min; i < max; i++)
3933             {
3934             int len = 1;
3935             if (eptr >= md->end_subject)
3936               {
3937               SCHECK_PARTIAL();
3938               break;
3939               }
3940             GETCHARLEN(d, eptr, len);
3941             if (fc == d) break;
3942             eptr += len;
3943             }
3944           if (possessive) continue;
3945           for(;;)
3946             {
3947             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3948             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3949             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3950             BACKCHAR(eptr);
3951             }
3952           }
3953         else
3954 #endif
3955         /* Not UTF mode */
3956           {
3957           for (i = min; i < max; i++)
3958             {
3959             if (eptr >= md->end_subject)
3960               {
3961               SCHECK_PARTIAL();
3962               break;
3963               }
3964             if (fc == *eptr) break;
3965             eptr++;
3966             }
3967           if (possessive) continue;
3968           while (eptr >= pp)
3969             {
3970             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3971             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3972             eptr--;
3973             }
3974           }
3975
3976         RRETURN(MATCH_NOMATCH);
3977         }
3978       }
3979     /* Control never gets here */
3980
3981     /* Match a single character type repeatedly; several different opcodes
3982     share code. This is very similar to the code for single characters, but we
3983     repeat it in the interests of efficiency. */
3984
3985     case OP_TYPEEXACT:
3986     min = max = GET2(ecode, 1);
3987     minimize = TRUE;
3988     ecode += 1 + IMM2_SIZE;
3989     goto REPEATTYPE;
3990
3991     case OP_TYPEUPTO:
3992     case OP_TYPEMINUPTO:
3993     min = 0;
3994     max = GET2(ecode, 1);
3995     minimize = *ecode == OP_TYPEMINUPTO;
3996     ecode += 1 + IMM2_SIZE;
3997     goto REPEATTYPE;
3998
3999     case OP_TYPEPOSSTAR:
4000     possessive = TRUE;
4001     min = 0;
4002     max = INT_MAX;
4003     ecode++;
4004     goto REPEATTYPE;
4005
4006     case OP_TYPEPOSPLUS:
4007     possessive = TRUE;
4008     min = 1;
4009     max = INT_MAX;
4010     ecode++;
4011     goto REPEATTYPE;
4012
4013     case OP_TYPEPOSQUERY:
4014     possessive = TRUE;
4015     min = 0;
4016     max = 1;
4017     ecode++;
4018     goto REPEATTYPE;
4019
4020     case OP_TYPEPOSUPTO:
4021     possessive = TRUE;
4022     min = 0;
4023     max = GET2(ecode, 1);
4024     ecode += 1 + IMM2_SIZE;
4025     goto REPEATTYPE;
4026
4027     case OP_TYPESTAR:
4028     case OP_TYPEMINSTAR:
4029     case OP_TYPEPLUS:
4030     case OP_TYPEMINPLUS:
4031     case OP_TYPEQUERY:
4032     case OP_TYPEMINQUERY:
4033     c = *ecode++ - OP_TYPESTAR;
4034     minimize = (c & 1) != 0;
4035     min = rep_min[c];                 /* Pick up values from tables; */
4036     max = rep_max[c];                 /* zero for max => infinity */
4037     if (max == 0) max = INT_MAX;
4038
4039     /* Common code for all repeated single character type matches. Note that
4040     in UTF-8 mode, '.' matches a character of any length, but for the other
4041     character types, the valid characters are all one-byte long. */
4042
4043     REPEATTYPE:
4044     ctype = *ecode++;      /* Code for the character type */
4045
4046 #ifdef SUPPORT_UCP
4047     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4048       {
4049       prop_fail_result = ctype == OP_NOTPROP;
4050       prop_type = *ecode++;
4051       prop_value = *ecode++;
4052       }
4053     else prop_type = -1;
4054 #endif
4055
4056     /* First, ensure the minimum number of matches are present. Use inline
4057     code for maximizing the speed, and do the type test once at the start
4058     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4059     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4060     and single-bytes. */
4061
4062     if (min > 0)
4063       {
4064 #ifdef SUPPORT_UCP
4065       if (prop_type >= 0)
4066         {
4067         switch(prop_type)
4068           {
4069           case PT_ANY:
4070           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071           for (i = 1; i <= min; i++)
4072             {
4073             if (eptr >= md->end_subject)
4074               {
4075               SCHECK_PARTIAL();
4076               RRETURN(MATCH_NOMATCH);
4077               }
4078             GETCHARINCTEST(c, eptr);
4079             }
4080           break;
4081
4082           case PT_LAMP:
4083           for (i = 1; i <= min; i++)
4084             {
4085             int chartype;
4086             if (eptr >= md->end_subject)
4087               {
4088               SCHECK_PARTIAL();
4089               RRETURN(MATCH_NOMATCH);
4090               }
4091             GETCHARINCTEST(c, eptr);
4092             chartype = UCD_CHARTYPE(c);
4093             if ((chartype == ucp_Lu ||
4094                  chartype == ucp_Ll ||
4095                  chartype == ucp_Lt) == prop_fail_result)
4096               RRETURN(MATCH_NOMATCH);
4097             }
4098           break;
4099
4100           case PT_GC:
4101           for (i = 1; i <= min; i++)
4102             {
4103             if (eptr >= md->end_subject)
4104               {
4105               SCHECK_PARTIAL();
4106               RRETURN(MATCH_NOMATCH);
4107               }
4108             GETCHARINCTEST(c, eptr);
4109             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110               RRETURN(MATCH_NOMATCH);
4111             }
4112           break;
4113
4114           case PT_PC:
4115           for (i = 1; i <= min; i++)
4116             {
4117             if (eptr >= md->end_subject)
4118               {
4119               SCHECK_PARTIAL();
4120               RRETURN(MATCH_NOMATCH);
4121               }
4122             GETCHARINCTEST(c, eptr);
4123             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124               RRETURN(MATCH_NOMATCH);
4125             }
4126           break;
4127
4128           case PT_SC:
4129           for (i = 1; i <= min; i++)
4130             {
4131             if (eptr >= md->end_subject)
4132               {
4133               SCHECK_PARTIAL();
4134               RRETURN(MATCH_NOMATCH);
4135               }
4136             GETCHARINCTEST(c, eptr);
4137             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138               RRETURN(MATCH_NOMATCH);
4139             }
4140           break;
4141
4142           case PT_ALNUM:
4143           for (i = 1; i <= min; i++)
4144             {
4145             int category;
4146             if (eptr >= md->end_subject)
4147               {
4148               SCHECK_PARTIAL();
4149               RRETURN(MATCH_NOMATCH);
4150               }
4151             GETCHARINCTEST(c, eptr);
4152             category = UCD_CATEGORY(c);
4153             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4154               RRETURN(MATCH_NOMATCH);
4155             }
4156           break;
4157
4158           case PT_SPACE:    /* Perl space */
4159           for (i = 1; i <= min; i++)
4160             {
4161             if (eptr >= md->end_subject)
4162               {
4163               SCHECK_PARTIAL();
4164               RRETURN(MATCH_NOMATCH);
4165               }
4166             GETCHARINCTEST(c, eptr);
4167             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4168                  c == CHAR_FF || c == CHAR_CR)
4169                    == prop_fail_result)
4170               RRETURN(MATCH_NOMATCH);
4171             }
4172           break;
4173
4174           case PT_PXSPACE:  /* POSIX space */
4175           for (i = 1; i <= min; i++)
4176             {
4177             if (eptr >= md->end_subject)
4178               {
4179               SCHECK_PARTIAL();
4180               RRETURN(MATCH_NOMATCH);
4181               }
4182             GETCHARINCTEST(c, eptr);
4183             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4184                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185                    == prop_fail_result)
4186               RRETURN(MATCH_NOMATCH);
4187             }
4188           break;
4189
4190           case PT_WORD:
4191           for (i = 1; i <= min; i++)
4192             {
4193             int category;
4194             if (eptr >= md->end_subject)
4195               {
4196               SCHECK_PARTIAL();
4197               RRETURN(MATCH_NOMATCH);
4198               }
4199             GETCHARINCTEST(c, eptr);
4200             category = UCD_CATEGORY(c);
4201             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4202                    == prop_fail_result)
4203               RRETURN(MATCH_NOMATCH);
4204             }
4205           break;
4206
4207           /* This should not occur */
4208
4209           default:
4210           RRETURN(PCRE_ERROR_INTERNAL);
4211           }
4212         }
4213
4214       /* Match extended Unicode sequences. We will get here only if the
4215       support is in the binary; otherwise a compile-time error occurs. */
4216
4217       else if (ctype == OP_EXTUNI)
4218         {
4219         for (i = 1; i <= min; i++)
4220           {
4221           if (eptr >= md->end_subject)
4222             {
4223             SCHECK_PARTIAL();
4224             RRETURN(MATCH_NOMATCH);
4225             }
4226           GETCHARINCTEST(c, eptr);
4227           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4228           while (eptr < md->end_subject)
4229             {
4230             int len = 1;
4231             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4232             if (UCD_CATEGORY(c) != ucp_M) break;
4233             eptr += len;
4234             }
4235           CHECK_PARTIAL();
4236           }
4237         }
4238
4239       else
4240 #endif     /* SUPPORT_UCP */
4241
4242 /* Handle all other cases when the coding is UTF-8 */
4243
4244 #ifdef SUPPORT_UTF
4245       if (utf) switch(ctype)
4246         {
4247         case OP_ANY:
4248         for (i = 1; i <= min; i++)
4249           {
4250           if (eptr >= md->end_subject)
4251             {
4252             SCHECK_PARTIAL();
4253             RRETURN(MATCH_NOMATCH);
4254             }
4255           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4256           if (md->partial != 0 &&
4257               eptr + 1 >= md->end_subject &&
4258               NLBLOCK->nltype == NLTYPE_FIXED &&
4259               NLBLOCK->nllen == 2 &&
4260               *eptr == NLBLOCK->nl[0])
4261             {
4262             md->hitend = TRUE;
4263             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4264             }
4265           eptr++;
4266           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4267           }
4268         break;
4269
4270         case OP_ALLANY:
4271         for (i = 1; i <= min; i++)
4272           {
4273           if (eptr >= md->end_subject)
4274             {
4275             SCHECK_PARTIAL();
4276             RRETURN(MATCH_NOMATCH);
4277             }
4278           eptr++;
4279           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4280           }
4281         break;
4282
4283         case OP_ANYBYTE:
4284         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4285         eptr += min;
4286         break;
4287
4288         case OP_ANYNL:
4289         for (i = 1; i <= min; i++)
4290           {
4291           if (eptr >= md->end_subject)
4292             {
4293             SCHECK_PARTIAL();
4294             RRETURN(MATCH_NOMATCH);
4295             }
4296           GETCHARINC(c, eptr);
4297           switch(c)
4298             {
4299             default: RRETURN(MATCH_NOMATCH);
4300
4301             case 0x000d:
4302             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4303             break;
4304
4305             case 0x000a:
4306             break;
4307
4308             case 0x000b:
4309             case 0x000c:
4310             case 0x0085:
4311             case 0x2028:
4312             case 0x2029:
4313             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4314             break;
4315             }
4316           }
4317         break;
4318
4319         case OP_NOT_HSPACE:
4320         for (i = 1; i <= min; i++)
4321           {
4322           if (eptr >= md->end_subject)
4323             {
4324             SCHECK_PARTIAL();
4325             RRETURN(MATCH_NOMATCH);
4326             }
4327           GETCHARINC(c, eptr);
4328           switch(c)
4329             {
4330             default: break;
4331             case 0x09:      /* HT */
4332             case 0x20:      /* SPACE */
4333             case 0xa0:      /* NBSP */
4334             case 0x1680:    /* OGHAM SPACE MARK */
4335             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4336             case 0x2000:    /* EN QUAD */
4337             case 0x2001:    /* EM QUAD */
4338             case 0x2002:    /* EN SPACE */
4339             case 0x2003:    /* EM SPACE */
4340             case 0x2004:    /* THREE-PER-EM SPACE */
4341             case 0x2005:    /* FOUR-PER-EM SPACE */
4342             case 0x2006:    /* SIX-PER-EM SPACE */
4343             case 0x2007:    /* FIGURE SPACE */
4344             case 0x2008:    /* PUNCTUATION SPACE */
4345             case 0x2009:    /* THIN SPACE */
4346             case 0x200A:    /* HAIR SPACE */
4347             case 0x202f:    /* NARROW NO-BREAK SPACE */
4348             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4349             case 0x3000:    /* IDEOGRAPHIC SPACE */
4350             RRETURN(MATCH_NOMATCH);
4351             }
4352           }
4353         break;
4354
4355         case OP_HSPACE:
4356         for (i = 1; i <= min; i++)
4357           {
4358           if (eptr >= md->end_subject)
4359             {
4360             SCHECK_PARTIAL();
4361             RRETURN(MATCH_NOMATCH);
4362             }
4363           GETCHARINC(c, eptr);
4364           switch(c)
4365             {
4366             default: RRETURN(MATCH_NOMATCH);
4367             case 0x09:      /* HT */
4368             case 0x20:      /* SPACE */
4369             case 0xa0:      /* NBSP */
4370             case 0x1680:    /* OGHAM SPACE MARK */
4371             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4372             case 0x2000:    /* EN QUAD */
4373             case 0x2001:    /* EM QUAD */
4374             case 0x2002:    /* EN SPACE */
4375             case 0x2003:    /* EM SPACE */
4376             case 0x2004:    /* THREE-PER-EM SPACE */
4377             case 0x2005:    /* FOUR-PER-EM SPACE */
4378             case 0x2006:    /* SIX-PER-EM SPACE */
4379             case 0x2007:    /* FIGURE SPACE */
4380             case 0x2008:    /* PUNCTUATION SPACE */
4381             case 0x2009:    /* THIN SPACE */
4382             case 0x200A:    /* HAIR SPACE */
4383             case 0x202f:    /* NARROW NO-BREAK SPACE */
4384             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4385             case 0x3000:    /* IDEOGRAPHIC SPACE */
4386             break;
4387             }
4388           }
4389         break;
4390
4391         case OP_NOT_VSPACE:
4392         for (i = 1; i <= min; i++)
4393           {
4394           if (eptr >= md->end_subject)
4395             {
4396             SCHECK_PARTIAL();
4397             RRETURN(MATCH_NOMATCH);
4398             }
4399           GETCHARINC(c, eptr);
4400           switch(c)
4401             {
4402             default: break;
4403             case 0x0a:      /* LF */
4404             case 0x0b:      /* VT */
4405             case 0x0c:      /* FF */
4406             case 0x0d:      /* CR */
4407             case 0x85:      /* NEL */
4408             case 0x2028:    /* LINE SEPARATOR */
4409             case 0x2029:    /* PARAGRAPH SEPARATOR */
4410             RRETURN(MATCH_NOMATCH);
4411             }
4412           }
4413         break;
4414
4415         case OP_VSPACE:
4416         for (i = 1; i <= min; i++)
4417           {
4418           if (eptr >= md->end_subject)
4419             {
4420             SCHECK_PARTIAL();
4421             RRETURN(MATCH_NOMATCH);
4422             }
4423           GETCHARINC(c, eptr);
4424           switch(c)
4425             {
4426             default: RRETURN(MATCH_NOMATCH);
4427             case 0x0a:      /* LF */
4428             case 0x0b:      /* VT */
4429             case 0x0c:      /* FF */
4430             case 0x0d:      /* CR */
4431             case 0x85:      /* NEL */
4432             case 0x2028:    /* LINE SEPARATOR */
4433             case 0x2029:    /* PARAGRAPH SEPARATOR */
4434             break;
4435             }
4436           }
4437         break;
4438
4439         case OP_NOT_DIGIT:
4440         for (i = 1; i <= min; i++)
4441           {
4442           if (eptr >= md->end_subject)
4443             {
4444             SCHECK_PARTIAL();
4445             RRETURN(MATCH_NOMATCH);
4446             }
4447           GETCHARINC(c, eptr);
4448           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4449             RRETURN(MATCH_NOMATCH);
4450           }
4451         break;
4452
4453         case OP_DIGIT:
4454         for (i = 1; i <= min; i++)
4455           {
4456           if (eptr >= md->end_subject)
4457             {
4458             SCHECK_PARTIAL();
4459             RRETURN(MATCH_NOMATCH);
4460             }
4461           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4462             RRETURN(MATCH_NOMATCH);
4463           eptr++;
4464           /* No need to skip more bytes - we know it's a 1-byte character */
4465           }
4466         break;
4467
4468         case OP_NOT_WHITESPACE:
4469         for (i = 1; i <= min; i++)
4470           {
4471           if (eptr >= md->end_subject)
4472             {
4473             SCHECK_PARTIAL();
4474             RRETURN(MATCH_NOMATCH);
4475             }
4476           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4477             RRETURN(MATCH_NOMATCH);
4478           eptr++;
4479           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4480           }
4481         break;
4482
4483         case OP_WHITESPACE:
4484         for (i = 1; i <= min; i++)
4485           {
4486           if (eptr >= md->end_subject)
4487             {
4488             SCHECK_PARTIAL();
4489             RRETURN(MATCH_NOMATCH);
4490             }
4491           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4492             RRETURN(MATCH_NOMATCH);
4493           eptr++;
4494           /* No need to skip more bytes - we know it's a 1-byte character */
4495           }
4496         break;
4497
4498         case OP_NOT_WORDCHAR:
4499         for (i = 1; i <= min; i++)
4500           {
4501           if (eptr >= md->end_subject)
4502             {
4503             SCHECK_PARTIAL();
4504             RRETURN(MATCH_NOMATCH);
4505             }
4506           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4507             RRETURN(MATCH_NOMATCH);
4508           eptr++;
4509           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4510           }
4511         break;
4512
4513         case OP_WORDCHAR:
4514         for (i = 1; i <= min; i++)
4515           {
4516           if (eptr >= md->end_subject)
4517             {
4518             SCHECK_PARTIAL();
4519             RRETURN(MATCH_NOMATCH);
4520             }
4521           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4522             RRETURN(MATCH_NOMATCH);
4523           eptr++;
4524           /* No need to skip more bytes - we know it's a 1-byte character */
4525           }
4526         break;
4527
4528         default:
4529         RRETURN(PCRE_ERROR_INTERNAL);
4530         }  /* End switch(ctype) */
4531
4532       else
4533 #endif     /* SUPPORT_UTF */
4534
4535       /* Code for the non-UTF case for minimum matching of operators other
4536       than OP_PROP and OP_NOTPROP. */
4537
4538       switch(ctype)
4539         {
4540         case OP_ANY:
4541         for (i = 1; i <= min; i++)
4542           {
4543           if (eptr >= md->end_subject)
4544             {
4545             SCHECK_PARTIAL();
4546             RRETURN(MATCH_NOMATCH);
4547             }
4548           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4549           if (md->partial != 0 &&
4550               eptr + 1 >= md->end_subject &&
4551               NLBLOCK->nltype == NLTYPE_FIXED &&
4552               NLBLOCK->nllen == 2 &&
4553               *eptr == NLBLOCK->nl[0])
4554             {
4555             md->hitend = TRUE;
4556             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4557             }
4558           eptr++;
4559           }
4560         break;
4561
4562         case OP_ALLANY:
4563         if (eptr > md->end_subject - min)
4564           {
4565           SCHECK_PARTIAL();
4566           RRETURN(MATCH_NOMATCH);
4567           }
4568         eptr += min;
4569         break;
4570
4571         case OP_ANYBYTE:
4572         if (eptr > md->end_subject - min)
4573           {
4574           SCHECK_PARTIAL();
4575           RRETURN(MATCH_NOMATCH);
4576           }
4577         eptr += min;
4578         break;
4579
4580         case OP_ANYNL:
4581         for (i = 1; i <= min; i++)
4582           {
4583           if (eptr >= md->end_subject)
4584             {
4585             SCHECK_PARTIAL();
4586             RRETURN(MATCH_NOMATCH);
4587             }
4588           switch(*eptr++)
4589             {
4590             default: RRETURN(MATCH_NOMATCH);
4591
4592             case 0x000d:
4593             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4594             break;
4595
4596             case 0x000a:
4597             break;
4598
4599             case 0x000b:
4600             case 0x000c:
4601             case 0x0085:
4602 #ifdef COMPILE_PCRE16
4603             case 0x2028:
4604             case 0x2029:
4605 #endif
4606             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4607             break;
4608             }
4609           }
4610         break;
4611
4612         case OP_NOT_HSPACE:
4613         for (i = 1; i <= min; i++)
4614           {
4615           if (eptr >= md->end_subject)
4616             {
4617             SCHECK_PARTIAL();
4618             RRETURN(MATCH_NOMATCH);
4619             }
4620           switch(*eptr++)
4621             {
4622             default: break;
4623             case 0x09:      /* HT */
4624             case 0x20:      /* SPACE */
4625             case 0xa0:      /* NBSP */
4626 #ifdef COMPILE_PCRE16
4627             case 0x1680:    /* OGHAM SPACE MARK */
4628             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4629             case 0x2000:    /* EN QUAD */
4630             case 0x2001:    /* EM QUAD */
4631             case 0x2002:    /* EN SPACE */
4632             case 0x2003:    /* EM SPACE */
4633             case 0x2004:    /* THREE-PER-EM SPACE */
4634             case 0x2005:    /* FOUR-PER-EM SPACE */
4635             case 0x2006:    /* SIX-PER-EM SPACE */
4636             case 0x2007:    /* FIGURE SPACE */
4637             case 0x2008:    /* PUNCTUATION SPACE */
4638             case 0x2009:    /* THIN SPACE */
4639             case 0x200A:    /* HAIR SPACE */
4640             case 0x202f:    /* NARROW NO-BREAK SPACE */
4641             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4642             case 0x3000:    /* IDEOGRAPHIC SPACE */
4643 #endif
4644             RRETURN(MATCH_NOMATCH);
4645             }
4646           }
4647         break;
4648
4649         case OP_HSPACE:
4650         for (i = 1; i <= min; i++)
4651           {
4652           if (eptr >= md->end_subject)
4653             {
4654             SCHECK_PARTIAL();
4655             RRETURN(MATCH_NOMATCH);
4656             }
4657           switch(*eptr++)
4658             {
4659             default: RRETURN(MATCH_NOMATCH);
4660             case 0x09:      /* HT */
4661             case 0x20:      /* SPACE */
4662             case 0xa0:      /* NBSP */
4663 #ifdef COMPILE_PCRE16
4664             case 0x1680:    /* OGHAM SPACE MARK */
4665             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4666             case 0x2000:    /* EN QUAD */
4667             case 0x2001:    /* EM QUAD */
4668             case 0x2002:    /* EN SPACE */
4669             case 0x2003:    /* EM SPACE */
4670             case 0x2004:    /* THREE-PER-EM SPACE */
4671             case 0x2005:    /* FOUR-PER-EM SPACE */
4672             case 0x2006:    /* SIX-PER-EM SPACE */
4673             case 0x2007:    /* FIGURE SPACE */
4674             case 0x2008:    /* PUNCTUATION SPACE */
4675             case 0x2009:    /* THIN SPACE */
4676             case 0x200A:    /* HAIR SPACE */
4677             case 0x202f:    /* NARROW NO-BREAK SPACE */
4678             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4679             case 0x3000:    /* IDEOGRAPHIC SPACE */
4680 #endif
4681             break;
4682             }
4683           }
4684         break;
4685
4686         case OP_NOT_VSPACE:
4687         for (i = 1; i <= min; i++)
4688           {
4689           if (eptr >= md->end_subject)
4690             {
4691             SCHECK_PARTIAL();
4692             RRETURN(MATCH_NOMATCH);
4693             }
4694           switch(*eptr++)
4695             {
4696             default: break;
4697             case 0x0a:      /* LF */
4698             case 0x0b:      /* VT */
4699             case 0x0c:      /* FF */
4700             case 0x0d:      /* CR */
4701             case 0x85:      /* NEL */
4702 #ifdef COMPILE_PCRE16
4703             case 0x2028:    /* LINE SEPARATOR */
4704             case 0x2029:    /* PARAGRAPH SEPARATOR */
4705 #endif
4706             RRETURN(MATCH_NOMATCH);
4707             }
4708           }
4709         break;
4710
4711         case OP_VSPACE:
4712         for (i = 1; i <= min; i++)
4713           {
4714           if (eptr >= md->end_subject)
4715             {
4716             SCHECK_PARTIAL();
4717             RRETURN(MATCH_NOMATCH);
4718             }
4719           switch(*eptr++)
4720             {
4721             default: RRETURN(MATCH_NOMATCH);
4722             case 0x0a:      /* LF */
4723             case 0x0b:      /* VT */
4724             case 0x0c:      /* FF */
4725             case 0x0d:      /* CR */
4726             case 0x85:      /* NEL */
4727 #ifdef COMPILE_PCRE16
4728             case 0x2028:    /* LINE SEPARATOR */
4729             case 0x2029:    /* PARAGRAPH SEPARATOR */
4730 #endif
4731             break;
4732             }
4733           }
4734         break;
4735
4736         case OP_NOT_DIGIT:
4737         for (i = 1; i <= min; i++)
4738           {
4739           if (eptr >= md->end_subject)
4740             {
4741             SCHECK_PARTIAL();
4742             RRETURN(MATCH_NOMATCH);
4743             }
4744           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4745             RRETURN(MATCH_NOMATCH);
4746           eptr++;
4747           }
4748         break;
4749
4750         case OP_DIGIT:
4751         for (i = 1; i <= min; i++)
4752           {
4753           if (eptr >= md->end_subject)
4754             {
4755             SCHECK_PARTIAL();
4756             RRETURN(MATCH_NOMATCH);
4757             }
4758           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4759             RRETURN(MATCH_NOMATCH);
4760           eptr++;
4761           }
4762         break;
4763
4764         case OP_NOT_WHITESPACE:
4765         for (i = 1; i <= min; i++)
4766           {
4767           if (eptr >= md->end_subject)
4768             {
4769             SCHECK_PARTIAL();
4770             RRETURN(MATCH_NOMATCH);
4771             }
4772           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4773             RRETURN(MATCH_NOMATCH);
4774           eptr++;
4775           }
4776         break;
4777
4778         case OP_WHITESPACE:
4779         for (i = 1; i <= min; i++)
4780           {
4781           if (eptr >= md->end_subject)
4782             {
4783             SCHECK_PARTIAL();
4784             RRETURN(MATCH_NOMATCH);
4785             }
4786           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4787             RRETURN(MATCH_NOMATCH);
4788           eptr++;
4789           }
4790         break;
4791
4792         case OP_NOT_WORDCHAR:
4793         for (i = 1; i <= min; i++)
4794           {
4795           if (eptr >= md->end_subject)
4796             {
4797             SCHECK_PARTIAL();
4798             RRETURN(MATCH_NOMATCH);
4799             }
4800           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4801             RRETURN(MATCH_NOMATCH);
4802           eptr++;
4803           }
4804         break;
4805
4806         case OP_WORDCHAR:
4807         for (i = 1; i <= min; i++)
4808           {
4809           if (eptr >= md->end_subject)
4810             {
4811             SCHECK_PARTIAL();
4812             RRETURN(MATCH_NOMATCH);
4813             }
4814           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4815             RRETURN(MATCH_NOMATCH);
4816           eptr++;
4817           }
4818         break;
4819
4820         default:
4821         RRETURN(PCRE_ERROR_INTERNAL);
4822         }
4823       }
4824
4825     /* If min = max, continue at the same level without recursing */
4826
4827     if (min == max) continue;
4828
4829     /* If minimizing, we have to test the rest of the pattern before each
4830     subsequent match. Again, separate the UTF case for speed, and also
4831     separate the UCP cases. */
4832
4833     if (minimize)
4834       {
4835 #ifdef SUPPORT_UCP
4836       if (prop_type >= 0)
4837         {
4838         switch(prop_type)
4839           {
4840           case PT_ANY:
4841           for (fi = min;; fi++)
4842             {
4843             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4844             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4845             if (fi >= max) RRETURN(MATCH_NOMATCH);
4846             if (eptr >= md->end_subject)
4847               {
4848               SCHECK_PARTIAL();
4849               RRETURN(MATCH_NOMATCH);
4850               }
4851             GETCHARINCTEST(c, eptr);
4852             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4853             }
4854           /* Control never gets here */
4855
4856           case PT_LAMP:
4857           for (fi = min;; fi++)
4858             {
4859             int chartype;
4860             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4861             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4862             if (fi >= max) RRETURN(MATCH_NOMATCH);
4863             if (eptr >= md->end_subject)
4864               {
4865               SCHECK_PARTIAL();
4866               RRETURN(MATCH_NOMATCH);
4867               }
4868             GETCHARINCTEST(c, eptr);
4869             chartype = UCD_CHARTYPE(c);
4870             if ((chartype == ucp_Lu ||
4871                  chartype == ucp_Ll ||
4872                  chartype == ucp_Lt) == prop_fail_result)
4873               RRETURN(MATCH_NOMATCH);
4874             }
4875           /* Control never gets here */
4876
4877           case PT_GC:
4878           for (fi = min;; fi++)
4879             {
4880             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4881             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4882             if (fi >= max) RRETURN(MATCH_NOMATCH);
4883             if (eptr >= md->end_subject)
4884               {
4885               SCHECK_PARTIAL();
4886               RRETURN(MATCH_NOMATCH);
4887               }
4888             GETCHARINCTEST(c, eptr);
4889             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4890               RRETURN(MATCH_NOMATCH);
4891             }
4892           /* Control never gets here */
4893
4894           case PT_PC:
4895           for (fi = min;; fi++)
4896             {
4897             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4898             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4899             if (fi >= max) RRETURN(MATCH_NOMATCH);
4900             if (eptr >= md->end_subject)
4901               {
4902               SCHECK_PARTIAL();
4903               RRETURN(MATCH_NOMATCH);
4904               }
4905             GETCHARINCTEST(c, eptr);
4906             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4907               RRETURN(MATCH_NOMATCH);
4908             }
4909           /* Control never gets here */
4910
4911           case PT_SC:
4912           for (fi = min;; fi++)
4913             {
4914             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4915             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4916             if (fi >= max) RRETURN(MATCH_NOMATCH);
4917             if (eptr >= md->end_subject)
4918               {
4919               SCHECK_PARTIAL();
4920               RRETURN(MATCH_NOMATCH);
4921               }
4922             GETCHARINCTEST(c, eptr);
4923             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4924               RRETURN(MATCH_NOMATCH);
4925             }
4926           /* Control never gets here */
4927
4928           case PT_ALNUM:
4929           for (fi = min;; fi++)
4930             {
4931             int category;
4932             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4933             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934             if (fi >= max) RRETURN(MATCH_NOMATCH);
4935             if (eptr >= md->end_subject)
4936               {
4937               SCHECK_PARTIAL();
4938               RRETURN(MATCH_NOMATCH);
4939               }
4940             GETCHARINCTEST(c, eptr);
4941             category = UCD_CATEGORY(c);
4942             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4943               RRETURN(MATCH_NOMATCH);
4944             }
4945           /* Control never gets here */
4946
4947           case PT_SPACE:    /* Perl space */
4948           for (fi = min;; fi++)
4949             {
4950             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4951             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4952             if (fi >= max) RRETURN(MATCH_NOMATCH);
4953             if (eptr >= md->end_subject)
4954               {
4955               SCHECK_PARTIAL();
4956               RRETURN(MATCH_NOMATCH);
4957               }
4958             GETCHARINCTEST(c, eptr);
4959             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4960                  c == CHAR_FF || c == CHAR_CR)
4961                    == prop_fail_result)
4962               RRETURN(MATCH_NOMATCH);
4963             }
4964           /* Control never gets here */
4965
4966           case PT_PXSPACE:  /* POSIX space */
4967           for (fi = min;; fi++)
4968             {
4969             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4970             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4971             if (fi >= max) RRETURN(MATCH_NOMATCH);
4972             if (eptr >= md->end_subject)
4973               {
4974               SCHECK_PARTIAL();
4975               RRETURN(MATCH_NOMATCH);
4976               }
4977             GETCHARINCTEST(c, eptr);
4978             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4979                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4980                    == prop_fail_result)
4981               RRETURN(MATCH_NOMATCH);
4982             }
4983           /* Control never gets here */
4984
4985           case PT_WORD:
4986           for (fi = min;; fi++)
4987             {
4988             int category;
4989             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4990             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4991             if (fi >= max) RRETURN(MATCH_NOMATCH);
4992             if (eptr >= md->end_subject)
4993               {
4994               SCHECK_PARTIAL();
4995               RRETURN(MATCH_NOMATCH);
4996               }
4997             GETCHARINCTEST(c, eptr);
4998             category = UCD_CATEGORY(c);
4999             if ((category == ucp_L ||
5000                  category == ucp_N ||
5001                  c == CHAR_UNDERSCORE)
5002                    == prop_fail_result)
5003               RRETURN(MATCH_NOMATCH);
5004             }
5005           /* Control never gets here */
5006
5007           /* This should never occur */
5008
5009           default:
5010           RRETURN(PCRE_ERROR_INTERNAL);
5011           }
5012         }
5013
5014       /* Match extended Unicode sequences. We will get here only if UCP support
5015       (SUPPORT_UCP) is in the binary; otherwise a compile-time error occurs. */
5016
5017       else if (ctype == OP_EXTUNI)
5018         {
5019         for (fi = min;; fi++)
5020           {
5021           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5022           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5023           if (fi >= max) RRETURN(MATCH_NOMATCH);
5024           if (eptr >= md->end_subject)
5025             {
5026             SCHECK_PARTIAL();
5027             RRETURN(MATCH_NOMATCH);
5028             }
5029           GETCHARINCTEST(c, eptr);
5030           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5031           while (eptr < md->end_subject)
5032             {
5033             int len = 1;
5034             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5035             if (UCD_CATEGORY(c) != ucp_M) break;
5036             eptr += len;
5037             }
5038           CHECK_PARTIAL();
5039           }
5040         }
5041       else
5042 #endif     /* SUPPORT_UCP */
5043
5044 #ifdef SUPPORT_UTF
5045       if (utf)
5046         {
5047         for (fi = min;; fi++)
5048           {
5049           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5050           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5051           if (fi >= max) RRETURN(MATCH_NOMATCH);
5052           if (eptr >= md->end_subject)
5053             {
5054             SCHECK_PARTIAL();
5055             RRETURN(MATCH_NOMATCH);
5056             }
5057           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5058             RRETURN(MATCH_NOMATCH);
5059           GETCHARINC(c, eptr);
5060           switch(ctype)
5061             {
5062             case OP_ANY:               /* This is the non-NL case */
5063             if (md->partial != 0 &&    /* Take care with CRLF partial */
5064                 eptr >= md->end_subject &&
5065                 NLBLOCK->nltype == NLTYPE_FIXED &&
5066                 NLBLOCK->nllen == 2 &&
5067                 c == NLBLOCK->nl[0])
5068               {
5069               md->hitend = TRUE;
5070               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5071               }
5072             break;
5073
5074             case OP_ALLANY:
5075             case OP_ANYBYTE:
5076             break;
5077
5078             case OP_ANYNL:
5079             switch(c)
5080               {
5081               default: RRETURN(MATCH_NOMATCH);
5082               case 0x000d:
5083               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5084               break;
5085               case 0x000a:
5086               break;
5087
5088               case 0x000b:
5089               case 0x000c:
5090               case 0x0085:
5091               case 0x2028:
5092               case 0x2029:
5093               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5094               break;
5095               }
5096             break;
5097
5098             case OP_NOT_HSPACE:
5099             switch(c)
5100               {
5101               default: break;
5102               case 0x09:      /* HT */
5103               case 0x20:      /* SPACE */
5104               case 0xa0:      /* NBSP */
5105               case 0x1680:    /* OGHAM SPACE MARK */
5106               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5107               case 0x2000:    /* EN QUAD */
5108               case 0x2001:    /* EM QUAD */
5109               case 0x2002:    /* EN SPACE */
5110               case 0x2003:    /* EM SPACE */
5111               case 0x2004:    /* THREE-PER-EM SPACE */
5112               case 0x2005:    /* FOUR-PER-EM SPACE */
5113               case 0x2006:    /* SIX-PER-EM SPACE */
5114               case 0x2007:    /* FIGURE SPACE */
5115               case 0x2008:    /* PUNCTUATION SPACE */
5116               case 0x2009:    /* THIN SPACE */
5117               case 0x200A:    /* HAIR SPACE */
5118               case 0x202f:    /* NARROW NO-BREAK SPACE */
5119               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5120               case 0x3000:    /* IDEOGRAPHIC SPACE */
5121               RRETURN(MATCH_NOMATCH);
5122               }
5123             break;
5124
5125             case OP_HSPACE:
5126             switch(c)
5127               {
5128               default: RRETURN(MATCH_NOMATCH);
5129               case 0x09:      /* HT */
5130               case 0x20:      /* SPACE */
5131               case 0xa0:      /* NBSP */
5132               case 0x1680:    /* OGHAM SPACE MARK */
5133               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5134               case 0x2000:    /* EN QUAD */
5135               case 0x2001:    /* EM QUAD */
5136               case 0x2002:    /* EN SPACE */
5137               case 0x2003:    /* EM SPACE */
5138               case 0x2004:    /* THREE-PER-EM SPACE */
5139               case 0x2005:    /* FOUR-PER-EM SPACE */
5140               case 0x2006:    /* SIX-PER-EM SPACE */
5141               case 0x2007:    /* FIGURE SPACE */
5142               case 0x2008:    /* PUNCTUATION SPACE */
5143               case 0x2009:    /* THIN SPACE */
5144               case 0x200A:    /* HAIR SPACE */
5145               case 0x202f:    /* NARROW NO-BREAK SPACE */
5146               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5147               case 0x3000:    /* IDEOGRAPHIC SPACE */
5148               break;
5149               }
5150             break;
5151
5152             case OP_NOT_VSPACE:
5153             switch(c)
5154               {
5155               default: break;
5156               case 0x0a:      /* LF */
5157               case 0x0b:      /* VT */
5158               case 0x0c:      /* FF */
5159               case 0x0d:      /* CR */
5160               case 0x85:      /* NEL */
5161               case 0x2028:    /* LINE SEPARATOR */
5162               case 0x2029:    /* PARAGRAPH SEPARATOR */
5163               RRETURN(MATCH_NOMATCH);
5164               }
5165             break;
5166
5167             case OP_VSPACE:
5168             switch(c)
5169               {
5170               default: RRETURN(MATCH_NOMATCH);
5171               case 0x0a:      /* LF */
5172               case 0x0b:      /* VT */
5173               case 0x0c:      /* FF */
5174               case 0x0d:      /* CR */
5175               case 0x85:      /* NEL */
5176               case 0x2028:    /* LINE SEPARATOR */
5177               case 0x2029:    /* PARAGRAPH SEPARATOR */
5178               break;
5179               }
5180             break;
5181
5182             case OP_NOT_DIGIT:
5183             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5184               RRETURN(MATCH_NOMATCH);
5185             break;
5186
5187             case OP_DIGIT:
5188             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5189               RRETURN(MATCH_NOMATCH);
5190             break;
5191
5192             case OP_NOT_WHITESPACE:
5193             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5194               RRETURN(MATCH_NOMATCH);
5195             break;
5196
5197             case OP_WHITESPACE:
5198             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5199               RRETURN(MATCH_NOMATCH);
5200             break;
5201
5202             case OP_NOT_WORDCHAR:
5203             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5204               RRETURN(MATCH_NOMATCH);
5205             break;
5206
5207             case OP_WORDCHAR:
5208             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5209               RRETURN(MATCH_NOMATCH);
5210             break;
5211
5212             default:
5213             RRETURN(PCRE_ERROR_INTERNAL);
5214             }
5215           }
5216         }
5217       else
5218 #endif
5219       /* Not UTF mode */
5220         {
5221         for (fi = min;; fi++)
5222           {
5223           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5224           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5225           if (fi >= max) RRETURN(MATCH_NOMATCH);
5226           if (eptr >= md->end_subject)
5227             {
5228             SCHECK_PARTIAL();
5229             RRETURN(MATCH_NOMATCH);
5230             }
5231           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5232             RRETURN(MATCH_NOMATCH);
5233           c = *eptr++;
5234           switch(ctype)
5235             {
5236             case OP_ANY:               /* This is the non-NL case */
5237             if (md->partial != 0 &&    /* Take care with CRLF partial */
5238                 eptr >= md->end_subject &&
5239                 NLBLOCK->nltype == NLTYPE_FIXED &&
5240                 NLBLOCK->nllen == 2 &&
5241                 c == NLBLOCK->nl[0])
5242               {
5243               md->hitend = TRUE;
5244               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5245               }
5246             break;
5247
5248             case OP_ALLANY:
5249             case OP_ANYBYTE:
5250             break;
5251
5252             case OP_ANYNL:
5253             switch(c)
5254               {
5255               default: RRETURN(MATCH_NOMATCH);
5256               case 0x000d:
5257               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5258               break;
5259
5260               case 0x000a:
5261               break;
5262
5263               case 0x000b:
5264               case 0x000c:
5265               case 0x0085:
5266 #ifdef COMPILE_PCRE16
5267               case 0x2028:
5268               case 0x2029:
5269 #endif
5270               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5271               break;
5272               }
5273             break;
5274
5275             case OP_NOT_HSPACE:
5276             switch(c)
5277               {
5278               default: break;
5279               case 0x09:      /* HT */
5280               case 0x20:      /* SPACE */
5281               case 0xa0:      /* NBSP */
5282 #ifdef COMPILE_PCRE16
5283               case 0x1680:    /* OGHAM SPACE MARK */
5284               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5285               case 0x2000:    /* EN QUAD */
5286               case 0x2001:    /* EM QUAD */
5287               case 0x2002:    /* EN SPACE */
5288               case 0x2003:    /* EM SPACE */
5289               case 0x2004:    /* THREE-PER-EM SPACE */
5290               case 0x2005:    /* FOUR-PER-EM SPACE */
5291               case 0x2006:    /* SIX-PER-EM SPACE */
5292               case 0x2007:    /* FIGURE SPACE */
5293               case 0x2008:    /* PUNCTUATION SPACE */
5294               case 0x2009:    /* THIN SPACE */
5295               case 0x200A:    /* HAIR SPACE */
5296               case 0x202f:    /* NARROW NO-BREAK SPACE */
5297               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5298               case 0x3000:    /* IDEOGRAPHIC SPACE */
5299 #endif
5300               RRETURN(MATCH_NOMATCH);
5301               }
5302             break;
5303
5304             case OP_HSPACE:
5305             switch(c)
5306               {
5307               default: RRETURN(MATCH_NOMATCH);
5308               case 0x09:      /* HT */
5309               case 0x20:      /* SPACE */
5310               case 0xa0:      /* NBSP */
5311 #ifdef COMPILE_PCRE16
5312               case 0x1680:    /* OGHAM SPACE MARK */
5313               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5314               case 0x2000:    /* EN QUAD */
5315               case 0x2001:    /* EM QUAD */
5316               case 0x2002:    /* EN SPACE */
5317               case 0x2003:    /* EM SPACE */
5318               case 0x2004:    /* THREE-PER-EM SPACE */
5319               case 0x2005:    /* FOUR-PER-EM SPACE */
5320               case 0x2006:    /* SIX-PER-EM SPACE */
5321               case 0x2007:    /* FIGURE SPACE */
5322               case 0x2008:    /* PUNCTUATION SPACE */
5323               case 0x2009:    /* THIN SPACE */
5324               case 0x200A:    /* HAIR SPACE */
5325               case 0x202f:    /* NARROW NO-BREAK SPACE */
5326               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5327               case 0x3000:    /* IDEOGRAPHIC SPACE */
5328 #endif
5329               break;
5330               }
5331             break;
5332
5333             case OP_NOT_VSPACE:
5334             switch(c)
5335               {
5336               default: break;
5337               case 0x0a:      /* LF */
5338               case 0x0b:      /* VT */
5339               case 0x0c:      /* FF */
5340               case 0x0d:      /* CR */
5341               case 0x85:      /* NEL */
5342 #ifdef COMPILE_PCRE16
5343               case 0x2028:    /* LINE SEPARATOR */
5344               case 0x2029:    /* PARAGRAPH SEPARATOR */
5345 #endif
5346               RRETURN(MATCH_NOMATCH);
5347               }
5348             break;
5349
5350             case OP_VSPACE:
5351             switch(c)
5352               {
5353               default: RRETURN(MATCH_NOMATCH);
5354               case 0x0a:      /* LF */
5355               case 0x0b:      /* VT */
5356               case 0x0c:      /* FF */
5357               case 0x0d:      /* CR */
5358               case 0x85:      /* NEL */
5359 #ifdef COMPILE_PCRE16
5360               case 0x2028:    /* LINE SEPARATOR */
5361               case 0x2029:    /* PARAGRAPH SEPARATOR */
5362 #endif
5363               break;
5364               }
5365             break;
5366
5367             case OP_NOT_DIGIT:
5368             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5369             break;
5370
5371             case OP_DIGIT:
5372             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5373             break;
5374
5375             case OP_NOT_WHITESPACE:
5376             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5377             break;
5378
5379             case OP_WHITESPACE:
5380             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5381             break;
5382
5383             case OP_NOT_WORDCHAR:
5384             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5385             break;
5386
5387             case OP_WORDCHAR:
5388             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5389             break;
5390
5391             default:
5392             RRETURN(PCRE_ERROR_INTERNAL);
5393             }
5394           }
5395         }
5396       /* Control never gets here */
5397       }
5398
5399     /* If maximizing, it is worth using inline code for speed, doing the type
5400     test once at the start (i.e. keep it out of the loop). Again, keep the
5401     UTF and UCP code separate. */
5402
5403     else
5404       {
5405       pp = eptr;  /* Remember where we started */
5406
5407 #ifdef SUPPORT_UCP
5408       if (prop_type >= 0)
5409         {
5410         switch(prop_type)
5411           {
5412           case PT_ANY:
5413           for (i = min; i < max; i++)
5414             {
5415             int len = 1;
5416             if (eptr >= md->end_subject)
5417               {
5418               SCHECK_PARTIAL();
5419               break;
5420               }
5421             GETCHARLENTEST(c, eptr, len);
5422             if (prop_fail_result) break;
5423             eptr+= len;
5424             }
5425           break;
5426
5427           case PT_LAMP:
5428           for (i = min; i < max; i++)
5429             {
5430             int chartype;
5431             int len = 1;
5432             if (eptr >= md->end_subject)
5433               {
5434               SCHECK_PARTIAL();
5435               break;
5436               }
5437             GETCHARLENTEST(c, eptr, len);
5438             chartype = UCD_CHARTYPE(c);
5439             if ((chartype == ucp_Lu ||
5440                  chartype == ucp_Ll ||
5441                  chartype == ucp_Lt) == prop_fail_result)
5442               break;
5443             eptr+= len;
5444             }
5445           break;
5446
5447           case PT_GC:
5448           for (i = min; i < max; i++)
5449             {
5450             int len = 1;
5451             if (eptr >= md->end_subject)
5452               {
5453               SCHECK_PARTIAL();
5454               break;
5455               }
5456             GETCHARLENTEST(c, eptr, len);
5457             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5458             eptr+= len;
5459             }
5460           break;
5461
5462           case PT_PC:
5463           for (i = min; i < max; i++)
5464             {
5465             int len = 1;
5466             if (eptr >= md->end_subject)
5467               {
5468               SCHECK_PARTIAL();
5469               break;
5470               }
5471             GETCHARLENTEST(c, eptr, len);
5472             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5473             eptr+= len;
5474             }
5475           break;
5476
5477           case PT_SC:
5478           for (i = min; i < max; i++)
5479             {
5480             int len = 1;
5481             if (eptr >= md->end_subject)
5482               {
5483               SCHECK_PARTIAL();
5484               break;
5485               }
5486             GETCHARLENTEST(c, eptr, len);
5487             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5488             eptr+= len;
5489             }
5490           break;
5491
5492           case PT_ALNUM:
5493           for (i = min; i < max; i++)
5494             {
5495             int category;
5496             int len = 1;
5497             if (eptr >= md->end_subject)
5498               {
5499               SCHECK_PARTIAL();
5500               break;
5501               }
5502             GETCHARLENTEST(c, eptr, len);
5503             category = UCD_CATEGORY(c);
5504             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5505               break;
5506             eptr+= len;
5507             }
5508           break;
5509
5510           case PT_SPACE:    /* Perl space */
5511           for (i = min; i < max; i++)
5512             {
5513             int len = 1;
5514             if (eptr >= md->end_subject)
5515               {
5516               SCHECK_PARTIAL();
5517               break;
5518               }
5519             GETCHARLENTEST(c, eptr, len);
5520             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5521                  c == CHAR_FF || c == CHAR_CR)
5522                  == prop_fail_result)
5523               break;
5524             eptr+= len;
5525             }
5526           break;
5527
5528           case PT_PXSPACE:  /* POSIX space */
5529           for (i = min; i < max; i++)
5530             {
5531             int len = 1;
5532             if (eptr >= md->end_subject)
5533               {
5534               SCHECK_PARTIAL();
5535               break;
5536               }
5537             GETCHARLENTEST(c, eptr, len);
5538             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5539                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5540                  == prop_fail_result)
5541               break;
5542             eptr+= len;
5543             }
5544           break;
5545
5546           case PT_WORD:
5547           for (i = min; i < max; i++)
5548             {
5549             int category;
5550             int len = 1;
5551             if (eptr >= md->end_subject)
5552               {
5553               SCHECK_PARTIAL();
5554               break;
5555               }
5556             GETCHARLENTEST(c, eptr, len);
5557             category = UCD_CATEGORY(c);
5558             if ((category == ucp_L || category == ucp_N ||
5559                  c == CHAR_UNDERSCORE) == prop_fail_result)
5560               break;
5561             eptr+= len;
5562             }
5563           break;
5564
5565           default:
5566           RRETURN(PCRE_ERROR_INTERNAL);
5567           }
5568
5569         /* eptr is now past the end of the maximum run */
5570
5571         if (possessive) continue;
5572         for(;;)
5573           {
5574           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5575           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5576           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5577           if (utf) BACKCHAR(eptr);
5578           }
5579         }
5580
5581       /* Match extended Unicode sequences. We will get here only if the
5582       support is in the binary; otherwise a compile-time error occurs. */
5583
5584       else if (ctype == OP_EXTUNI)
5585         {
5586         for (i = min; i < max; i++)
5587           {
5588           int len = 1;
5589           if (eptr >= md->end_subject)
5590             {
5591             SCHECK_PARTIAL();
5592             break;
5593             }
5594           if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5595           if (UCD_CATEGORY(c) == ucp_M) break;
5596           eptr += len;
5597           while (eptr < md->end_subject)
5598             {
5599             len = 1;
5600             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5601             if (UCD_CATEGORY(c) != ucp_M) break;
5602             eptr += len;
5603             }
5604           CHECK_PARTIAL();
5605           }
5606
5607         /* eptr is now past the end of the maximum run */
5608
5609         if (possessive) continue;
5610
5611         for(;;)
5612           {
5613           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5614           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5615           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5616           for (;;)                        /* Move back over one extended */
5617             {
5618             if (!utf) c = *eptr; else
5619               {
5620               BACKCHAR(eptr);
5621               GETCHAR(c, eptr);
5622               }
5623             if (UCD_CATEGORY(c) != ucp_M) break;
5624             eptr--;
5625             }
5626           }
5627         }
5628
5629       else
5630 #endif   /* SUPPORT_UCP */
5631
5632 #ifdef SUPPORT_UTF
5633       if (utf)
5634         {
5635         switch(ctype)
5636           {
5637           case OP_ANY:
5638           if (max < INT_MAX)
5639             {
5640             for (i = min; i < max; i++)
5641               {
5642               if (eptr >= md->end_subject)
5643                 {
5644                 SCHECK_PARTIAL();
5645                 break;
5646                 }
5647               if (IS_NEWLINE(eptr)) break;
5648               if (md->partial != 0 &&    /* Take care with CRLF partial */
5649                   eptr + 1 >= md->end_subject &&
5650                   NLBLOCK->nltype == NLTYPE_FIXED &&
5651                   NLBLOCK->nllen == 2 &&
5652                   *eptr == NLBLOCK->nl[0])
5653                 {
5654                 md->hitend = TRUE;
5655                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5656                 }
5657               eptr++;
5658               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5659               }
5660             }
5661
5662           /* Handle unlimited UTF-8 repeat */
5663
5664           else
5665             {
5666             for (i = min; i < max; i++)
5667               {
5668               if (eptr >= md->end_subject)
5669                 {
5670                 SCHECK_PARTIAL();
5671                 break;
5672                 }
5673               if (IS_NEWLINE(eptr)) break;
5674               if (md->partial != 0 &&    /* Take care with CRLF partial */
5675                   eptr + 1 >= md->end_subject &&
5676                   NLBLOCK->nltype == NLTYPE_FIXED &&
5677                   NLBLOCK->nllen == 2 &&
5678                   *eptr == NLBLOCK->nl[0])
5679                 {
5680                 md->hitend = TRUE;
5681                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5682                 }
5683               eptr++;
5684               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5685               }
5686             }
5687           break;
5688
5689           case OP_ALLANY:
5690           if (max < INT_MAX)
5691             {
5692             for (i = min; i < max; i++)
5693               {
5694               if (eptr >= md->end_subject)
5695                 {
5696                 SCHECK_PARTIAL();
5697                 break;
5698                 }
5699               eptr++;
5700               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5701               }
5702             }
5703           else
5704             {
5705             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5706             SCHECK_PARTIAL();
5707             }
5708           break;
5709
5710           /* The byte case is the same as non-UTF8 */
5711
5712           case OP_ANYBYTE:
5713           c = max - min;
5714           if (c > (unsigned int)(md->end_subject - eptr))
5715             {
5716             eptr = md->end_subject;
5717             SCHECK_PARTIAL();
5718             }
5719           else eptr += c;
5720           break;
5721
5722           case OP_ANYNL:
5723           for (i = min; i < max; i++)
5724             {
5725             int len = 1;
5726             if (eptr >= md->end_subject)
5727               {
5728               SCHECK_PARTIAL();
5729               break;
5730               }
5731             GETCHARLEN(c, eptr, len);
5732             if (c == 0x000d)
5733               {
5734               if (++eptr >= md->end_subject) break;
5735               if (*eptr == 0x000a) eptr++;
5736               }
5737             else
5738               {
5739               if (c != 0x000a &&
5740                   (md->bsr_anycrlf ||
5741                    (c != 0x000b && c != 0x000c &&
5742                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
5743                 break;
5744               eptr += len;
5745               }
5746             }
5747           break;
5748
5749           case OP_NOT_HSPACE:
5750           case OP_HSPACE:
5751           for (i = min; i < max; i++)
5752             {
5753             BOOL gotspace;
5754             int len = 1;
5755             if (eptr >= md->end_subject)
5756               {
5757               SCHECK_PARTIAL();
5758               break;
5759               }
5760             GETCHARLEN(c, eptr, len);
5761             switch(c)
5762               {
5763               default: gotspace = FALSE; break;
5764               case 0x09:      /* HT */
5765               case 0x20:      /* SPACE */
5766               case 0xa0:      /* NBSP */
5767               case 0x1680:    /* OGHAM SPACE MARK */
5768               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5769               case 0x2000:    /* EN QUAD */
5770               case 0x2001:    /* EM QUAD */
5771               case 0x2002:    /* EN SPACE */
5772               case 0x2003:    /* EM SPACE */
5773               case 0x2004:    /* THREE-PER-EM SPACE */
5774               case 0x2005:    /* FOUR-PER-EM SPACE */
5775               case 0x2006:    /* SIX-PER-EM SPACE */
5776               case 0x2007:    /* FIGURE SPACE */
5777               case 0x2008:    /* PUNCTUATION SPACE */
5778               case 0x2009:    /* THIN SPACE */
5779               case 0x200A:    /* HAIR SPACE */
5780               case 0x202f:    /* NARROW NO-BREAK SPACE */
5781               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5782               case 0x3000:    /* IDEOGRAPHIC SPACE */
5783               gotspace = TRUE;
5784               break;
5785               }
5786             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5787             eptr += len;
5788             }
5789           break;
5790
5791           case OP_NOT_VSPACE:
5792           case OP_VSPACE:
5793           for (i = min; i < max; i++)
5794             {
5795             BOOL gotspace;
5796             int len = 1;
5797             if (eptr >= md->end_subject)
5798               {
5799               SCHECK_PARTIAL();
5800               break;
5801               }
5802             GETCHARLEN(c, eptr, len);
5803             switch(c)
5804               {
5805               default: gotspace = FALSE; break;
5806               case 0x0a:      /* LF */
5807               case 0x0b:      /* VT */
5808               case 0x0c:      /* FF */
5809               case 0x0d:      /* CR */
5810               case 0x85:      /* NEL */
5811               case 0x2028:    /* LINE SEPARATOR */
5812               case 0x2029:    /* PARAGRAPH SEPARATOR */
5813               gotspace = TRUE;
5814               break;
5815               }
5816             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5817             eptr += len;
5818             }
5819           break;
5820
5821           case OP_NOT_DIGIT:
5822           for (i = min; i < max; i++)
5823             {
5824             int len = 1;
5825             if (eptr >= md->end_subject)
5826               {
5827               SCHECK_PARTIAL();
5828               break;
5829               }
5830             GETCHARLEN(c, eptr, len);
5831             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5832             eptr+= len;
5833             }
5834           break;
5835
5836           case OP_DIGIT:
5837           for (i = min; i < max; i++)
5838             {
5839             int len = 1;
5840             if (eptr >= md->end_subject)
5841               {
5842               SCHECK_PARTIAL();
5843               break;
5844               }
5845             GETCHARLEN(c, eptr, len);
5846             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5847             eptr+= len;
5848             }
5849           break;
5850
5851           case OP_NOT_WHITESPACE:
5852           for (i = min; i < max; i++)
5853             {
5854             int len = 1;
5855             if (eptr >= md->end_subject)
5856               {
5857               SCHECK_PARTIAL();
5858               break;
5859               }
5860             GETCHARLEN(c, eptr, len);
5861             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5862             eptr+= len;
5863             }
5864           break;
5865
5866           case OP_WHITESPACE:
5867           for (i = min; i < max; i++)
5868             {
5869             int len = 1;
5870             if (eptr >= md->end_subject)
5871               {
5872               SCHECK_PARTIAL();
5873               break;
5874               }
5875             GETCHARLEN(c, eptr, len);
5876             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5877             eptr+= len;
5878             }
5879           break;
5880
5881           case OP_NOT_WORDCHAR:
5882           for (i = min; i < max; i++)
5883             {
5884             int len = 1;
5885             if (eptr >= md->end_subject)
5886               {
5887               SCHECK_PARTIAL();
5888               break;
5889               }
5890             GETCHARLEN(c, eptr, len);
5891             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5892             eptr+= len;
5893             }
5894           break;
5895
5896           case OP_WORDCHAR:
5897           for (i = min; i < max; i++)
5898             {
5899             int len = 1;
5900             if (eptr >= md->end_subject)
5901               {
5902               SCHECK_PARTIAL();
5903               break;
5904               }
5905             GETCHARLEN(c, eptr, len);
5906             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5907             eptr+= len;
5908             }
5909           break;
5910
5911           default:
5912           RRETURN(PCRE_ERROR_INTERNAL);
5913           }
5914
5915         /* eptr is now past the end of the maximum run. If possessive, we are
5916         done (no backing up). Otherwise, match at this position; anything other
5917         than no match is immediately returned. For nomatch, back up one
5918         character, unless we are matching \R and the last thing matched was
5919         \r\n, in which case, back up two bytes. */
5920
5921         if (possessive) continue;
5922         for(;;)
5923           {
5924           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5925           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5926           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5927           BACKCHAR(eptr);
5928           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5929               eptr[-1] == '\r') eptr--;
5930           }
5931         }
5932       else
5933 #endif  /* SUPPORT_UTF */
5934       /* Not UTF mode */
5935         {
5936         switch(ctype)
5937           {
5938           case OP_ANY:
5939           for (i = min; i < max; i++)
5940             {
5941             if (eptr >= md->end_subject)
5942               {
5943               SCHECK_PARTIAL();
5944               break;
5945               }
5946             if (IS_NEWLINE(eptr)) break;
5947             if (md->partial != 0 &&    /* Take care with CRLF partial */
5948                 eptr + 1 >= md->end_subject &&
5949                 NLBLOCK->nltype == NLTYPE_FIXED &&
5950                 NLBLOCK->nllen == 2 &&
5951                 *eptr == NLBLOCK->nl[0])
5952               {
5953               md->hitend = TRUE;
5954               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5955               }
5956             eptr++;
5957             }
5958           break;
5959
5960           case OP_ALLANY:
5961           case OP_ANYBYTE:
5962           c = max - min;
5963           if (c > (unsigned int)(md->end_subject - eptr))
5964             {
5965             eptr = md->end_subject;
5966             SCHECK_PARTIAL();
5967             }
5968           else eptr += c;
5969           break;
5970
5971           case OP_ANYNL:
5972           for (i = min; i < max; i++)
5973             {
5974             if (eptr >= md->end_subject)
5975               {
5976               SCHECK_PARTIAL();
5977               break;
5978               }
5979             c = *eptr;
5980             if (c == 0x000d)
5981               {
5982               if (++eptr >= md->end_subject) break;
5983               if (*eptr == 0x000a) eptr++;
5984               }
5985             else
5986               {
5987               if (c != 0x000a && (md->bsr_anycrlf ||
5988                 (c != 0x000b && c != 0x000c && c != 0x0085
5989 #ifdef COMPILE_PCRE16
5990                 && c != 0x2028 && c != 0x2029
5991 #endif
5992                 ))) break;
5993               eptr++;
5994               }
5995             }
5996           break;
5997
5998           case OP_NOT_HSPACE:
5999           for (i = min; i < max; i++)
6000             {
6001             if (eptr >= md->end_subject)
6002               {
6003               SCHECK_PARTIAL();
6004               break;
6005               }
6006             c = *eptr;
6007             if (c == 0x09 || c == 0x20 || c == 0xa0
6008 #ifdef COMPILE_PCRE16
6009               || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6010               || c == 0x202f || c == 0x205f || c == 0x3000
6011 #endif
6012               ) break;
6013             eptr++;
6014             }
6015           break;
6016
6017           case OP_HSPACE:
6018           for (i = min; i < max; i++)
6019             {
6020             if (eptr >= md->end_subject)
6021               {
6022               SCHECK_PARTIAL();
6023               break;
6024               }
6025             c = *eptr;
6026             if (c != 0x09 && c != 0x20 && c != 0xa0
6027 #ifdef COMPILE_PCRE16
6028               && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6029               && c != 0x202f && c != 0x205f && c != 0x3000
6030 #endif
6031               ) break;
6032             eptr++;
6033             }
6034           break;
6035
6036           case OP_NOT_VSPACE:
6037           for (i = min; i < max; i++)
6038             {
6039             if (eptr >= md->end_subject)
6040               {
6041               SCHECK_PARTIAL();
6042               break;
6043               }
6044             c = *eptr;
6045             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6046 #ifdef COMPILE_PCRE16
6047               || c == 0x2028 || c == 0x2029
6048 #endif
6049               ) break;
6050             eptr++;
6051             }
6052           break;
6053
6054           case OP_VSPACE:
6055           for (i = min; i < max; i++)
6056             {
6057             if (eptr >= md->end_subject)
6058               {
6059               SCHECK_PARTIAL();
6060               break;
6061               }
6062             c = *eptr;
6063             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6064 #ifdef COMPILE_PCRE16
6065               && c != 0x2028 && c != 0x2029
6066 #endif
6067               ) break;
6068             eptr++;
6069             }
6070           break;
6071
6072           case OP_NOT_DIGIT:
6073           for (i = min; i < max; i++)
6074             {
6075             if (eptr >= md->end_subject)
6076               {
6077               SCHECK_PARTIAL();
6078               break;
6079               }
6080             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6081             eptr++;
6082             }
6083           break;
6084
6085           case OP_DIGIT:
6086           for (i = min; i < max; i++)
6087             {
6088             if (eptr >= md->end_subject)
6089               {
6090               SCHECK_PARTIAL();
6091               break;
6092               }
6093             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6094             eptr++;
6095             }
6096           break;
6097
6098           case OP_NOT_WHITESPACE:
6099           for (i = min; i < max; i++)
6100             {
6101             if (eptr >= md->end_subject)
6102               {
6103               SCHECK_PARTIAL();
6104               break;
6105               }
6106             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6107             eptr++;
6108             }
6109           break;
6110
6111           case OP_WHITESPACE:
6112           for (i = min; i < max; i++)
6113             {
6114             if (eptr >= md->end_subject)
6115               {
6116               SCHECK_PARTIAL();
6117               break;
6118               }
6119             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6120             eptr++;
6121             }
6122           break;
6123
6124           case OP_NOT_WORDCHAR:
6125           for (i = min; i < max; i++)
6126             {
6127             if (eptr >= md->end_subject)
6128               {
6129               SCHECK_PARTIAL();
6130               break;
6131               }
6132             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6133             eptr++;
6134             }
6135           break;
6136
6137           case OP_WORDCHAR:
6138           for (i = min; i < max; i++)
6139             {
6140             if (eptr >= md->end_subject)
6141               {
6142               SCHECK_PARTIAL();
6143               break;
6144               }
6145             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6146             eptr++;
6147             }
6148           break;
6149
6150           default:
6151           RRETURN(PCRE_ERROR_INTERNAL);
6152           }
6153
6154         /* eptr is now past the end of the maximum run. If possessive, we are
6155         done (no backing up). Otherwise, match at this position; anything other
6156         than no match is immediately returned. For nomatch, back up one
6157         character (byte), unless we are matching \R and the last thing matched
6158         was \r\n, in which case, back up two bytes. */
6159
6160         if (possessive) continue;
6161         while (eptr >= pp)
6162           {
6163           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6164           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6165           eptr--;
6166           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
6167               eptr[-1] == '\r') eptr--;
6168           }
6169         }
6170
6171       /* Get here if we can't make it match with any permitted repetitions */
6172
6173       RRETURN(MATCH_NOMATCH);
6174       }
6175     /* Control never gets here */
6176
6177     /* There's been some horrible disaster. Arrival here can only mean there is
6178     something seriously wrong in the code above or the OP_xxx definitions. */
6179
6180     default:
6181     DPRINTF(("Unknown opcode %d\n", *ecode));
6182     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6183     }
6184
6185   /* Do not stick any code in here without much thought; it is assumed
6186   that "continue" in the code above comes out to here to repeat the main
6187   loop. */
6188
6189   }             /* End of main loop */
6190 /* Control never reaches here */
6191
6192
6193 /* When compiling to use the heap rather than the stack for recursive calls to
6194 match(), the RRETURN() macro jumps here. The number that is saved in
6195 frame->Xwhere indicates which label we actually want to return to. */
6196
6197 #ifdef NO_RECURSE
6198 #define LBL(val) case val: goto L_RM##val;
6199 HEAP_RETURN:
6200 switch (frame->Xwhere)
6201   {
6202   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6203   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6204   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6205   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6206   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6207   LBL(65) LBL(66)
6208 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6209   LBL(21)
6210 #endif
6211 #ifdef SUPPORT_UTF
6212   LBL(16) LBL(18) LBL(20)
6213   LBL(22) LBL(23) LBL(28) LBL(30)
6214   LBL(32) LBL(34) LBL(42) LBL(46)
6215 #ifdef SUPPORT_UCP
6216   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6217   LBL(59) LBL(60) LBL(61) LBL(62)
6218 #endif  /* SUPPORT_UCP */
6219 #endif  /* SUPPORT_UTF */
6220   default:
6221   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6222
6223 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6224
6225   return PCRE_ERROR_INTERNAL;
6226   }
6227 #undef LBL
6228 #endif  /* NO_RECURSE */
6229 }
6230
6231
6232 /***************************************************************************
6233 ****************************************************************************
6234                    RECURSION IN THE match() FUNCTION
6235
6236 Undefine all the macros that were defined above to handle this. */
6237
6238 #ifdef NO_RECURSE   /* the names in this group are #undef'd only in NO_RECURSE builds */
6239 #undef eptr
6240 #undef ecode
6241 #undef mstart
6242 #undef offset_top
6243 #undef eptrb
6244 #undef flags
6245
6246 #undef callpat
6247 #undef charptr
6248 #undef data
6249 #undef next
6250 #undef pp
6251 #undef prev
6252 #undef saved_eptr
6253
6254 #undef new_recursive
6255
6256 #undef cur_is_word
6257 #undef condition
6258 #undef prev_is_word
6259
6260 #undef ctype
6261 #undef length
6262 #undef max
6263 #undef min
6264 #undef number
6265 #undef offset
6266 #undef op
6267 #undef save_capture_last
6268 #undef save_offset1
6269 #undef save_offset2
6270 #undef save_offset3
6271 #undef stacksave
6272
6273 #undef newptrb
6274
6275 #endif  /* NO_RECURSE */
6276
6277 /* These two are defined as macros in both cases, so they are undefined
        unconditionally, outside the NO_RECURSE guard above. */
6278
6279 #undef fc
6280 #undef fi
6281
6282 /***************************************************************************
6283 ***************************************************************************/
6284
6285
6286 #ifdef NO_RECURSE
6287 /*************************************************
6288 *          Release allocated heap frames         *
6289 *************************************************/
6290
6291 /* This function releases all the allocated frames. The base frame is on the
6292 machine stack, and so must not be freed.
6293
6294 Argument: the address of the base frame
6295 Returns:  nothing
6296 */
6297
6298 static void
6299 release_match_heapframes (heapframe *frame_base)
6300 {
6301 heapframe *cur, *after;
6302 /* Free every frame chained after the base; the base itself is never freed. */
6303 for (cur = frame_base->Xnextframe; cur != NULL; cur = after)
6304   {
6305   after = cur->Xnextframe;   /* read the link before the frame is released */
6306   (PUBL(stack_free))(cur);
6307   }
6308 }
6309 #endif
6310
6311
6312 /*************************************************
6313 *         Execute a Regular Expression           *
6314 *************************************************/
6315
6316 /* This function applies a compiled re to a subject string and picks out
6317 portions of the string if it matches. Two elements in the vector are set for
6318 each substring: the offsets to the start and end of the substring.
6319
6320 Arguments:
6321   argument_re     points to the compiled expression
6322   extra_data      points to extra data or is NULL
6323   subject         points to the subject string
6324   length          length of subject string (may contain binary zeros)
6325   start_offset    where to start in the subject string
6326   options         option bits
6327   offsets         points to a vector of ints to be filled in with offsets
6328   offsetcount     the number of elements in the vector
6329
6330 Returns:          > 0 => success; value is the number of elements filled in
6331                   = 0 => success, but offsets is not big enough
6332                    -1 => failed to match
6333                  < -1 => some kind of unexpected problem
6334 */
6335
6336 #ifdef COMPILE_PCRE8
6337 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6338 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6339   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6340   int offsetcount)
6341 #else
6342 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6343 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6344   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6345   int offsetcount)
6346 #endif
6347 {
6348 int rc, ocount, arg_offset_max;
6349 int newline;
6350 BOOL using_temporary_offsets = FALSE;  /* TRUE once offset_vector is malloc'd */
6351 BOOL anchored;
6352 BOOL startline;
6353 BOOL firstline;
6354 BOOL utf;
6355 BOOL has_first_char = FALSE;
6356 BOOL has_req_char = FALSE;
6357 pcre_uchar first_char = 0;
6358 pcre_uchar first_char2 = 0;
6359 pcre_uchar req_char = 0;
6360 pcre_uchar req_char2 = 0;
6361 match_data match_block;
6362 match_data *md = &match_block;
6363 const pcre_uint8 *tables;
6364 const pcre_uint8 *start_bits = NULL;
6365 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6366 PCRE_PUCHAR end_subject;
6367 PCRE_PUCHAR start_partial = NULL;  /* start_used_ptr of first hitend attempt */
6368 PCRE_PUCHAR req_char_ptr = start_match - 1;  /* Where req_char was last found */
6369
6370 const pcre_study_data *study;
6371 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6372
6373 #ifdef NO_RECURSE
6374 heapframe frame_zero;
6375 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6376 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6377 md->match_frames_base = &frame_zero;
6378 #endif
6379
6380 /* Check for the special magic call that measures the size of the stack used
6381 per recursive call of match(). Without the funny casting for sizeof, a Windows
6382 compiler gave this error: "unary minus operator applied to unsigned type,
6383 result still unsigned". Hopefully the cast fixes that. */
6384
6385 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6386     start_offset == -999)
6387 #ifdef NO_RECURSE
6388   return -((int)sizeof(heapframe));
6389 #else
6390   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6391 #endif
6392
6393 /* Plausibility checks */
6394
6395 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6396 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6397   return PCRE_ERROR_NULL;
6398 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6399 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6400
6401 /* Check that the first field in the block is the magic number. If it is not,
6402 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6403 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6404 means that the pattern is likely compiled with different endianness. */
6405
6406 if (re->magic_number != MAGIC_NUMBER)
6407   return re->magic_number == REVERSED_MAGIC_NUMBER?
6408     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6409 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6410
6411 /* These two settings are used in the code for checking a UTF-8 string that
6412 follows immediately afterwards. Other values in the md block are used only
6413 during "normal" pcre_exec() processing, not when the JIT support is in use,
6414 so they are set up later. */
6415
6416 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6417 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6418 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6419               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6420
6421 /* Check a UTF-8 (or UTF-16) string if required. Pass back the character
6422 offset and error code for an invalid string if a results vector is available. */
6423
6424 #ifdef SUPPORT_UTF
6425 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6426   {
6427   int erroroffset;
6428   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6429   if (errorcode != 0)
6430     {
6431     if (offsetcount >= 2)
6432       {
6433       offsets[0] = erroroffset;
6434       offsets[1] = errorcode;
6435       }
6436 #ifdef COMPILE_PCRE16
6437     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6438       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6439 #else
6440     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6441       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6442 #endif
6443     }
6444
6445   /* Check that a start_offset points to the start of a UTF character. */
6446   if (start_offset > 0 && start_offset < length &&
6447       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6448     return PCRE_ERROR_BADUTF8_OFFSET;
6449   }
6450 #endif
6451
6452 /* If the pattern was successfully studied with JIT support, run the JIT
6453 executable instead of the rest of this function. Most options must be set at
6454 compile time for the JIT code to be usable. Fall back to the normal code path if
6455 an unsupported flag is set. */
6456
6457 #ifdef SUPPORT_JIT
6458 if (extra_data != NULL
6459     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6460                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6461     && extra_data->executable_jit != NULL
6462     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6463                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6464                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6465   {
6466   rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6467        start_offset, options, offsets, offsetcount);
6468
6469   /* PCRE_ERROR_NULL means that the selected normal or partial matching
6470   mode is not compiled. In this case we simply fall back to the interpreter. */
6471
6472   if (rc != PCRE_ERROR_NULL) return rc;
6473   }
6474 #endif
6475
6476 /* Carry on with non-JIT matching. This information is for finding all the
6477 numbers associated with a given name, for condition testing. */
6478
6479 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6480 md->name_count = re->name_count;
6481 md->name_entry_size = re->name_entry_size;
6482
6483 /* Fish out the optional data from the extra_data structure, first setting
6484 the default values. */
6485
6486 study = NULL;
6487 md->match_limit = MATCH_LIMIT;
6488 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6489 md->callout_data = NULL;
6490
6491 /* The table pointer is always in native byte order. */
6492
6493 tables = re->tables;
6494
6495 if (extra_data != NULL)
6496   {
6497   unsigned int flags = extra_data->flags;
6498   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6499     study = (const pcre_study_data *)extra_data->study_data;
6500   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6501     md->match_limit = extra_data->match_limit;
6502   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6503     md->match_limit_recursion = extra_data->match_limit_recursion;
6504   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6505     md->callout_data = extra_data->callout_data;
6506   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6507   }
6508
6509 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6510 is a feature that makes it possible to save compiled regexes and re-use them
6511 in other programs later. */
6512
6513 if (tables == NULL) tables = PRIV(default_tables);
6514
6515 /* Set up other data */
6516
6517 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6518 startline = (re->flags & PCRE_STARTLINE) != 0;
6519 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6520
6521 /* The code starts after the real_pcre block and the capture name table. */
6522
6523 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6524   re->name_count * re->name_entry_size;
6525
6526 md->start_subject = (PCRE_PUCHAR)subject;
6527 md->start_offset = start_offset;
6528 md->end_subject = md->start_subject + length;
6529 end_subject = md->end_subject;
6530
6531 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6532 md->use_ucp = (re->options & PCRE_UCP) != 0;
6533 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6534 md->ignore_skip_arg = FALSE;
6535
6536 /* Some options are unpacked into BOOL variables in the hope that testing
6537 them will be faster than individual option bits. */
6538
6539 md->notbol = (options & PCRE_NOTBOL) != 0;
6540 md->noteol = (options & PCRE_NOTEOL) != 0;
6541 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6542 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6543
6544 md->hitend = FALSE;
6545 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6546
6547 md->recursive = NULL;                   /* No recursion at top level */
6548 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6549
6550 md->lcc = tables + lcc_offset;
6551 md->fcc = tables + fcc_offset;
6552 md->ctypes = tables + ctypes_offset;
6553
6554 /* Handle different \R options. */
6555
6556 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6557   {
6558   case 0:
6559   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6560     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6561   else
6562 #ifdef BSR_ANYCRLF
6563   md->bsr_anycrlf = TRUE;
6564 #else
6565   md->bsr_anycrlf = FALSE;
6566 #endif
6567   break;
6568
6569   case PCRE_BSR_ANYCRLF:
6570   md->bsr_anycrlf = TRUE;
6571   break;
6572
6573   case PCRE_BSR_UNICODE:
6574   md->bsr_anycrlf = FALSE;
6575   break;
6576
6577   default: return PCRE_ERROR_BADNEWLINE;
6578   }
6579
6580 /* Handle different types of newline. The three bits give eight cases. If
6581 nothing is set at run time, whatever was used at compile time applies. */
6582
6583 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6584         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6585   {
6586   case 0: newline = NEWLINE; break;   /* Compile-time default */
6587   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6588   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6589   case PCRE_NEWLINE_CR+
6590        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6591   case PCRE_NEWLINE_ANY: newline = -1; break;
6592   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6593   default: return PCRE_ERROR_BADNEWLINE;
6594   }
6595
6596 if (newline == -2)
6597   {
6598   md->nltype = NLTYPE_ANYCRLF;
6599   }
6600 else if (newline < 0)
6601   {
6602   md->nltype = NLTYPE_ANY;
6603   }
6604 else
6605   {
6606   md->nltype = NLTYPE_FIXED;
6607   if (newline > 255)
6608     {
6609     md->nllen = 2;
6610     md->nl[0] = (newline >> 8) & 255;
6611     md->nl[1] = newline & 255;
6612     }
6613   else
6614     {
6615     md->nllen = 1;
6616     md->nl[0] = newline;
6617     }
6618   }
6619
6620 /* Partial matching was originally supported only for a restricted set of
6621 regexes; from release 8.00 there are no restrictions, but the bits are still
6622 defined (though never set). So there's no harm in leaving this code. */
6623
6624 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6625   return PCRE_ERROR_BADPARTIAL;
6626
6627 /* If the expression has got more back references than the offsets supplied can
6628 hold, we get a temporary chunk of working store to use during the matching.
6629 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6630 of 3. */
6631
6632 ocount = offsetcount - (offsetcount % 3);
6633 arg_offset_max = (2*ocount)/3;  /* Two-thirds of the caller's rounded vector */
6634
6635 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6636   {
6637   ocount = re->top_backref * 3 + 3;
6638   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6639   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6640   using_temporary_offsets = TRUE;
6641   DPRINTF(("Got memory to hold back references\n"));
6642   }
6643 else md->offset_vector = offsets;
6644
6645 md->offset_end = ocount;
6646 md->offset_max = (2*ocount)/3;
6647 md->offset_overflow = FALSE;
6648 md->capture_last = -1;
6649
6650 /* Reset the working variable associated with each extraction. These should
6651 never be used unless previously set, but they get saved and restored, and so we
6652 initialize them to avoid reading uninitialized locations. Also, unset the
6653 offsets for the matched string. This is really just for tidiness with callouts,
6654 in case they inspect these fields. */
6655
6656 if (md->offset_vector != NULL)
6657   {
6658   int *iptr = md->offset_vector + ocount;
6659   int *iend = iptr - re->top_bracket;
6660   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;  /* Slots 0 and 1 are set separately below */
6661   while (--iptr >= iend) *iptr = -1;
6662   md->offset_vector[0] = md->offset_vector[1] = -1;
6663   }
6664
6665 /* Set up the first character to match, if available. The first_char value is
6666 never set for an anchored regular expression, but the anchoring may be forced
6667 at run time, so we have to test for anchoring. The first char may be unset for
6668 an unanchored pattern, of course. If there's no first char and the pattern was
6669 studied, there may be a bitmap of possible first characters. */
6670
6671 if (!anchored)
6672   {
6673   if ((re->flags & PCRE_FIRSTSET) != 0)
6674     {
6675     has_first_char = TRUE;
6676     first_char = first_char2 = (pcre_uchar)(re->first_char);
6677     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6678       {
6679       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6680 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6681       if (utf && first_char > 127)
6682         first_char2 = UCD_OTHERCASE(first_char);
6683 #endif
6684       }
6685     }
6686   else
6687     if (!startline && study != NULL &&
6688       (study->flags & PCRE_STUDY_MAPPED) != 0)
6689         start_bits = study->start_bits;
6690   }
6691
6692 /* For anchored or unanchored matches, there may be a "last known required
6693 character" set. */
6694
6695 if ((re->flags & PCRE_REQCHSET) != 0)
6696   {
6697   has_req_char = TRUE;
6698   req_char = req_char2 = (pcre_uchar)(re->req_char);
6699   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6700     {
6701     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6702 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6703     if (utf && req_char > 127)
6704       req_char2 = UCD_OTHERCASE(req_char);
6705 #endif
6706     }
6707   }
6708
6709
6710 /* ==========================================================================*/
6711
6712 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6713 the loop runs just once. */
6714
6715 for(;;)
6716   {
6717   PCRE_PUCHAR save_end_subject = end_subject;
6718   PCRE_PUCHAR new_start_match;
6719
6720   /* If firstline is TRUE, the start of the match is constrained to the first
6721   line of a multiline string. That is, the match must be before or at the first
6722   newline. Implement this by temporarily adjusting end_subject so that we stop
6723   scanning at a newline. If the match fails at the newline, later code breaks
6724   this loop. */
6725
6726   if (firstline)
6727     {
6728     PCRE_PUCHAR t = start_match;
6729 #ifdef SUPPORT_UTF
6730     if (utf)
6731       {
6732       while (t < md->end_subject && !IS_NEWLINE(t))
6733         {
6734         t++;
6735         ACROSSCHAR(t < end_subject, *t, t++);
6736         }
6737       }
6738     else
6739 #endif
6740     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6741     end_subject = t;
6742     }
6743
6744   /* There are some optimizations that avoid running the match if a known
6745   starting point is not found, or if a known later character is not present.
6746   However, there is an option that disables these, for testing and for ensuring
6747   that all callouts do actually occur. The option can be set in the regex by
6748   (*NO_START_OPT) or passed in match-time options. */
6749
6750   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6751     {
6752     /* Advance to a unique first char if there is one. */
6753
6754     if (has_first_char)
6755       {
6756       if (first_char != first_char2)
6757         while (start_match < end_subject &&
6758             *start_match != first_char && *start_match != first_char2)
6759           start_match++;
6760       else
6761         while (start_match < end_subject && *start_match != first_char)
6762           start_match++;
6763       }
6764
6765     /* Or to just after a linebreak for a multiline match */
6766
6767     else if (startline)
6768       {
6769       if (start_match > md->start_subject + start_offset)
6770         {
6771 #ifdef SUPPORT_UTF
6772         if (utf)
6773           {
6774           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6775             {
6776             start_match++;
6777             ACROSSCHAR(start_match < end_subject, *start_match,
6778               start_match++);
6779             }
6780           }
6781         else
6782 #endif
6783         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6784           start_match++;
6785
6786         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6787         and we are now at a LF, advance the match position by one more character.
6788         */
6789
6790         if (start_match[-1] == CHAR_CR &&
6791              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6792              start_match < end_subject &&
6793              *start_match == CHAR_NL)
6794           start_match++;
6795         }
6796       }
6797
6798     /* Or to a non-unique first byte after study */
6799
6800     else if (start_bits != NULL)
6801       {
6802       while (start_match < end_subject)
6803         {
6804         unsigned int c = *start_match;
6805 #ifndef COMPILE_PCRE8
6806         if (c > 255) c = 255;
6807 #endif
6808         if ((start_bits[c/8] & (1 << (c&7))) == 0)
6809           {
6810           start_match++;
6811 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6812           /* In non 8-bit mode, the iteration will stop for
6813           characters > 255 at the beginning or not stop at all. */
6814           if (utf)
6815             ACROSSCHAR(start_match < end_subject, *start_match,
6816               start_match++);
6817 #endif
6818           }
6819         else break;
6820         }
6821       }
6822     }   /* Starting optimizations */
6823
6824   /* Restore fudged end_subject */
6825
6826   end_subject = save_end_subject;
6827
6828   /* The following two optimizations are disabled for partial matching or if
6829   disabling is explicitly requested. */
6830
6831   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6832     {
6833     /* If the pattern was studied, a minimum subject length may be set. This is
6834     a lower bound; no actual string of that length may actually match the
6835     pattern. Although the value is, strictly, in characters, we treat it as
6836     bytes to avoid spending too much time in this optimization. */
6837
6838     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6839         (pcre_uint32)(end_subject - start_match) < study->minlength)
6840       {
6841       rc = MATCH_NOMATCH;
6842       break;
6843       }
6844
6845     /* If req_char is set, we know that that character must appear in the
6846     subject for the match to succeed. If the first character is set, req_char
6847     must be later in the subject; otherwise the test starts at the match point.
6848     This optimization can save a huge amount of backtracking in patterns with
6849     nested unlimited repeats that aren't going to match. Writing separate code
6850     for cased/caseless versions makes it go faster, as does using an
6851     autoincrement and backing off on a match.
6852
6853     HOWEVER: when the subject string is very, very long, searching to its end
6854     can take a long time, and give bad performance on quite ordinary patterns.
6855     This showed up when somebody was matching something like /^\d+C/ on a
6856     32-megabyte string... so we don't do this when the string is sufficiently
6857     long. */
6858
6859     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6860       {
6861       PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6862
6863       /* We don't need to repeat the search if we haven't yet reached the
6864       place we found it at last time. */
6865
6866       if (p > req_char_ptr)
6867         {
6868         if (req_char != req_char2)
6869           {
6870           while (p < end_subject)
6871             {
6872             int pp = *p++;
6873             if (pp == req_char || pp == req_char2) { p--; break; }
6874             }
6875           }
6876         else
6877           {
6878           while (p < end_subject)
6879             {
6880             if (*p++ == req_char) { p--; break; }
6881             }
6882           }
6883
6884         /* If we can't find the required character, break the matching loop,
6885         forcing a match failure. */
6886
6887         if (p >= end_subject)
6888           {
6889           rc = MATCH_NOMATCH;
6890           break;
6891           }
6892
6893         /* If we have found the required character, save the point where we
6894         found it, so that we don't search again next time round the loop if
6895         the start hasn't passed this character yet. */
6896
6897         req_char_ptr = p;
6898         }
6899       }
6900     }
6901
6902 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6903   printf(">>>> Match against: ");
6904   pchars(start_match, end_subject - start_match, TRUE, md);
6905   printf("\n");
6906 #endif
6907
6908   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6909   first starting point for which a partial match was found. */
6910
6911   md->start_match_ptr = start_match;
6912   md->start_used_ptr = start_match;
6913   md->match_call_count = 0;
6914   md->match_function_type = 0;
6915   md->end_offset_top = 0;
6916   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6917   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6918
6919   switch(rc)
6920     {
6921     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6922     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6923     entirely. The only way we can do that is to re-do the match at the same
6924     point, with a flag to force SKIP with an argument to be ignored. Just
6925     treating this case as NOMATCH does not work because it does not check other
6926     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6927
6928     case MATCH_SKIP_ARG:
6929     new_start_match = start_match;
6930     md->ignore_skip_arg = TRUE;
6931     break;
6932
6933     /* SKIP passes back the next starting point explicitly, but if it is the
6934     same as the match we have just done, treat it as NOMATCH. */
6935
6936     case MATCH_SKIP:
6937     if (md->start_match_ptr != start_match)
6938       {
6939       new_start_match = md->start_match_ptr;
6940       break;
6941       }
6942     /* Fall through */
6943
6944     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6945     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6946
6947     case MATCH_NOMATCH:
6948     case MATCH_PRUNE:
6949     case MATCH_THEN:
6950     md->ignore_skip_arg = FALSE;
6951     new_start_match = start_match + 1;
6952 #ifdef SUPPORT_UTF
6953     if (utf)
6954       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6955         new_start_match++);
6956 #endif
6957     break;
6958
6959     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6960
6961     case MATCH_COMMIT:
6962     rc = MATCH_NOMATCH;
6963     goto ENDLOOP;
6964
6965     /* Any other return is either a match, or some kind of error. */
6966
6967     default:
6968     goto ENDLOOP;
6969     }
6970
6971   /* Control reaches here for the various types of "no match at this point"
6972   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6973
6974   rc = MATCH_NOMATCH;
6975
6976   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6977   newline in the subject (though it may continue over the newline). Therefore,
6978   if we have just failed to match, starting at a newline, do not continue. */
6979
6980   if (firstline && IS_NEWLINE(start_match)) break;
6981
6982   /* Advance to new matching position */
6983
6984   start_match = new_start_match;
6985
6986   /* Break the loop if the pattern is anchored or if we have passed the end of
6987   the subject. */
6988
6989   if (anchored || start_match > end_subject) break;
6990
6991   /* If we have just passed a CR and we are now at a LF, and the pattern does
6992   not contain any explicit matches for \r or \n, and the newline option is CRLF
6993   or ANY or ANYCRLF, advance the match position by one more character. In
6994   normal matching start_match will always be greater than the first position at
6995   this stage, but a failed *SKIP can cause a return at the same point, which is
6996   why the first test exists. */
6997
6998   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6999       start_match[-1] == CHAR_CR &&
7000       start_match < end_subject &&
7001       *start_match == CHAR_NL &&
7002       (re->flags & PCRE_HASCRORLF) == 0 &&
7003         (md->nltype == NLTYPE_ANY ||
7004          md->nltype == NLTYPE_ANYCRLF ||
7005          md->nllen == 2))
7006     start_match++;
7007
7008   md->mark = NULL;   /* Reset for start of next match attempt */
7009   }                  /* End of for(;;) "bumpalong" loop */
7010
7011 /* ==========================================================================*/
7012
7013 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7014 conditions is true:
7015
7016 (1) The pattern is anchored or the match was failed by (*COMMIT);
7017
7018 (2) We are past the end of the subject;
7019
7020 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7021     this option requests that a match occur at or before the first newline in
7022     the subject.
7023
7024 When we have a match and the offset vector is big enough to deal with any
7025 backreferences, captured substring offsets will already be set up. In the case
7026 where we had to get some local store to hold offsets for backreference
7027 processing, copy those that we can. In this case there need not be overflow if
7028 certain parts of the pattern were not used, even though there are more
7029 capturing parentheses than vector slots. */
7030
7031 ENDLOOP:
7032
7033 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7034   {
7035   if (using_temporary_offsets)
7036     {
7037     if (arg_offset_max >= 4)
7038       {
7039       memcpy(offsets + 2, md->offset_vector + 2,
7040         (arg_offset_max - 2) * sizeof(int));
7041       DPRINTF(("Copied offsets from temporary memory\n"));
7042       }
7043     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
7044     DPRINTF(("Freeing temporary memory\n"));
7045     (PUBL(free))(md->offset_vector);
7046     }
7047
7048   /* Set the return code to the number of captured strings, or 0 if there were
7049   too many to fit into the vector. */
7050
7051   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
7052     0 : md->end_offset_top/2;
7053
7054   /* If there is space in the offset vector, set any unused pairs at the end of
7055   the pattern to -1 for backwards compatibility. It is documented that this
7056   happens. In earlier versions, the whole set of potential capturing offsets
7057   was set to -1 each time round the loop, but this is handled differently now.
7058   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7059   those at the end that need unsetting here. We can't just unset them all at
7060   the start of the whole thing because they may get set in one branch that is
7061   not the final matching branch. */
7062
7063   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7064     {
7065     int *iptr, *iend;
7066     int resetcount = 2 + re->top_bracket * 2;
7067     if (resetcount > offsetcount) resetcount = offsetcount;
7068     iptr = offsets + md->end_offset_top;
7069     iend = offsets + resetcount;
7070     while (iptr < iend) *iptr++ = -1;
7071     }
7072
7073   /* If there is space, set up the whole thing as substring 0. The value of
7074   md->start_match_ptr might be modified if \K was encountered on the successful
7075   matching path. */
7076
7077   if (offsetcount < 2) rc = 0; else
7078     {
7079     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7080     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7081     }
7082
7083   /* Return MARK data if requested */
7084
7085   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7086     *(extra_data->mark) = (pcre_uchar *)md->mark;
7087   DPRINTF((">>>> returning %d\n", rc));
7088 #ifdef NO_RECURSE
7089   release_match_heapframes(&frame_zero);
7090 #endif
7091   return rc;
7092   }
7093
7094 /* Control gets here if there has been an error, or if the overall match
7095 attempt has failed at all permitted starting positions. */
7096
7097 if (using_temporary_offsets)
7098   {
7099   DPRINTF(("Freeing temporary memory\n"));
7100   (PUBL(free))(md->offset_vector);
7101   }
7102
7103 /* For anything other than nomatch or partial match, just return the code. The
7104 MARK pointer in extra_data is not written by this function on this path. */
7105 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7106   {
7107   DPRINTF((">>>> error: returning %d\n", rc));
7108 #ifdef NO_RECURSE
7109   release_match_heapframes(&frame_zero);
7110 #endif
7111   return rc;
7112   }
7113
7114 /* Handle partial matches - disable any mark data. NOTE(review): md->mark is
7115 cleared here, yet the value passed back below is md->nomatch_mark. */
7116 if (start_partial != NULL)
7117   {
7118   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7119   md->mark = NULL;
7120   if (offsetcount > 1)
7121     {
7122     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7123     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7124     }
7125   rc = PCRE_ERROR_PARTIAL;
7126   }
7127
7128 /* This is the classic nomatch case */
7129
7130 else
7131   {
7132   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7133   rc = PCRE_ERROR_NOMATCH;
7134   }
7135
7136 /* Return the MARK data if it has been requested. */
7137
7138 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7139   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7140 #ifdef NO_RECURSE
7141   release_match_heapframes(&frame_zero);
7142 #endif
7143 return rc;
7144 }
7145
7146 /* End of pcre_exec.c */