glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2010 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40
  41 /* This module contains pcre_exec(), the externally visible function that does
  42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  43 possible. There are also some static supporting functions. */
  44
  45 #ifdef HAVE_CONFIG_H
  46 #include "config.h"
  47 #endif
  48
  49 #define NLBLOCK md             /* Block containing newline information */
  50 #define PSSTART start_subject  /* Field containing processed string start */
  51 #define PSEND   end_subject    /* Field containing processed string end */
  52
  53 #include "pcre_internal.h"
  54
  55 /* Undefine some potentially clashing cpp symbols */
  56
  57 #undef min
  58 #undef max
  59
   60 /* Flag bits for the "flags" argument of the match() function */
   61
   62 #define match_condassert     0x01  /* Called to check a condition assertion */
   63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
   64
   65 /* Non-error returns from the match() function. Error returns are externally
   66 defined PCRE_ERROR_xxx codes, which are all negative. */
   67
   68 #define MATCH_MATCH        1
   69 #define MATCH_NOMATCH      0
   70
   71 /* Special internal returns from the match() function. Make them sufficiently
   72 negative to avoid the external error codes. */
   73
   74 #define MATCH_ACCEPT       (-999)
   75 #define MATCH_COMMIT       (-998)  /* From OP_COMMIT */
   76 #define MATCH_PRUNE        (-997)  /* From OP_PRUNE and OP_PRUNE_ARG */
   77 #define MATCH_SKIP         (-996)  /* From OP_SKIP; also OP_MARK on a name match */
   78 #define MATCH_SKIP_ARG     (-995)  /* From OP_SKIP_ARG */
   79 #define MATCH_THEN         (-994)  /* From OP_THEN and OP_THEN_ARG */
   80
   81 /* This is a convenience macro for code that occurs many times: it stores the
      current mark pointer (markptr) in md->mark and then leaves match() by way of
      RRETURN(ra). It expands to a braced block and relies on md and markptr being
      in scope at the point of use. */
   82
   83 #define MRRETURN(ra) \
   84   { \
   85   md->mark = markptr; \
   86   RRETURN(ra); \
   87   }
   88
   89 /* Maximum number of ints of offset to save on the stack for recursive calls.
   90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
   91 because the offset vector is always a multiple of 3 long. It is the dimension
      of the stacksave vector that match() declares. */
   92
   93 #define REC_STACK_SAVE_MAX 30
   94
   95 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
      NOTE(review): the code that indexes these tables is not in this part of the
      file; presumably the index is a repeat opcode's offset from the first of the
      six common repeat opcodes - confirm against the users of the tables. */
   96
   97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
   98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
  99
 100
 101
 102 #ifdef PCRE_DEBUG
 103 /*************************************************
 104 *        Debugging function to print chars       *
 105 *************************************************/
 106
 107 /* Print a sequence of chars in printable format, stopping at the end of the
 108 subject if the requested.
 109
 110 Arguments:
 111   p           points to characters
 112   length      number to print
 113   is_subject  TRUE if printing from within md->start_subject
 114   md          pointer to matching data block, if is_subject is TRUE
 115
 116 Returns:     nothing
 117 */
 118
 119 static void
 120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 121 {
 122 unsigned int c;
 123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 124 while (length-- > 0)
 125   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
 126 }
 127 #endif
 128
 129
 130
 131 /*************************************************
 132 *          Match a back-reference                *
 133 *************************************************/
 134
 135 /* If a back reference hasn't been set, the length that is passed is greater
 136 than the number of characters left in the string, so the match fails.
 137
 138 Arguments:
 139   offset      index into the offset vector
 140   eptr        points into the subject
 141   length      length to be matched
 142   md          points to match data block
 143   ims         the ims flags
 144
 145 Returns:      TRUE if matched
 146 */
 147
 148 static BOOL
 149 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 150   unsigned long int ims)
 151 {
 152 USPTR p = md->start_subject + md->offset_vector[offset];
 153
 154 #ifdef PCRE_DEBUG
 155 if (eptr >= md->end_subject)
 156   printf("matching subject <null>");
 157 else
 158   {
 159   printf("matching subject ");
 160   pchars(eptr, length, TRUE, md);
 161   }
 162 printf(" against backref ");
 163 pchars(p, length, FALSE, md);
 164 printf("\n");
 165 #endif
 166
 167 /* Always fail if not enough characters left */
 168
 169 if (length > md->end_subject - eptr) return FALSE;
 170
 171 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
 172 properly if Unicode properties are supported. Otherwise, we can check only
 173 ASCII characters. */
 174
 175 if ((ims & PCRE_CASELESS) != 0)
 176   {
 177 #ifdef SUPPORT_UTF8
 178 #ifdef SUPPORT_UCP
 179   if (md->utf8)
 180     {
 181     USPTR endptr = eptr + length;
 182     while (eptr < endptr)
 183       {
 184       int c, d;
 185       GETCHARINC(c, eptr);
 186       GETCHARINC(d, p);
 187       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
 188       }
 189     }
 190   else
 191 #endif
 192 #endif
 193
 194   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
 195   is no UCP support. */
 196
 197   while (length-- > 0)
 198     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
 199   }
 200
 201 /* In the caseful case, we can just compare the bytes, whether or not we
 202 are in UTF-8 mode. */
 203
 204 else
 205   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
 206
 207 return TRUE;
 208 }
 209
 210
 211
 212 /***************************************************************************
 213 ****************************************************************************
 214                    RECURSION IN THE match() FUNCTION
 215
 216 The match() function is highly recursive, though not every recursive call
 217 increases the recursive depth. Nevertheless, some regular expressions can cause
 218 it to recurse to a great depth. I was writing for Unix, so I just let it call
 219 itself recursively. This uses the stack for saving everything that has to be
 220 saved for a recursive call. On Unix, the stack can be large, and this works
 221 fine.
 222
 223 It turns out that on some non-Unix-like systems there are problems with
 224 programs that use a lot of stack. (This despite the fact that every last chip
 225 has oodles of memory these days, and techniques for extending the stack have
 226 been known for decades.) So....
 227
 228 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 229 calls by keeping local variables that need to be preserved in blocks of memory
  230 obtained from malloc() instead of on the stack. Macros are used to
 231 achieve this so that the actual code doesn't look very different to what it
 232 always used to.
 233
 234 The original heap-recursive code used longjmp(). However, it seems that this
 235 can be very slow on some operating systems. Following a suggestion from Stan
 236 Switzer, the use of longjmp() has been abolished, at the cost of having to
 237 provide a unique number for each call to RMATCH. There is no way of generating
 238 a sequence of numbers at compile time in C. I have given them names, to make
 239 them stand out more clearly.
 240
 241 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 242 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 243 tests. Furthermore, not using longjmp() means that local dynamic variables
 244 don't have indeterminate values; this has meant that the frame size can be
 245 reduced because the result can be "passed back" by straight setting of the
 246 variable instead of being passed in the frame.
 247 ****************************************************************************
 248 ***************************************************************************/
 249
  250 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
  251 below must be updated in sync. Each name is passed as the last argument of one
      RMATCH call; the heap version of RMATCH saves it in the calling frame's Xwhere
      field and pastes it into the label (L_RMn) that it places after the "call". */
  252
  253 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  254        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  255        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  256        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  257        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  258        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  259        RM61,  RM62 };
 260
  261 /* These versions of the macros use the stack, as normal. There are debugging
  262 versions and production versions. Note that the "rw" argument of RMATCH isn't
  263 actually used in this definition. RMATCH calls match() for real and leaves the
      result in rrc; besides its own arguments it passes on mstart, markptr and
      rdepth+1, so those names (and rrc) must be in scope where it is used. RRETURN
      is an ordinary return, with a trace message in the debugging version. */
  264
  265 #ifndef NO_RECURSE
  266 #define REGISTER register
  267
  268 #ifdef PCRE_DEBUG
  269 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  270   { \
  271   printf("match() called in line %d\n", __LINE__); \
  272   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
  273   printf("to line %d\n", __LINE__); \
  274   }
  275 #define RRETURN(ra) \
  276   { \
  277   printf("match() returned %d from line %d ", ra, __LINE__); \
  278   return ra; \
  279   }
  280 #else
  281 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  282   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
  283 #define RRETURN(ra) return ra
  284 #endif
  285
  286 #else
  287
  288
  289 /* These versions of the macros manage a private stack on the heap. Note that
  290 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
  291 argument of match(), which never changes.
      
      RMATCH gets a new frame from pcre_stack_malloc, giving up through RRETURN with
      PCRE_ERROR_NOMEMORY if that fails. It notes in the current frame (Xwhere) which
      call this is, fills in the argument fields of the new frame, chains the new
      frame to the current one, makes it current, and jumps to HEAP_RECURSE. The
      label L_<rw> at the end of the expansion is where execution continues once the
      "called" frame has returned. REGISTER is defined as nothing in this case. */
  292
  293 #define REGISTER
  294
  295 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  296   {\
  297   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
  298   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  299   frame->Xwhere = rw; \
  300   newframe->Xeptr = ra;\
  301   newframe->Xecode = rb;\
  302   newframe->Xmstart = mstart;\
  303   newframe->Xmarkptr = markptr;\
  304   newframe->Xoffset_top = rc;\
  305   newframe->Xims = re;\
  306   newframe->Xeptrb = rf;\
  307   newframe->Xflags = rg;\
  308   newframe->Xrdepth = frame->Xrdepth + 1;\
  309   newframe->Xprevframe = frame;\
  310   frame = newframe;\
  311   DPRINTF(("restarting from line %d\n", __LINE__));\
  312   goto HEAP_RECURSE;\
  313   L_##rw:\
  314   DPRINTF(("jumped back to line %d\n", __LINE__));\
  315   }
  316
      /* RRETURN releases the current frame with pcre_stack_free and makes the
      previous frame current. If there is a previous frame, the result is put into
      rrc and control goes to HEAP_RETURN; otherwise this was the top-level frame
      and match() really returns. Note that "ra" is evaluated only after the frame
      pointer has been moved back and the old frame has been freed, and that the
      macro dereferences frame unconditionally, so frame must not be NULL. */

  317 #define RRETURN(ra)\
  318   {\
  319   heapframe *oldframe = frame;\
  320   frame = oldframe->Xprevframe;\
  321   (pcre_stack_free)(oldframe);\
  322   if (frame != NULL)\
  323     {\
  324     rrc = ra;\
  325     goto HEAP_RETURN;\
  326     }\
  327   return ra;\
  328   }
 329
 330
 331 /* Structure for remembering the local variables in a private frame */
 332
 333 typedef struct heapframe {
 334   struct heapframe *Xprevframe;
 335
 336   /* Function arguments that may change */
 337
 338   USPTR Xeptr;
 339   const uschar *Xecode;
 340   USPTR Xmstart;
 341   USPTR Xmarkptr;
 342   int Xoffset_top;
 343   long int Xims;
 344   eptrblock *Xeptrb;
 345   int Xflags;
 346   unsigned int Xrdepth;
 347
 348   /* Function local variables */
 349
 350   USPTR Xcallpat;
 351 #ifdef SUPPORT_UTF8
 352   USPTR Xcharptr;
 353 #endif
 354   USPTR Xdata;
 355   USPTR Xnext;
 356   USPTR Xpp;
 357   USPTR Xprev;
 358   USPTR Xsaved_eptr;
 359
 360   recursion_info Xnew_recursive;
 361
 362   BOOL Xcur_is_word;
 363   BOOL Xcondition;
 364   BOOL Xprev_is_word;
 365
 366   unsigned long int Xoriginal_ims;
 367
 368 #ifdef SUPPORT_UCP
 369   int Xprop_type;
 370   int Xprop_value;
 371   int Xprop_fail_result;
 372   int Xprop_category;
 373   int Xprop_chartype;
 374   int Xprop_script;
 375   int Xoclength;
 376   uschar Xocchars[8];
 377 #endif
 378
 379   int Xcodelink;
 380   int Xctype;
 381   unsigned int Xfc;
 382   int Xfi;
 383   int Xlength;
 384   int Xmax;
 385   int Xmin;
 386   int Xnumber;
 387   int Xoffset;
 388   int Xop;
 389   int Xsave_capture_last;
 390   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 391   int Xstacksave[REC_STACK_SAVE_MAX];
 392
 393   eptrblock Xnewptrb;
 394
 395   /* Where to jump back to */
 396
 397   int Xwhere;
 398
 399 } heapframe;
 400
 401 #endif
 402
 403
 404 /***************************************************************************
 405 ***************************************************************************/
 406
 407
 408
 409 /*************************************************
 410 *         Match from current position            *
 411 *************************************************/
 412
 413 /* This function is called recursively in many circumstances. Whenever it
 414 returns a negative (error) response, the outer incarnation must also return the
 415 same response. */
 416
  417 /* These macros pack up tests that are used for partial matching, and which
  418 appear several times in the code. We set the "hit end" flag (md->hitend) if the
  419 pointer is at the end of the subject and also past md->start_used_ptr (i.e.
  420 something has been matched). For hard partial matching (md->partial > 1), we
  421 then return PCRE_ERROR_PARTIAL immediately. The second one is used when we
  422 already know we are past the end of the subject, so it omits that test. Both
      expand to a bare if statement that uses eptr and md, and they return by means
      of MRRETURN, so markptr must be in scope as well. */
  423
  424 #define CHECK_PARTIAL()\
  425   if (md->partial != 0 && eptr >= md->end_subject && \
  426       eptr > md->start_used_ptr) \
  427     { \
  428     md->hitend = TRUE; \
  429     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
  430     }
  431
  432 #define SCHECK_PARTIAL()\
  433   if (md->partial != 0 && eptr > md->start_used_ptr) \
  434     { \
  435     md->hitend = TRUE; \
  436     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
  437     }
 438
 439
 440 /* Performance note: It might be tempting to extract commonly used fields from
 441 the md structure (e.g. utf8, end_subject) into individual variables to improve
 442 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 443 made performance worse.
 444
 445 Arguments:
 446    eptr        pointer to current character in subject
 447    ecode       pointer to current position in compiled code
 448    mstart      pointer to the current match start position (can be modified
 449                  by encountering \K)
 450    markptr     pointer to the most recent MARK name, or NULL
 451    offset_top  current top pointer
 452    md          pointer to "static" info for the match
 453    ims         current /i, /m, and /s options
 454    eptrb       pointer to chain of blocks containing eptr at start of
 455                  brackets - for testing for empty matches
 456    flags       can contain
 457                  match_condassert - this is an assertion condition
 458                  match_cbegroup - this is the start of an unlimited repeat
 459                    group that can match an empty string
 460    rdepth      the recursion depth
 461
 462 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 463                MATCH_NOMATCH if failed to match  )
 464                a negative MATCH_xxx value for PRUNE, SKIP, etc
 465                a negative PCRE_ERROR_xxx value if aborted by an error condition
 466                  (e.g. stopped by repeated call or recursion limit)
 467 */
 468
 469 static int
 470 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
 471   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
 472   eptrblock *eptrb, int flags, unsigned int rdepth)
 473 {
 474 /* These variables do not need to be preserved over recursion in this function,
 475 so they can be ordinary variables in all cases. Mark some of them with
 476 "register" because they are used a lot in loops. */
 477
 478 register int  rrc;         /* Returns from recursive calls */
 479 register int  i;           /* Used for loops not involving calls to RMATCH() */
 480 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 481 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 482
 483 BOOL minimize, possessive; /* Quantifier options */
 484 int condcode;
 485
 486 /* When recursion is not being used, all "local" variables that have to be
 487 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 488 heap storage. Set up the top-level frame here; others are obtained from the
 489 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 490
 491 #ifdef NO_RECURSE
 492 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
 493 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
 494 frame->Xprevframe = NULL;            /* Marks the top level */
 495
 496 /* Copy in the original argument variables */
 497
 498 frame->Xeptr = eptr;
 499 frame->Xecode = ecode;
 500 frame->Xmstart = mstart;
 501 frame->Xmarkptr = markptr;
 502 frame->Xoffset_top = offset_top;
 503 frame->Xims = ims;
 504 frame->Xeptrb = eptrb;
 505 frame->Xflags = flags;
 506 frame->Xrdepth = rdepth;
 507
 508 /* This is where control jumps back to to effect "recursion" */
 509
 510 HEAP_RECURSE:
 511
 512 /* Macros make the argument variables come from the current frame */
 513
 514 #define eptr               frame->Xeptr
 515 #define ecode              frame->Xecode
 516 #define mstart             frame->Xmstart
 517 #define markptr            frame->Xmarkptr
 518 #define offset_top         frame->Xoffset_top
 519 #define ims                frame->Xims
 520 #define eptrb              frame->Xeptrb
 521 #define flags              frame->Xflags
 522 #define rdepth             frame->Xrdepth
 523
 524 /* Ditto for the local variables */
 525
 526 #ifdef SUPPORT_UTF8
 527 #define charptr            frame->Xcharptr
 528 #endif
 529 #define callpat            frame->Xcallpat
 530 #define codelink           frame->Xcodelink
 531 #define data               frame->Xdata
 532 #define next               frame->Xnext
 533 #define pp                 frame->Xpp
 534 #define prev               frame->Xprev
 535 #define saved_eptr         frame->Xsaved_eptr
 536
 537 #define new_recursive      frame->Xnew_recursive
 538
 539 #define cur_is_word        frame->Xcur_is_word
 540 #define condition          frame->Xcondition
 541 #define prev_is_word       frame->Xprev_is_word
 542
 543 #define original_ims       frame->Xoriginal_ims
 544
 545 #ifdef SUPPORT_UCP
 546 #define prop_type          frame->Xprop_type
 547 #define prop_value         frame->Xprop_value
 548 #define prop_fail_result   frame->Xprop_fail_result
 549 #define prop_category      frame->Xprop_category
 550 #define prop_chartype      frame->Xprop_chartype
 551 #define prop_script        frame->Xprop_script
 552 #define oclength           frame->Xoclength
 553 #define occhars            frame->Xocchars
 554 #endif
 555
 556 #define ctype              frame->Xctype
 557 #define fc                 frame->Xfc
 558 #define fi                 frame->Xfi
 559 #define length             frame->Xlength
 560 #define max                frame->Xmax
 561 #define min                frame->Xmin
 562 #define number             frame->Xnumber
 563 #define offset             frame->Xoffset
 564 #define op                 frame->Xop
 565 #define save_capture_last  frame->Xsave_capture_last
 566 #define save_offset1       frame->Xsave_offset1
 567 #define save_offset2       frame->Xsave_offset2
 568 #define save_offset3       frame->Xsave_offset3
 569 #define stacksave          frame->Xstacksave
 570
 571 #define newptrb            frame->Xnewptrb
 572
 573 /* When recursion is being used, local variables are allocated on the stack and
 574 get preserved during recursion in the normal way. In this environment, fi and
 575 i, and fc and c, can be the same variables. */
 576
 577 #else         /* NO_RECURSE not defined */
 578 #define fi i
 579 #define fc c
 580
 581
 582 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 583 const uschar *charptr;             /* in small blocks of the code. My normal */
 584 #endif                             /* style of coding would have declared    */
 585 const uschar *callpat;             /* them within each of those blocks.      */
 586 const uschar *data;                /* However, in order to accommodate the   */
 587 const uschar *next;                /* version of this code that uses an      */
 588 USPTR         pp;                  /* external "stack" implemented on the    */
 589 const uschar *prev;                /* heap, it is easier to declare them all */
 590 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 591                                    /* out in a block. The only declarations  */
 592 recursion_info new_recursive;      /* within blocks below are for variables  */
 593                                    /* that do not have to be preserved over  */
 594 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 595 BOOL condition;
 596 BOOL prev_is_word;
 597
 598 unsigned long int original_ims;
 599
 600 #ifdef SUPPORT_UCP
 601 int prop_type;
 602 int prop_value;
 603 int prop_fail_result;
 604 int prop_category;
 605 int prop_chartype;
 606 int prop_script;
 607 int oclength;
 608 uschar occhars[8];
 609 #endif
 610
 611 int codelink;
 612 int ctype;
 613 int length;
 614 int max;
 615 int min;
 616 int number;
 617 int offset;
 618 int op;
 619 int save_capture_last;
 620 int save_offset1, save_offset2, save_offset3;
 621 int stacksave[REC_STACK_SAVE_MAX];
 622
 623 eptrblock newptrb;
 624 #endif     /* NO_RECURSE */
 625
  626 /* These statements are here to stop the compiler complaining about uninitialized
 627 variables. */
 628
 629 #ifdef SUPPORT_UCP
 630 prop_value = 0;
 631 prop_fail_result = 0;
 632 #endif
 633
 634
 635 /* This label is used for tail recursion, which is used in a few cases even
 636 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 637 used. Thanks to Ian Taylor for noticing this possibility and sending the
 638 original patch. */
 639
 640 TAIL_RECURSE:
 641
 642 /* OK, now we can get on with the real code of the function. Recursive calls
 643 are specified by the macro RMATCH and RRETURN is used to return. When
 644 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 645 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 646 defined). However, RMATCH isn't like a function call because it's quite a
 647 complicated macro. It has to be used in one particular way. This shouldn't,
 648 however, impact performance when true recursion is being used. */
 649
 650 #ifdef SUPPORT_UTF8
 651 utf8 = md->utf8;       /* Local copy of the flag */
 652 #else
 653 utf8 = FALSE;
 654 #endif
 655
 656 /* First check that we haven't called match() too many times, or that we
 657 haven't exceeded the recursive call limit. */
 658
 659 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 660 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 661
 662 original_ims = ims;    /* Save for resetting on ')' */
 663
 664 /* At the start of a group with an unlimited repeat that may match an empty
 665 string, the match_cbegroup flag is set. When this is the case, add the current
 666 subject pointer to the chain of such remembered pointers, to be checked when we
 667 hit the closing ket, in order to break infinite loops that match no characters.
 668 When match() is called in other circumstances, don't add to the chain. The
 669 match_cbegroup flag must NOT be used with tail recursion, because the memory
 670 block that is used is on the stack, so a new one may be required for each
 671 match(). */
 672
 673 if ((flags & match_cbegroup) != 0)
 674   {
 675   newptrb.epb_saved_eptr = eptr;
 676   newptrb.epb_prev = eptrb;
 677   eptrb = &newptrb;
 678   }
 679
 680 /* Now start processing the opcodes. */
 681
 682 for (;;)
 683   {
 684   minimize = possessive = FALSE;
 685   op = *ecode;
 686
 687   switch(op)
 688     {
 689     case OP_MARK:
 690     markptr = ecode + 2;
 691     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
 692       ims, eptrb, flags, RM55);
 693
 694     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
 695     argument, and we must check whether that argument matches this MARK's
 696     argument. It is passed back in md->start_match_ptr (an overloading of that
 697     variable). If it does match, we reset that variable to the current subject
 698     position and return MATCH_SKIP. Otherwise, pass back the return code
 699     unaltered. */
 700
 701     if (rrc == MATCH_SKIP_ARG &&
 702         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
 703       {
 704       md->start_match_ptr = eptr;
 705       RRETURN(MATCH_SKIP);
 706       }
 707
 708     if (md->mark == NULL) md->mark = markptr;
 709     RRETURN(rrc);
 710
 711     case OP_FAIL:
 712     MRRETURN(MATCH_NOMATCH);
 713
 714     /* COMMIT overrides PRUNE, SKIP, and THEN */
 715
 716     case OP_COMMIT:
 717     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 718       ims, eptrb, flags, RM52);
 719     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
 720         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
 721         rrc != MATCH_THEN)
 722       RRETURN(rrc);
 723     MRRETURN(MATCH_COMMIT);
 724
 725     /* PRUNE overrides THEN */
 726
 727     case OP_PRUNE:
 728     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 729       ims, eptrb, flags, RM51);
 730     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 731     MRRETURN(MATCH_PRUNE);
 732
 733     case OP_PRUNE_ARG:
 734     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
 735       ims, eptrb, flags, RM56);
 736     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 737     md->mark = ecode + 2;
 738     RRETURN(MATCH_PRUNE);
 739
 740     /* SKIP overrides PRUNE and THEN */
 741
 742     case OP_SKIP:
 743     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 744       ims, eptrb, flags, RM53);
 745     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 746       RRETURN(rrc);
 747     md->start_match_ptr = eptr;   /* Pass back current position */
 748     MRRETURN(MATCH_SKIP);
 749
 750     case OP_SKIP_ARG:
 751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
 752       ims, eptrb, flags, RM57);
 753     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 754       RRETURN(rrc);
 755
 756     /* Pass back the current skip name by overloading md->start_match_ptr and
 757     returning the special MATCH_SKIP_ARG return code. This will either be
 758     caught by a matching MARK, or get to the top, where it is treated the same
 759     as PRUNE. */
 760
 761     md->start_match_ptr = ecode + 2;
 762     RRETURN(MATCH_SKIP_ARG);
 763
 764     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
 765     the alt that is at the start of the current branch. This makes it possible
 766     to skip back past alternatives that precede the THEN within the current
 767     branch. */
 768
 769     case OP_THEN:
 770     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 771       ims, eptrb, flags, RM54);
 772     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 773     md->start_match_ptr = ecode - GET(ecode, 1);
 774     MRRETURN(MATCH_THEN);
 775
 776     case OP_THEN_ARG:
 777     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
 778       offset_top, md, ims, eptrb, flags, RM58);
 779     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 780     md->start_match_ptr = ecode - GET(ecode, 1);
 781     md->mark = ecode + LINK_SIZE + 2;
 782     RRETURN(MATCH_THEN);
 783
 784     /* Handle a capturing bracket. If there is space in the offset vector, save
 785     the current subject position in the working slot at the top of the vector.
 786     We mustn't change the current values of the data slot, because they may be
 787     set from a previous iteration of this group, and be referred to by a
 788     reference inside the group.
 789
 790     If the bracket fails to match, we need to restore this value and also the
 791     values of the final offsets, in case they were set by a previous iteration
 792     of the same bracket.
 793
 794     If there isn't enough space in the offset vector, treat this as if it were
 795     a non-capturing bracket. Don't worry about setting the flag for the error
 796     case here; that is handled in the code for KET. */
 797
 798     case OP_CBRA:
 799     case OP_SCBRA:
 800     number = GET2(ecode, 1+LINK_SIZE);
 801     offset = number << 1;
 802
 803 #ifdef PCRE_DEBUG
 804     printf("start bracket %d\n", number);
 805     printf("subject=");
 806     pchars(eptr, 16, TRUE, md);
 807     printf("\n");
 808 #endif
 809
 810     if (offset < md->offset_max)
 811       {
 812       save_offset1 = md->offset_vector[offset];
 813       save_offset2 = md->offset_vector[offset+1];
 814       save_offset3 = md->offset_vector[md->offset_end - number];
 815       save_capture_last = md->capture_last;
 816
 817       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 818       md->offset_vector[md->offset_end - number] =
 819         (int)(eptr - md->start_subject);
 820
 821       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 822       do
 823         {
 824         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 825           ims, eptrb, flags, RM1);
 826         if (rrc != MATCH_NOMATCH &&
 827             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
 828           RRETURN(rrc);
 829         md->capture_last = save_capture_last;
 830         ecode += GET(ecode, 1);
 831         }
 832       while (*ecode == OP_ALT);
 833
 834       DPRINTF(("bracket %d failed\n", number));
 835
 836       md->offset_vector[offset] = save_offset1;
 837       md->offset_vector[offset+1] = save_offset2;
 838       md->offset_vector[md->offset_end - number] = save_offset3;
 839
 840       if (rrc != MATCH_THEN) md->mark = markptr;
 841       RRETURN(MATCH_NOMATCH);
 842       }
 843
 844     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 845     as a non-capturing bracket. */
 846
 847     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 848     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 849
 850     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 851
 852     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 853     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 854
 855     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 856     final alternative within the brackets, we would return the result of a
 857     recursive call to match() whatever happened. We can reduce stack usage by
 858     turning this into a tail recursion, except in the case when match_cbegroup
 859     is set.*/
 860
 861     case OP_BRA:
 862     case OP_SBRA:
 863     DPRINTF(("start non-capturing bracket\n"));
 864     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 865     for (;;)
 866       {
 867       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
 868         {
 869         if (flags == 0)    /* Not a possibly empty group */
 870           {
 871           ecode += _pcre_OP_lengths[*ecode];
 872           DPRINTF(("bracket 0 tail recursion\n"));
 873           goto TAIL_RECURSE;
 874           }
 875
 876         /* Possibly empty group; can't use tail recursion. */
 877
 878         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 879           eptrb, flags, RM48);
 880         if (rrc == MATCH_NOMATCH) md->mark = markptr;
 881         RRETURN(rrc);
 882         }
 883
 884       /* For non-final alternatives, continue the loop for a NOMATCH result;
 885       otherwise return. */
 886
 887       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 888         eptrb, flags, RM2);
 889       if (rrc != MATCH_NOMATCH &&
 890           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
 891         RRETURN(rrc);
 892       ecode += GET(ecode, 1);
 893       }
 894     /* Control never reaches here. */
 895
 896     /* Conditional group: compilation checked that there are no more than
 897     two branches. If the condition is false, skipping the first branch takes us
 898     past the end if there is only one branch, but that's OK because that is
 899     exactly what going to the ket would do. As there is only one branch to be
 900     obeyed, we can use tail recursion to avoid using another stack frame. */
 901
 902     case OP_COND:
 903     case OP_SCOND:
 904     codelink= GET(ecode, 1);
 905
 906     /* Because of the way auto-callout works during compile, a callout item is
 907     inserted between OP_COND and an assertion condition. */
 908
 909     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
 910       {
 911       if (pcre_callout != NULL)
 912         {
 913         pcre_callout_block cb;
 914         cb.version          = 1;   /* Version 1 of the callout block */
 915         cb.callout_number   = ecode[LINK_SIZE+2];
 916         cb.offset_vector    = md->offset_vector;
 917         cb.subject          = (PCRE_SPTR)md->start_subject;
 918         cb.subject_length   = (int)(md->end_subject - md->start_subject);
 919         cb.start_match      = (int)(mstart - md->start_subject);
 920         cb.current_position = (int)(eptr - md->start_subject);
 921         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
 922         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
 923         cb.capture_top      = offset_top/2;
 924         cb.capture_last     = md->capture_last;
 925         cb.callout_data     = md->callout_data;
 926         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
 927         if (rrc < 0) RRETURN(rrc);
 928         }
 929       ecode += _pcre_OP_lengths[OP_CALLOUT];
 930       }
 931
 932     condcode = ecode[LINK_SIZE+1];
 933
 934     /* Now see what the actual condition is */
 935
 936     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
 937       {
 938       if (md->recursive == NULL)                /* Not recursing => FALSE */
 939         {
 940         condition = FALSE;
 941         ecode += GET(ecode, 1);
 942         }
 943       else
 944         {
 945         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
 946         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
 947
 948         /* If the test is for recursion into a specific subpattern, and it is
 949         false, but the test was set up by name, scan the table to see if the
 950         name refers to any other numbers, and test them. The condition is true
 951         if any one is set. */
 952
 953         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
 954           {
 955           uschar *slotA = md->name_table;
 956           for (i = 0; i < md->name_count; i++)
 957             {
 958             if (GET2(slotA, 0) == recno) break;
 959             slotA += md->name_entry_size;
 960             }
 961
 962           /* Found a name for the number - there can be only one; duplicate
 963           names for different numbers are allowed, but not vice versa. First
 964           scan down for duplicates. */
 965
 966           if (i < md->name_count)
 967             {
 968             uschar *slotB = slotA;
 969             while (slotB > md->name_table)
 970               {
 971               slotB -= md->name_entry_size;
 972               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 973                 {
 974                 condition = GET2(slotB, 0) == md->recursive->group_num;
 975                 if (condition) break;
 976                 }
 977               else break;
 978               }
 979
 980             /* Scan up for duplicates */
 981
 982             if (!condition)
 983               {
 984               slotB = slotA;
 985               for (i++; i < md->name_count; i++)
 986                 {
 987                 slotB += md->name_entry_size;
 988                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 989                   {
 990                   condition = GET2(slotB, 0) == md->recursive->group_num;
 991                   if (condition) break;
 992                   }
 993                 else break;
 994                 }
 995               }
 996             }
 997           }
 998
 999         /* Choose branch according to the condition */
1000
1001         ecode += condition? 3 : GET(ecode, 1);
1002         }
1003       }
1004
1005     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1006       {
1007       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1008       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1009
1010       /* If the numbered capture is unset, but the reference was by name,
1011       scan the table to see if the name refers to any other numbers, and test
1012       them. The condition is true if any one is set. This is tediously similar
1013       to the code above, but not close enough to try to amalgamate. */
1014
1015       if (!condition && condcode == OP_NCREF)
1016         {
1017         int refno = offset >> 1;
1018         uschar *slotA = md->name_table;
1019
1020         for (i = 0; i < md->name_count; i++)
1021           {
1022           if (GET2(slotA, 0) == refno) break;
1023           slotA += md->name_entry_size;
1024           }
1025
1026         /* Found a name for the number - there can be only one; duplicate names
1027         for different numbers are allowed, but not vice versa. First scan down
1028         for duplicates. */
1029
1030         if (i < md->name_count)
1031           {
1032           uschar *slotB = slotA;
1033           while (slotB > md->name_table)
1034             {
1035             slotB -= md->name_entry_size;
1036             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1037               {
1038               offset = GET2(slotB, 0) << 1;
1039               condition = offset < offset_top &&
1040                 md->offset_vector[offset] >= 0;
1041               if (condition) break;
1042               }
1043             else break;
1044             }
1045
1046           /* Scan up for duplicates */
1047
1048           if (!condition)
1049             {
1050             slotB = slotA;
1051             for (i++; i < md->name_count; i++)
1052               {
1053               slotB += md->name_entry_size;
1054               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1055                 {
1056                 offset = GET2(slotB, 0) << 1;
1057                 condition = offset < offset_top &&
1058                   md->offset_vector[offset] >= 0;
1059                 if (condition) break;
1060                 }
1061               else break;
1062               }
1063             }
1064           }
1065         }
1066
1067       /* Choose branch according to the condition */
1068
1069       ecode += condition? 3 : GET(ecode, 1);
1070       }
1071
1072     else if (condcode == OP_DEF)     /* DEFINE - always false */
1073       {
1074       condition = FALSE;
1075       ecode += GET(ecode, 1);
1076       }
1077
1078     /* The condition is an assertion. Call match() to evaluate it - setting
1079     the final argument match_condassert causes it to stop at the end of an
1080     assertion. */
1081
1082     else
1083       {
1084       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1085           match_condassert, RM3);
1086       if (rrc == MATCH_MATCH)
1087         {
1088         condition = TRUE;
1089         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091         }
1092       else if (rrc != MATCH_NOMATCH &&
1093               (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094         {
1095         RRETURN(rrc);         /* Need braces because of following else */
1096         }
1097       else
1098         {
1099         condition = FALSE;
1100         ecode += codelink;
1101         }
1102       }
1103
1104     /* We are now at the branch that is to be obeyed. As there is only one,
1105     we can use tail recursion to avoid using another stack frame, except when
1106     match_cbegroup is required for an unlimited repeat of a possibly empty
1107     group. If the second alternative doesn't exist, we can just plough on. */
1108
1109     if (condition || *ecode == OP_ALT)
1110       {
1111       ecode += 1 + LINK_SIZE;
1112       if (op == OP_SCOND)        /* Possibly empty group */
1113         {
1114         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1115         RRETURN(rrc);
1116         }
1117       else                       /* Group must match something */
1118         {
1119         flags = 0;
1120         goto TAIL_RECURSE;
1121         }
1122       }
1123     else                         /* Condition false & no alternative */
1124       {
1125       ecode += 1 + LINK_SIZE;
1126       }
1127     break;
1128
1129
1130     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1131     to close any currently open capturing brackets. */
1132
1133     case OP_CLOSE:
1134     number = GET2(ecode, 1);
1135     offset = number << 1;
1136
1137 #ifdef PCRE_DEBUG
1138       printf("end bracket %d at *ACCEPT", number);
1139       printf("\n");
1140 #endif
1141
1142     md->capture_last = number;
1143     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1144       {
1145       md->offset_vector[offset] =
1146         md->offset_vector[md->offset_end - number];
1147       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148       if (offset_top <= offset) offset_top = offset + 2;
1149       }
1150     ecode += 3;
1151     break;
1152
1153
1154     /* End of the pattern, either real or forced. If we are in a top-level
1155     recursion, we should restore the offsets appropriately and continue from
1156     after the call. */
1157
1158     case OP_ACCEPT:
1159     case OP_END:
1160     if (md->recursive != NULL && md->recursive->group_num == 0)
1161       {
1162       recursion_info *rec = md->recursive;
1163       DPRINTF(("End of pattern in a (?0) recursion\n"));
1164       md->recursive = rec->prevrec;
1165       memmove(md->offset_vector, rec->offset_save,
1166         rec->saved_max * sizeof(int));
1167       offset_top = rec->save_offset_top;
1168       ims = original_ims;
1169       ecode = rec->after_call;
1170       break;
1171       }
1172
1173     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1174     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1175     the subject. In both cases, backtracking will then try other alternatives,
1176     if any. */
1177
1178     if (eptr == mstart &&
1179         (md->notempty ||
1180           (md->notempty_atstart &&
1181             mstart == md->start_subject + md->start_offset)))
1182       MRRETURN(MATCH_NOMATCH);
1183
1184     /* Otherwise, we have a match. */
1185
1186     md->end_match_ptr = eptr;           /* Record where we ended */
1187     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1188     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1189
1190     /* For some reason, the macros don't work properly if an expression is
1191     given as the argument to MRRETURN when the heap is in use. */
1192
1193     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1194     MRRETURN(rrc);
1195
1196     /* Change option settings */
1197
1198     case OP_OPT:
1199     ims = ecode[1];
1200     ecode += 2;
1201     DPRINTF(("ims set to %02lx\n", ims));
1202     break;
1203
1204     /* Assertion brackets. Check the alternative branches in turn - the
1205     matching won't pass the KET for an assertion. If any one branch matches,
1206     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1207     start of each branch to move the current point backwards, so the code at
1208     this level is identical to the lookahead case. */
1209
1210     case OP_ASSERT:
1211     case OP_ASSERTBACK:
1212     do
1213       {
1214       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215         RM4);
1216       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1217         {
1218         mstart = md->start_match_ptr;   /* In case \K reset it */
1219         break;
1220         }
1221       if (rrc != MATCH_NOMATCH &&
1222           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223         RRETURN(rrc);
1224       ecode += GET(ecode, 1);
1225       }
1226     while (*ecode == OP_ALT);
1227     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1228
1229     /* If checking an assertion for a condition, return MATCH_MATCH. */
1230
1231     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1232
1233     /* Continue from after the assertion, updating the offsets high water
1234     mark, since extracts may have been taken during the assertion. */
1235
1236     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1237     ecode += 1 + LINK_SIZE;
1238     offset_top = md->end_offset_top;
1239     continue;
1240
1241     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1242     PRUNE, or COMMIT means we must assume failure without checking subsequent
1243     branches. */
1244
1245     case OP_ASSERT_NOT:
1246     case OP_ASSERTBACK_NOT:
1247     do
1248       {
1249       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1250         RM5);
1251       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1252       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1253         {
1254         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255         break;
1256         }
1257       if (rrc != MATCH_NOMATCH &&
1258           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259         RRETURN(rrc);
1260       ecode += GET(ecode,1);
1261       }
1262     while (*ecode == OP_ALT);
1263
1264     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1265
1266     ecode += 1 + LINK_SIZE;
1267     continue;
1268
1269     /* Move the subject pointer back. This occurs only at the start of
1270     each branch of a lookbehind assertion. If we are too close to the start to
1271     move back, this match function fails. When working with UTF-8 we move
1272     back a number of characters, not bytes. */
1273
1274     case OP_REVERSE:
1275 #ifdef SUPPORT_UTF8
1276     if (utf8)
1277       {
1278       i = GET(ecode, 1);
1279       while (i-- > 0)
1280         {
1281         eptr--;
1282         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1283         BACKCHAR(eptr);
1284         }
1285       }
1286     else
1287 #endif
1288
1289     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1290
1291       {
1292       eptr -= GET(ecode, 1);
1293       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294       }
1295
1296     /* Save the earliest consulted character, then skip to next op code */
1297
1298     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1299     ecode += 1 + LINK_SIZE;
1300     break;
1301
1302     /* The callout item calls an external function, if one is provided, passing
1303     details of the match so far. This is mainly for debugging, though the
1304     function is able to force a failure. */
1305
1306     case OP_CALLOUT:
1307     if (pcre_callout != NULL)
1308       {
1309       pcre_callout_block cb;
1310       cb.version          = 1;   /* Version 1 of the callout block */
1311       cb.callout_number   = ecode[1];
1312       cb.offset_vector    = md->offset_vector;
1313       cb.subject          = (PCRE_SPTR)md->start_subject;
1314       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1315       cb.start_match      = (int)(mstart - md->start_subject);
1316       cb.current_position = (int)(eptr - md->start_subject);
1317       cb.pattern_position = GET(ecode, 2);
1318       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319       cb.capture_top      = offset_top/2;
1320       cb.capture_last     = md->capture_last;
1321       cb.callout_data     = md->callout_data;
1322       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1323       if (rrc < 0) RRETURN(rrc);
1324       }
1325     ecode += 2 + 2*LINK_SIZE;
1326     break;
1327
1328     /* Recursion either matches the current regex, or some subexpression. The
1329     offset data is the offset to the starting bracket from the start of the
1330     whole pattern. (This is so that it works from duplicated subpatterns.)
1331
1332     If there are any capturing brackets started but not finished, we have to
1333     save their starting points and reinstate them after the recursion. However,
1334     we don't know how many such there are (offset_top records the completed
1335     total) so we just have to save all the potential data. There may be up to
1336     65535 such values, which is too large to put on the stack, but using malloc
1337     for small numbers seems expensive. As a compromise, the stack is used when
1338     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1339     is used. If the malloc fails, no partial save is attempted: the code
1340     below gives up at once and returns PCRE_ERROR_NOMEMORY, so the offsets
1341     are never left in a half-saved state.
1342
1343     There are also other values that have to be saved. We use a chained
1344     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1345     for the original version of this logic. */
1346
1347     case OP_RECURSE:
1348       {
1349       callpat = md->start_code + GET(ecode, 1);
1350       new_recursive.group_num = (callpat == md->start_code)? 0 :
1351         GET2(callpat, 1 + LINK_SIZE);
1352
1353       /* Add to "recursing stack" */
1354
1355       new_recursive.prevrec = md->recursive;
1356       md->recursive = &new_recursive;
1357
1358       /* Find where to continue from afterwards */
1359
1360       ecode += 1 + LINK_SIZE;
1361       new_recursive.after_call = ecode;
1362
1363       /* Now save the offset data. */
1364
1365       new_recursive.saved_max = md->offset_end;
1366       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1367         new_recursive.offset_save = stacksave;
1368       else
1369         {
1370         new_recursive.offset_save =
1371           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1372         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1373         }
1374
1375       memcpy(new_recursive.offset_save, md->offset_vector,
1376             new_recursive.saved_max * sizeof(int));
1377       new_recursive.save_offset_top = offset_top;
1378
1379       /* OK, now we can do the recursion. For each top-level alternative we
1380       restore the offset and recursion data. */
1381
1382       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1383       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1384       do
1385         {
1386         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1387           md, ims, eptrb, flags, RM6);
1388         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1389           {
1390           DPRINTF(("Recursion matched\n"));
1391           md->recursive = new_recursive.prevrec;
1392           if (new_recursive.offset_save != stacksave)
1393             (pcre_free)(new_recursive.offset_save);
1394           MRRETURN(MATCH_MATCH);
1395           }
1396         else if (rrc != MATCH_NOMATCH &&
1397                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398           {
1399           DPRINTF(("Recursion gave error %d\n", rrc));
1400           if (new_recursive.offset_save != stacksave)
1401             (pcre_free)(new_recursive.offset_save);
1402           RRETURN(rrc);
1403           }
1404
1405         md->recursive = &new_recursive;
1406         memcpy(md->offset_vector, new_recursive.offset_save,
1407             new_recursive.saved_max * sizeof(int));
1408         callpat += GET(callpat, 1);
1409         }
1410       while (*callpat == OP_ALT);
1411
1412       DPRINTF(("Recursion didn't match\n"));
1413       md->recursive = new_recursive.prevrec;
1414       if (new_recursive.offset_save != stacksave)
1415         (pcre_free)(new_recursive.offset_save);
1416       MRRETURN(MATCH_NOMATCH);
1417       }
1418     /* Control never reaches here */
1419
1420     /* "Once" brackets are like assertion brackets except that after a match,
1421     the point in the subject string is not moved back. Thus there can never be
1422     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423     Check the alternative branches in turn - the matching won't pass the KET
1424     for this kind of subpattern. If any one branch matches, we carry on as at
1425     the end of a normal bracket, leaving the subject pointer, but resetting
1426     the start-of-match value in case it was changed by \K. */
1427
1428     case OP_ONCE:
1429     prev = ecode;
1430     saved_eptr = eptr;
1431
1432     do
1433       {
1434       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1435       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1436         {
1437         mstart = md->start_match_ptr;
1438         break;
1439         }
1440       if (rrc != MATCH_NOMATCH &&
1441           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442         RRETURN(rrc);
1443       ecode += GET(ecode,1);
1444       }
1445     while (*ecode == OP_ALT);
1446
1447     /* If hit the end of the group (which could be repeated), fail */
1448
1449     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1450
1451     /* Continue as from after the assertion, updating the offsets high water
1452     mark, since extracts may have been taken. */
1453
1454     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1455
1456     offset_top = md->end_offset_top;
1457     eptr = md->end_match_ptr;
1458
1459     /* For a non-repeating ket, just continue at this level. This also
1460     happens for a repeating ket if no characters were matched in the group.
1461     This is the forcible breaking of infinite loops as implemented in Perl
1462     5.005. If there is an options reset, it will get obeyed in the normal
1463     course of events. */
1464
1465     if (*ecode == OP_KET || eptr == saved_eptr)
1466       {
1467       ecode += 1+LINK_SIZE;
1468       break;
1469       }
1470
1471     /* The repeating kets try the rest of the pattern or restart from the
1472     preceding bracket, in the appropriate order. The second "call" of match()
1473     uses tail recursion, to avoid using another stack frame. We need to reset
1474     any options that changed within the bracket before re-running it, so
1475     check the next opcode. */
1476
1477     if (ecode[1+LINK_SIZE] == OP_OPT)
1478       {
1479       ims = (ims & ~PCRE_IMS) | ecode[4];
1480       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1481       }
1482
1483     if (*ecode == OP_KETRMIN)
1484       {
1485       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1486       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1487       ecode = prev;
1488       flags = 0;
1489       goto TAIL_RECURSE;
1490       }
1491     else  /* OP_KETRMAX */
1492       {
1493       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1494       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495       ecode += 1 + LINK_SIZE;
1496       flags = 0;
1497       goto TAIL_RECURSE;
1498       }
1499     /* Control never gets here */
1500
1501     /* An alternation is the end of a branch; scan along to find the end of the
1502     bracketed group and go to there. */
1503
1504     case OP_ALT:
1505     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1506     break;
1507
1508     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1509     indicating that it may occur zero times. It may repeat infinitely, or not
1510     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1511     with fixed upper repeat limits are compiled as a number of copies, with the
1512     optional ones preceded by BRAZERO or BRAMINZERO. */
1513
1514     case OP_BRAZERO:
1515       {
1516       next = ecode+1;
1517       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1518       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1519       do next += GET(next,1); while (*next == OP_ALT);
1520       ecode = next + 1 + LINK_SIZE;
1521       }
1522     break;
1523
1524     case OP_BRAMINZERO:
1525       {
1526       next = ecode+1;
1527       do next += GET(next, 1); while (*next == OP_ALT);
1528       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1529       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530       ecode++;
1531       }
1532     break;
1533
1534     case OP_SKIPZERO:
1535       {
1536       next = ecode+1;
1537       do next += GET(next,1); while (*next == OP_ALT);
1538       ecode = next + 1 + LINK_SIZE;
1539       }
1540     break;
1541
1542     /* End of a group, repeated or non-repeating. */
1543
1544     case OP_KET:
1545     case OP_KETRMIN:
1546     case OP_KETRMAX:
1547     prev = ecode - GET(ecode, 1);
1548
1549     /* If this was a group that remembered the subject start, in order to break
1550     infinite repeats of empty string matches, retrieve the subject start from
1551     the chain. Otherwise, set it NULL. */
1552
1553     if (*prev >= OP_SBRA)
1554       {
1555       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1556       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1557       }
1558     else saved_eptr = NULL;
1559
1560     /* If we are at the end of an assertion group or an atomic group, stop
1561     matching and return MATCH_MATCH, but record the current high water mark for
1562     use by positive assertions. We also need to record the match start in case
1563     it was changed by \K. */
1564
1565     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1566         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1567         *prev == OP_ONCE)
1568       {
1569       md->end_match_ptr = eptr;      /* For ONCE */
1570       md->end_offset_top = offset_top;
1571       md->start_match_ptr = mstart;
1572       MRRETURN(MATCH_MATCH);
1573       }
1574
1575     /* For capturing groups we have to check the group number back at the start
1576     and if necessary complete handling an extraction by setting the offsets and
1577     bumping the high water mark. Note that whole-pattern recursion is coded as
1578     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1579     when the OP_END is reached. Other recursion is handled here. */
1580
1581     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1582       {
1583       number = GET2(prev, 1+LINK_SIZE);
1584       offset = number << 1;
1585
1586 #ifdef PCRE_DEBUG
1587       printf("end bracket %d", number);
1588       printf("\n");
1589 #endif
1590
1591       md->capture_last = number;
1592       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1593         {
1594         md->offset_vector[offset] =
1595           md->offset_vector[md->offset_end - number];
1596         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597         if (offset_top <= offset) offset_top = offset + 2;
1598         }
1599
1600       /* Handle a recursively called group. Restore the offsets
1601       appropriately and continue from after the call. */
1602
1603       if (md->recursive != NULL && md->recursive->group_num == number)
1604         {
1605         recursion_info *rec = md->recursive;
1606         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1607         md->recursive = rec->prevrec;
1608         memcpy(md->offset_vector, rec->offset_save,
1609           rec->saved_max * sizeof(int));
1610         offset_top = rec->save_offset_top;
1611         ecode = rec->after_call;
1612         ims = original_ims;
1613         break;
1614         }
1615       }
1616
1617     /* For both capturing and non-capturing groups, reset the value of the ims
1618     flags, in case they got changed during the group. */
1619
1620     ims = original_ims;
1621     DPRINTF(("ims reset to %02lx\n", ims));
1622
1623     /* For a non-repeating ket, just continue at this level. This also
1624     happens for a repeating ket if no characters were matched in the group.
1625     This is the forcible breaking of infinite loops as implemented in Perl
1626     5.005. If there is an options reset, it will get obeyed in the normal
1627     course of events. */
1628
1629     if (*ecode == OP_KET || eptr == saved_eptr)
1630       {
1631       ecode += 1 + LINK_SIZE;
1632       break;
1633       }
1634
1635     /* The repeating kets try the rest of the pattern or restart from the
1636     preceding bracket, in the appropriate order. In the second case, we can use
1637     tail recursion to avoid using another stack frame, unless we have an
1638     unlimited repeat of a group that can match an empty string. */
1639
1640     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1641
1642     if (*ecode == OP_KETRMIN)
1643       {
1644       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1645       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646       if (flags != 0)    /* Could match an empty string */
1647         {
1648         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1649         RRETURN(rrc);
1650         }
1651       ecode = prev;
1652       goto TAIL_RECURSE;
1653       }
1654     else  /* OP_KETRMAX */
1655       {
1656       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1657       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1658       ecode += 1 + LINK_SIZE;
1659       flags = 0;
1660       goto TAIL_RECURSE;
1661       }
1662     /* Control never gets here */
1663
1664     /* Start of subject unless notbol, or after internal newline if multiline */
1665
1666     case OP_CIRC:
1667     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1668     if ((ims & PCRE_MULTILINE) != 0)
1669       {
1670       if (eptr != md->start_subject &&
1671           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1672         MRRETURN(MATCH_NOMATCH);
1673       ecode++;
1674       break;
1675       }
1676     /* ... else fall through */
1677
1678     /* Start of subject assertion */
1679
1680     case OP_SOD:
1681     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1682     ecode++;
1683     break;
1684
1685     /* Start of match assertion */
1686
1687     case OP_SOM:
1688     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1689     ecode++;
1690     break;
1691
1692     /* Reset the start of match point */
1693
1694     case OP_SET_SOM:
1695     mstart = eptr;
1696     ecode++;
1697     break;
1698
1699     /* Assert before internal newline if multiline, or before a terminating
1700     newline unless endonly is set, else end of subject unless noteol is set. */
1701
1702     case OP_DOLL:
1703     if ((ims & PCRE_MULTILINE) != 0)
1704       {
1705       if (eptr < md->end_subject)
1706         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707       else
1708         {
1709         if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710         SCHECK_PARTIAL();
1711         }
1712       ecode++;
1713       break;
1714       }
1715     else  /* Not multiline */
1716       {
1717       if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718       if (!md->endonly) goto ASSERT_NL_OR_EOS;
1719       }
1720
1721     /* ... else fall through for endonly */
1722
1723     /* End of subject assertion (\z) */
1724
1725     case OP_EOD:
1726     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727     SCHECK_PARTIAL();
1728     ecode++;
1729     break;
1730
1731     /* End of subject or ending \n assertion (\Z) */
1732
1733     case OP_EODN:
1734     ASSERT_NL_OR_EOS:
1735     if (eptr < md->end_subject &&
1736         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737       MRRETURN(MATCH_NOMATCH);
1738
1739     /* Either at end of string or \n before end. */
1740
1741     SCHECK_PARTIAL();
1742     ecode++;
1743     break;
1744
1745     /* Word boundary assertions */
1746
1747     case OP_NOT_WORD_BOUNDARY:
1748     case OP_WORD_BOUNDARY:
1749       {
1750
1751       /* Find out if the previous and current characters are "word" characters.
1752       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1753       be "non-word" characters. Remember the earliest consulted character for
1754       partial matching. */
1755
1756 #ifdef SUPPORT_UTF8
1757       if (utf8)
1758         {
1759         /* Get status of previous character */
1760
1761         if (eptr == md->start_subject) prev_is_word = FALSE; else
1762           {
1763           USPTR lastptr = eptr - 1;
1764           while((*lastptr & 0xc0) == 0x80) lastptr--;
1765           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766           GETCHAR(c, lastptr);
1767 #ifdef SUPPORT_UCP
1768           if (md->use_ucp)
1769             {
1770             if (c == '_') prev_is_word = TRUE; else
1771               {
1772               int cat = UCD_CATEGORY(c);
1773               prev_is_word = (cat == ucp_L || cat == ucp_N);
1774               }
1775             }
1776           else
1777 #endif
1778           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779           }
1780
1781         /* Get status of next character */
1782
1783         if (eptr >= md->end_subject)
1784           {
1785           SCHECK_PARTIAL();
1786           cur_is_word = FALSE;
1787           }
1788         else
1789           {
1790           GETCHAR(c, eptr);
1791 #ifdef SUPPORT_UCP
1792           if (md->use_ucp)
1793             {
1794             if (c == '_') cur_is_word = TRUE; else
1795               {
1796               int cat = UCD_CATEGORY(c);
1797               cur_is_word = (cat == ucp_L || cat == ucp_N);
1798               }
1799             }
1800           else
1801 #endif
1802           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803           }
1804         }
1805       else
1806 #endif
1807
1808       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809       consistency with the behaviour of \w we do use it in this case. */
1810
1811         {
1812         /* Get status of previous character */
1813
1814         if (eptr == md->start_subject) prev_is_word = FALSE; else
1815           {
1816           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817 #ifdef SUPPORT_UCP
1818           if (md->use_ucp)
1819             {
1820             c = eptr[-1];
1821             if (c == '_') prev_is_word = TRUE; else
1822               {
1823               int cat = UCD_CATEGORY(c);
1824               prev_is_word = (cat == ucp_L || cat == ucp_N);
1825               }
1826             }
1827           else
1828 #endif
1829           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830           }
1831
1832         /* Get status of next character */
1833
1834         if (eptr >= md->end_subject)
1835           {
1836           SCHECK_PARTIAL();
1837           cur_is_word = FALSE;
1838           }
1839         else
1840 #ifdef SUPPORT_UCP
1841         if (md->use_ucp)
1842           {
1843           c = *eptr;
1844           if (c == '_') cur_is_word = TRUE; else
1845             {
1846             int cat = UCD_CATEGORY(c);
1847             cur_is_word = (cat == ucp_L || cat == ucp_N);
1848             }
1849           }
1850         else
1851 #endif
1852         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853         }
1854
1855       /* Now see if the situation is what we want */
1856
1857       if ((*ecode++ == OP_WORD_BOUNDARY)?
1858            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1859         MRRETURN(MATCH_NOMATCH);
1860       }
1861     break;
1862
1863     /* Match a single character type; inline for speed */
1864
1865     case OP_ANY:
1866     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1867     /* Fall through */
1868
1869     case OP_ALLANY:
1870     if (eptr++ >= md->end_subject)
1871       {
1872       SCHECK_PARTIAL();
1873       MRRETURN(MATCH_NOMATCH);
1874       }
1875     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1876     ecode++;
1877     break;
1878
1879     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1880     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1881
1882     case OP_ANYBYTE:
1883     if (eptr++ >= md->end_subject)
1884       {
1885       SCHECK_PARTIAL();
1886       MRRETURN(MATCH_NOMATCH);
1887       }
1888     ecode++;
1889     break;
1890
1891     case OP_NOT_DIGIT:
1892     if (eptr >= md->end_subject)
1893       {
1894       SCHECK_PARTIAL();
1895       MRRETURN(MATCH_NOMATCH);
1896       }
1897     GETCHARINCTEST(c, eptr);
1898     if (
1899 #ifdef SUPPORT_UTF8
1900        c < 256 &&
1901 #endif
1902        (md->ctypes[c] & ctype_digit) != 0
1903        )
1904       MRRETURN(MATCH_NOMATCH);
1905     ecode++;
1906     break;
1907
1908     case OP_DIGIT:
1909     if (eptr >= md->end_subject)
1910       {
1911       SCHECK_PARTIAL();
1912       MRRETURN(MATCH_NOMATCH);
1913       }
1914     GETCHARINCTEST(c, eptr);
1915     if (
1916 #ifdef SUPPORT_UTF8
1917        c >= 256 ||
1918 #endif
1919        (md->ctypes[c] & ctype_digit) == 0
1920        )
1921       MRRETURN(MATCH_NOMATCH);
1922     ecode++;
1923     break;
1924
1925     case OP_NOT_WHITESPACE:
1926     if (eptr >= md->end_subject)
1927       {
1928       SCHECK_PARTIAL();
1929       MRRETURN(MATCH_NOMATCH);
1930       }
1931     GETCHARINCTEST(c, eptr);
1932     if (
1933 #ifdef SUPPORT_UTF8
1934        c < 256 &&
1935 #endif
1936        (md->ctypes[c] & ctype_space) != 0
1937        )
1938       MRRETURN(MATCH_NOMATCH);
1939     ecode++;
1940     break;
1941
1942     case OP_WHITESPACE:
1943     if (eptr >= md->end_subject)
1944       {
1945       SCHECK_PARTIAL();
1946       MRRETURN(MATCH_NOMATCH);
1947       }
1948     GETCHARINCTEST(c, eptr);
1949     if (
1950 #ifdef SUPPORT_UTF8
1951        c >= 256 ||
1952 #endif
1953        (md->ctypes[c] & ctype_space) == 0
1954        )
1955       MRRETURN(MATCH_NOMATCH);
1956     ecode++;
1957     break;
1958
1959     case OP_NOT_WORDCHAR:
1960     if (eptr >= md->end_subject)
1961       {
1962       SCHECK_PARTIAL();
1963       MRRETURN(MATCH_NOMATCH);
1964       }
1965     GETCHARINCTEST(c, eptr);
1966     if (
1967 #ifdef SUPPORT_UTF8
1968        c < 256 &&
1969 #endif
1970        (md->ctypes[c] & ctype_word) != 0
1971        )
1972       MRRETURN(MATCH_NOMATCH);
1973     ecode++;
1974     break;
1975
1976     case OP_WORDCHAR:
1977     if (eptr >= md->end_subject)
1978       {
1979       SCHECK_PARTIAL();
1980       MRRETURN(MATCH_NOMATCH);
1981       }
1982     GETCHARINCTEST(c, eptr);
1983     if (
1984 #ifdef SUPPORT_UTF8
1985        c >= 256 ||
1986 #endif
1987        (md->ctypes[c] & ctype_word) == 0
1988        )
1989       MRRETURN(MATCH_NOMATCH);
1990     ecode++;
1991     break;
1992
1993     case OP_ANYNL:
1994     if (eptr >= md->end_subject)
1995       {
1996       SCHECK_PARTIAL();
1997       MRRETURN(MATCH_NOMATCH);
1998       }
1999     GETCHARINCTEST(c, eptr);
2000     switch(c)
2001       {
2002       default: MRRETURN(MATCH_NOMATCH);
2003       case 0x000d:
2004       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2005       break;
2006
2007       case 0x000a:
2008       break;
2009
2010       case 0x000b:
2011       case 0x000c:
2012       case 0x0085:
2013       case 0x2028:
2014       case 0x2029:
2015       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2016       break;
2017       }
2018     ecode++;
2019     break;
2020
2021     case OP_NOT_HSPACE:
2022     if (eptr >= md->end_subject)
2023       {
2024       SCHECK_PARTIAL();
2025       MRRETURN(MATCH_NOMATCH);
2026       }
2027     GETCHARINCTEST(c, eptr);
2028     switch(c)
2029       {
2030       default: break;
2031       case 0x09:      /* HT */
2032       case 0x20:      /* SPACE */
2033       case 0xa0:      /* NBSP */
2034       case 0x1680:    /* OGHAM SPACE MARK */
2035       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2036       case 0x2000:    /* EN QUAD */
2037       case 0x2001:    /* EM QUAD */
2038       case 0x2002:    /* EN SPACE */
2039       case 0x2003:    /* EM SPACE */
2040       case 0x2004:    /* THREE-PER-EM SPACE */
2041       case 0x2005:    /* FOUR-PER-EM SPACE */
2042       case 0x2006:    /* SIX-PER-EM SPACE */
2043       case 0x2007:    /* FIGURE SPACE */
2044       case 0x2008:    /* PUNCTUATION SPACE */
2045       case 0x2009:    /* THIN SPACE */
2046       case 0x200A:    /* HAIR SPACE */
2047       case 0x202f:    /* NARROW NO-BREAK SPACE */
2048       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2049       case 0x3000:    /* IDEOGRAPHIC SPACE */
2050       MRRETURN(MATCH_NOMATCH);
2051       }
2052     ecode++;
2053     break;
2054
2055     case OP_HSPACE:
2056     if (eptr >= md->end_subject)
2057       {
2058       SCHECK_PARTIAL();
2059       MRRETURN(MATCH_NOMATCH);
2060       }
2061     GETCHARINCTEST(c, eptr);
2062     switch(c)
2063       {
2064       default: MRRETURN(MATCH_NOMATCH);
2065       case 0x09:      /* HT */
2066       case 0x20:      /* SPACE */
2067       case 0xa0:      /* NBSP */
2068       case 0x1680:    /* OGHAM SPACE MARK */
2069       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2070       case 0x2000:    /* EN QUAD */
2071       case 0x2001:    /* EM QUAD */
2072       case 0x2002:    /* EN SPACE */
2073       case 0x2003:    /* EM SPACE */
2074       case 0x2004:    /* THREE-PER-EM SPACE */
2075       case 0x2005:    /* FOUR-PER-EM SPACE */
2076       case 0x2006:    /* SIX-PER-EM SPACE */
2077       case 0x2007:    /* FIGURE SPACE */
2078       case 0x2008:    /* PUNCTUATION SPACE */
2079       case 0x2009:    /* THIN SPACE */
2080       case 0x200A:    /* HAIR SPACE */
2081       case 0x202f:    /* NARROW NO-BREAK SPACE */
2082       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2083       case 0x3000:    /* IDEOGRAPHIC SPACE */
2084       break;
2085       }
2086     ecode++;
2087     break;
2088
2089     case OP_NOT_VSPACE:
2090     if (eptr >= md->end_subject)
2091       {
2092       SCHECK_PARTIAL();
2093       MRRETURN(MATCH_NOMATCH);
2094       }
2095     GETCHARINCTEST(c, eptr);
2096     switch(c)
2097       {
2098       default: break;
2099       case 0x0a:      /* LF */
2100       case 0x0b:      /* VT */
2101       case 0x0c:      /* FF */
2102       case 0x0d:      /* CR */
2103       case 0x85:      /* NEL */
2104       case 0x2028:    /* LINE SEPARATOR */
2105       case 0x2029:    /* PARAGRAPH SEPARATOR */
2106       MRRETURN(MATCH_NOMATCH);
2107       }
2108     ecode++;
2109     break;
2110
2111     case OP_VSPACE:
2112     if (eptr >= md->end_subject)
2113       {
2114       SCHECK_PARTIAL();
2115       MRRETURN(MATCH_NOMATCH);
2116       }
2117     GETCHARINCTEST(c, eptr);
2118     switch(c)
2119       {
2120       default: MRRETURN(MATCH_NOMATCH);
2121       case 0x0a:      /* LF */
2122       case 0x0b:      /* VT */
2123       case 0x0c:      /* FF */
2124       case 0x0d:      /* CR */
2125       case 0x85:      /* NEL */
2126       case 0x2028:    /* LINE SEPARATOR */
2127       case 0x2029:    /* PARAGRAPH SEPARATOR */
2128       break;
2129       }
2130     ecode++;
2131     break;
2132
2133 #ifdef SUPPORT_UCP
2134     /* Check the next character by Unicode property. We will get here only
2135     if the support is in the binary; otherwise a compile-time error occurs. */
2136
2137     case OP_PROP:
2138     case OP_NOTPROP:
2139     if (eptr >= md->end_subject)
2140       {
2141       SCHECK_PARTIAL();
2142       MRRETURN(MATCH_NOMATCH);
2143       }
2144     GETCHARINCTEST(c, eptr);
2145       {
2146       int chartype = UCD_CHARTYPE(c);
2147
2148       switch(ecode[1])
2149         {
2150         case PT_ANY:
2151         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2152         break;
2153
2154         case PT_LAMP:
2155         if ((chartype == ucp_Lu ||
2156              chartype == ucp_Ll ||
2157              chartype == ucp_Lt) == (op == OP_NOTPROP))
2158           MRRETURN(MATCH_NOMATCH);
2159         break;
2160
2161         case PT_GC:
2162         if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
2163           MRRETURN(MATCH_NOMATCH);
2164         break;
2165
2166         case PT_PC:
2167         if ((ecode[2] != chartype) == (op == OP_PROP))
2168           MRRETURN(MATCH_NOMATCH);
2169         break;
2170
2171         case PT_SC:
2172         if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
2173           MRRETURN(MATCH_NOMATCH);
2174         break;
2175
2176         /* These are specials */
2177
2178         case PT_ALNUM:
2179         if ((_pcre_ucp_gentype[chartype] == ucp_L ||
2180              _pcre_ucp_gentype[chartype] == ucp_N) == (op == OP_NOTPROP))
2181           MRRETURN(MATCH_NOMATCH);
2182         break;
2183
2184         case PT_SPACE:    /* Perl space */
2185         if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
2186              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187                == (op == OP_NOTPROP))
2188           MRRETURN(MATCH_NOMATCH);
2189         break;
2190
2191         case PT_PXSPACE:  /* POSIX space */
2192         if ((_pcre_ucp_gentype[chartype] == ucp_Z ||
2193              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194              c == CHAR_FF || c == CHAR_CR)
2195                == (op == OP_NOTPROP))
2196           MRRETURN(MATCH_NOMATCH);
2197         break;
2198
2199         case PT_WORD:
2200         if ((_pcre_ucp_gentype[chartype] == ucp_L ||
2201              _pcre_ucp_gentype[chartype] == ucp_N ||
2202              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203           MRRETURN(MATCH_NOMATCH);
2204         break;
2205
2206         /* This should never occur */
2207
2208         default:
2209         RRETURN(PCRE_ERROR_INTERNAL);
2210         }
2211
2212       ecode += 3;
2213       }
2214     break;
2215
2216     /* Match an extended Unicode sequence. We will get here only if the support
2217     is in the binary; otherwise a compile-time error occurs. */
2218
2219     case OP_EXTUNI:
2220     if (eptr >= md->end_subject)
2221       {
2222       SCHECK_PARTIAL();
2223       MRRETURN(MATCH_NOMATCH);
2224       }
2225     GETCHARINCTEST(c, eptr);
2226       {
2227       int category = UCD_CATEGORY(c);
2228       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2229       while (eptr < md->end_subject)
2230         {
2231         int len = 1;
2232         if (!utf8) c = *eptr; else
2233           {
2234           GETCHARLEN(c, eptr, len);
2235           }
2236         category = UCD_CATEGORY(c);
2237         if (category != ucp_M) break;
2238         eptr += len;
2239         }
2240       }
2241     ecode++;
2242     break;
2243 #endif
2244
2245
2246     /* Match a back reference, possibly repeatedly. Look past the end of the
2247     item to see if there is repeat information following. The code is similar
2248     to that for character classes, but repeated for efficiency. Then obey
2249     similar code to character type repeats - written out again for speed.
2250     However, if the referenced string is the empty string, always treat
2251     it as matched, any number of times (otherwise there could be infinite
2252     loops). */
2253
2254     case OP_REF:
2255       {
2256       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2257       ecode += 3;
2258
2259       /* If the reference is unset, there are two possibilities:
2260
2261       (a) In the default, Perl-compatible state, set the length to be longer
2262       than the amount of subject left; this ensures that every attempt at a
2263       match fails. We can't just fail here, because of the possibility of
2264       quantifiers with zero minima.
2265
2266       (b) If the JavaScript compatibility flag is set, set the length to zero
2267       so that the back reference matches an empty string.
2268
2269       Otherwise, set the length to the length of what was matched by the
2270       referenced subpattern. */
2271
2272       if (offset >= offset_top || md->offset_vector[offset] < 0)
2273         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274       else
2275         length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276
2277       /* Set up for repetition, or handle the non-repeated case */
2278
2279       switch (*ecode)
2280         {
2281         case OP_CRSTAR:
2282         case OP_CRMINSTAR:
2283         case OP_CRPLUS:
2284         case OP_CRMINPLUS:
2285         case OP_CRQUERY:
2286         case OP_CRMINQUERY:
2287         c = *ecode++ - OP_CRSTAR;
2288         minimize = (c & 1) != 0;
2289         min = rep_min[c];                 /* Pick up values from tables; */
2290         max = rep_max[c];                 /* zero for max => infinity */
2291         if (max == 0) max = INT_MAX;
2292         break;
2293
2294         case OP_CRRANGE:
2295         case OP_CRMINRANGE:
2296         minimize = (*ecode == OP_CRMINRANGE);
2297         min = GET2(ecode, 1);
2298         max = GET2(ecode, 3);
2299         if (max == 0) max = INT_MAX;
2300         ecode += 5;
2301         break;
2302
2303         default:               /* No repeat follows */
2304         if (!match_ref(offset, eptr, length, md, ims))
2305           {
2306           CHECK_PARTIAL();
2307           MRRETURN(MATCH_NOMATCH);
2308           }
2309         eptr += length;
2310         continue;              /* With the main loop */
2311         }
2312
2313       /* If the length of the reference is zero, just continue with the
2314       main loop. */
2315
2316       if (length == 0) continue;
2317
2318       /* First, ensure the minimum number of matches are present. We get back
2319       the length of the reference string explicitly rather than passing the
2320       address of eptr, so that eptr can be a register variable. */
2321
2322       for (i = 1; i <= min; i++)
2323         {
2324         if (!match_ref(offset, eptr, length, md, ims))
2325           {
2326           CHECK_PARTIAL();
2327           MRRETURN(MATCH_NOMATCH);
2328           }
2329         eptr += length;
2330         }
2331
2332       /* If min = max, continue at the same level without recursion.
2333       They are not both allowed to be zero. */
2334
2335       if (min == max) continue;
2336
2337       /* If minimizing, keep trying and advancing the pointer */
2338
2339       if (minimize)
2340         {
2341         for (fi = min;; fi++)
2342           {
2343           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2344           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345           if (fi >= max) MRRETURN(MATCH_NOMATCH);
2346           if (!match_ref(offset, eptr, length, md, ims))
2347             {
2348             CHECK_PARTIAL();
2349             MRRETURN(MATCH_NOMATCH);
2350             }
2351           eptr += length;
2352           }
2353         /* Control never gets here */
2354         }
2355
2356       /* If maximizing, find the longest string and work backwards */
2357
2358       else
2359         {
2360         pp = eptr;
2361         for (i = min; i < max; i++)
2362           {
2363           if (!match_ref(offset, eptr, length, md, ims))
2364             {
2365             CHECK_PARTIAL();
2366             break;
2367             }
2368           eptr += length;
2369           }
2370         while (eptr >= pp)
2371           {
2372           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2373           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2374           eptr -= length;
2375           }
2376         MRRETURN(MATCH_NOMATCH);
2377         }
2378       }
2379     /* Control never gets here */
2380
2381     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2382     used when all the characters in the class have values in the range 0-255,
2383     and either the matching is caseful, or the characters are in the range
2384     0-127 when UTF-8 processing is enabled. The only difference between
2385     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2386     encountered.
2387
2388     First, look past the end of the item to see if there is repeat information
2389     following. Then obey similar code to character type repeats - written out
2390     again for speed. */
2391
2392     case OP_NCLASS:
2393     case OP_CLASS:
2394       {
2395       data = ecode + 1;                /* Save for matching */
2396       ecode += 33;                     /* Advance past the item */
2397
2398       switch (*ecode)
2399         {
2400         case OP_CRSTAR:
2401         case OP_CRMINSTAR:
2402         case OP_CRPLUS:
2403         case OP_CRMINPLUS:
2404         case OP_CRQUERY:
2405         case OP_CRMINQUERY:
2406         c = *ecode++ - OP_CRSTAR;
2407         minimize = (c & 1) != 0;
2408         min = rep_min[c];                 /* Pick up values from tables; */
2409         max = rep_max[c];                 /* zero for max => infinity */
2410         if (max == 0) max = INT_MAX;
2411         break;
2412
2413         case OP_CRRANGE:
2414         case OP_CRMINRANGE:
2415         minimize = (*ecode == OP_CRMINRANGE);
2416         min = GET2(ecode, 1);
2417         max = GET2(ecode, 3);
2418         if (max == 0) max = INT_MAX;
2419         ecode += 5;
2420         break;
2421
2422         default:               /* No repeat follows */
2423         min = max = 1;
2424         break;
2425         }
2426
2427       /* First, ensure the minimum number of matches are present. */
2428
2429 #ifdef SUPPORT_UTF8
2430       /* UTF-8 mode */
2431       if (utf8)
2432         {
2433         for (i = 1; i <= min; i++)
2434           {
2435           if (eptr >= md->end_subject)
2436             {
2437             SCHECK_PARTIAL();
2438             MRRETURN(MATCH_NOMATCH);
2439             }
2440           GETCHARINC(c, eptr);
2441           if (c > 255)
2442             {
2443             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2444             }
2445           else
2446             {
2447             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448             }
2449           }
2450         }
2451       else
2452 #endif
2453       /* Not UTF-8 mode */
2454         {
2455         for (i = 1; i <= min; i++)
2456           {
2457           if (eptr >= md->end_subject)
2458             {
2459             SCHECK_PARTIAL();
2460             MRRETURN(MATCH_NOMATCH);
2461             }
2462           c = *eptr++;
2463           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2464           }
2465         }
2466
2467       /* If max == min we can continue with the main loop without the
2468       need to recurse. */
2469
2470       if (min == max) continue;
2471
2472       /* If minimizing, keep testing the rest of the expression and advancing
2473       the pointer while it matches the class. */
2474
2475       if (minimize)
2476         {
2477 #ifdef SUPPORT_UTF8
2478         /* UTF-8 mode */
2479         if (utf8)
2480           {
2481           for (fi = min;; fi++)
2482             {
2483             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2484             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2485             if (fi >= max) MRRETURN(MATCH_NOMATCH);
2486             if (eptr >= md->end_subject)
2487               {
2488               SCHECK_PARTIAL();
2489               MRRETURN(MATCH_NOMATCH);
2490               }
2491             GETCHARINC(c, eptr);
2492             if (c > 255)
2493               {
2494               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2495               }
2496             else
2497               {
2498               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2499               }
2500             }
2501           }
2502         else
2503 #endif
2504         /* Not UTF-8 mode */
2505           {
2506           for (fi = min;; fi++)
2507             {
2508             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2509             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2510             if (fi >= max) MRRETURN(MATCH_NOMATCH);
2511             if (eptr >= md->end_subject)
2512               {
2513               SCHECK_PARTIAL();
2514               MRRETURN(MATCH_NOMATCH);
2515               }
2516             c = *eptr++;
2517             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2518             }
2519           }
2520         /* Control never gets here */
2521         }
2522
2523       /* If maximizing, find the longest possible run, then work backwards. */
2524
2525       else
2526         {
2527         pp = eptr;
2528
2529 #ifdef SUPPORT_UTF8
2530         /* UTF-8 mode */
2531         if (utf8)
2532           {
2533           for (i = min; i < max; i++)
2534             {
2535             int len = 1;
2536             if (eptr >= md->end_subject)
2537               {
2538               SCHECK_PARTIAL();
2539               break;
2540               }
2541             GETCHARLEN(c, eptr, len);
2542             if (c > 255)
2543               {
2544               if (op == OP_CLASS) break;
2545               }
2546             else
2547               {
2548               if ((data[c/8] & (1 << (c&7))) == 0) break;
2549               }
2550             eptr += len;
2551             }
2552           for (;;)
2553             {
2554             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2555             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2557             BACKCHAR(eptr);
2558             }
2559           }
2560         else
2561 #endif
2562           /* Not UTF-8 mode */
2563           {
2564           for (i = min; i < max; i++)
2565             {
2566             if (eptr >= md->end_subject)
2567               {
2568               SCHECK_PARTIAL();
2569               break;
2570               }
2571             c = *eptr;
2572             if ((data[c/8] & (1 << (c&7))) == 0) break;
2573             eptr++;
2574             }
2575           while (eptr >= pp)
2576             {
2577             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2578             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2579             eptr--;
2580             }
2581           }
2582
2583         MRRETURN(MATCH_NOMATCH);
2584         }
2585       }
2586     /* Control never gets here */
2587
2588
2589     /* Match an extended character class. This opcode is encountered only
2590     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2591     mode, because Unicode properties are supported in non-UTF-8 mode. */
2592
2593 #ifdef SUPPORT_UTF8
2594     case OP_XCLASS:
2595       {
2596       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2597       ecode += GET(ecode, 1);                      /* Advance past the item */
2598
2599       switch (*ecode)
2600         {
2601         case OP_CRSTAR:
2602         case OP_CRMINSTAR:
2603         case OP_CRPLUS:
2604         case OP_CRMINPLUS:
2605         case OP_CRQUERY:
2606         case OP_CRMINQUERY:
2607         c = *ecode++ - OP_CRSTAR;
2608         minimize = (c & 1) != 0;
2609         min = rep_min[c];                 /* Pick up values from tables; */
2610         max = rep_max[c];                 /* zero for max => infinity */
2611         if (max == 0) max = INT_MAX;
2612         break;
2613
2614         case OP_CRRANGE:
2615         case OP_CRMINRANGE:
2616         minimize = (*ecode == OP_CRMINRANGE);
2617         min = GET2(ecode, 1);
2618         max = GET2(ecode, 3);
2619         if (max == 0) max = INT_MAX;
2620         ecode += 5;
2621         break;
2622
2623         default:               /* No repeat follows */
2624         min = max = 1;
2625         break;
2626         }
2627
2628       /* First, ensure the minimum number of matches are present. */
2629
2630       for (i = 1; i <= min; i++)
2631         {
2632         if (eptr >= md->end_subject)
2633           {
2634           SCHECK_PARTIAL();
2635           MRRETURN(MATCH_NOMATCH);
2636           }
2637         GETCHARINCTEST(c, eptr);
2638         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2639         }
2640
2641       /* If max == min we can continue with the main loop without the
2642       need to recurse. */
2643
2644       if (min == max) continue;
2645
2646       /* If minimizing, keep testing the rest of the expression and advancing
2647       the pointer while it matches the class. */
2648
2649       if (minimize)
2650         {
2651         for (fi = min;; fi++)
2652           {
2653           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2654           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2655           if (fi >= max) MRRETURN(MATCH_NOMATCH);
2656           if (eptr >= md->end_subject)
2657             {
2658             SCHECK_PARTIAL();
2659             MRRETURN(MATCH_NOMATCH);
2660             }
2661           GETCHARINCTEST(c, eptr);
2662           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2663           }
2664         /* Control never gets here */
2665         }
2666
2667       /* If maximizing, find the longest possible run, then work backwards. */
2668
2669       else
2670         {
2671         pp = eptr;
2672         for (i = min; i < max; i++)
2673           {
2674           int len = 1;
2675           if (eptr >= md->end_subject)
2676             {
2677             SCHECK_PARTIAL();
2678             break;
2679             }
2680           GETCHARLENTEST(c, eptr, len);
2681           if (!_pcre_xclass(c, data)) break;
2682           eptr += len;
2683           }
2684         for(;;)
2685           {
2686           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2687           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2689           if (utf8) BACKCHAR(eptr);
2690           }
2691         MRRETURN(MATCH_NOMATCH);
2692         }
2693
2694       /* Control never gets here */
2695       }
2696 #endif    /* End of XCLASS */
2697
2698     /* Match a single character, casefully */
2699
2700     case OP_CHAR:
2701 #ifdef SUPPORT_UTF8
2702     if (utf8)
2703       {
2704       length = 1;
2705       ecode++;
2706       GETCHARLEN(fc, ecode, length);
2707       if (length > md->end_subject - eptr)
2708         {
2709         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2710         MRRETURN(MATCH_NOMATCH);
2711         }
2712       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2713       }
2714     else
2715 #endif
2716
2717     /* Non-UTF-8 mode */
2718       {
2719       if (md->end_subject - eptr < 1)
2720         {
2721         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2722         MRRETURN(MATCH_NOMATCH);
2723         }
2724       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2725       ecode += 2;
2726       }
2727     break;
2728
2729     /* Match a single character, caselessly */
2730
2731     case OP_CHARNC:
2732 #ifdef SUPPORT_UTF8
2733     if (utf8)
2734       {
2735       length = 1;
2736       ecode++;
2737       GETCHARLEN(fc, ecode, length);
2738
2739       if (length > md->end_subject - eptr)
2740         {
2741         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2742         MRRETURN(MATCH_NOMATCH);
2743         }
2744
2745       /* If the pattern character's value is < 128, we have only one byte, and
2746       can use the fast lookup table. */
2747
2748       if (fc < 128)
2749         {
2750         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2751         }
2752
2753       /* Otherwise we must pick up the subject character */
2754
2755       else
2756         {
2757         unsigned int dc;
2758         GETCHARINC(dc, eptr);
2759         ecode += length;
2760
2761         /* If we have Unicode property support, we can use it to test the other
2762         case of the character, if there is one. */
2763
2764         if (fc != dc)
2765           {
2766 #ifdef SUPPORT_UCP
2767           if (dc != UCD_OTHERCASE(fc))
2768 #endif
2769             MRRETURN(MATCH_NOMATCH);
2770           }
2771         }
2772       }
2773     else
2774 #endif   /* SUPPORT_UTF8 */
2775
2776     /* Non-UTF-8 mode */
2777       {
2778       if (md->end_subject - eptr < 1)
2779         {
2780         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2781         MRRETURN(MATCH_NOMATCH);
2782         }
2783       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2784       ecode += 2;
2785       }
2786     break;
2787
2788     /* Match a single character repeatedly. */
2789
2790     case OP_EXACT:
2791     min = max = GET2(ecode, 1);
2792     ecode += 3;
2793     goto REPEATCHAR;
2794
2795     case OP_POSUPTO:
2796     possessive = TRUE;
2797     /* Fall through */
2798
2799     case OP_UPTO:
2800     case OP_MINUPTO:
2801     min = 0;
2802     max = GET2(ecode, 1);
2803     minimize = *ecode == OP_MINUPTO;
2804     ecode += 3;
2805     goto REPEATCHAR;
2806
2807     case OP_POSSTAR:
2808     possessive = TRUE;
2809     min = 0;
2810     max = INT_MAX;
2811     ecode++;
2812     goto REPEATCHAR;
2813
2814     case OP_POSPLUS:
2815     possessive = TRUE;
2816     min = 1;
2817     max = INT_MAX;
2818     ecode++;
2819     goto REPEATCHAR;
2820
2821     case OP_POSQUERY:
2822     possessive = TRUE;
2823     min = 0;
2824     max = 1;
2825     ecode++;
2826     goto REPEATCHAR;
2827
2828     case OP_STAR:
2829     case OP_MINSTAR:
2830     case OP_PLUS:
2831     case OP_MINPLUS:
2832     case OP_QUERY:
2833     case OP_MINQUERY:
2834     c = *ecode++ - OP_STAR;
2835     minimize = (c & 1) != 0;
2836
2837     min = rep_min[c];                 /* Pick up values from tables; */
2838     max = rep_max[c];                 /* zero for max => infinity */
2839     if (max == 0) max = INT_MAX;
2840
2841     /* Common code for all repeated single-character matches. */
2842
2843     REPEATCHAR:
2844 #ifdef SUPPORT_UTF8
2845     if (utf8)
2846       {
2847       length = 1;
2848       charptr = ecode;
2849       GETCHARLEN(fc, ecode, length);
2850       ecode += length;
2851
2852       /* Handle multibyte character matching specially here. There is
2853       support for caseless matching if UCP support is present. */
2854
2855       if (length > 1)
2856         {
2857 #ifdef SUPPORT_UCP
2858         unsigned int othercase;
2859         if ((ims & PCRE_CASELESS) != 0 &&
2860             (othercase = UCD_OTHERCASE(fc)) != fc)
2861           oclength = _pcre_ord2utf8(othercase, occhars);
2862         else oclength = 0;
2863 #endif  /* SUPPORT_UCP */
2864
2865         for (i = 1; i <= min; i++)
2866           {
2867           if (eptr <= md->end_subject - length &&
2868             memcmp(eptr, charptr, length) == 0) eptr += length;
2869 #ifdef SUPPORT_UCP
2870           else if (oclength > 0 &&
2871                    eptr <= md->end_subject - oclength &&
2872                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2873 #endif  /* SUPPORT_UCP */
2874           else
2875             {
2876             CHECK_PARTIAL();
2877             MRRETURN(MATCH_NOMATCH);
2878             }
2879           }
2880
2881         if (min == max) continue;
2882
2883         if (minimize)
2884           {
2885           for (fi = min;; fi++)
2886             {
2887             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2888             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889             if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890             if (eptr <= md->end_subject - length &&
2891               memcmp(eptr, charptr, length) == 0) eptr += length;
2892 #ifdef SUPPORT_UCP
2893             else if (oclength > 0 &&
2894                      eptr <= md->end_subject - oclength &&
2895                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2896 #endif  /* SUPPORT_UCP */
2897             else
2898               {
2899               CHECK_PARTIAL();
2900               MRRETURN(MATCH_NOMATCH);
2901               }
2902             }
2903           /* Control never gets here */
2904           }
2905
2906         else  /* Maximize */
2907           {
2908           pp = eptr;
2909           for (i = min; i < max; i++)
2910             {
2911             if (eptr <= md->end_subject - length &&
2912                 memcmp(eptr, charptr, length) == 0) eptr += length;
2913 #ifdef SUPPORT_UCP
2914             else if (oclength > 0 &&
2915                      eptr <= md->end_subject - oclength &&
2916                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2917 #endif  /* SUPPORT_UCP */
2918             else
2919               {
2920               CHECK_PARTIAL();
2921               break;
2922               }
2923             }
2924
2925           if (possessive) continue;
2926
2927           for(;;)
2928             {
2929             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2930             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2931             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2932 #ifdef SUPPORT_UCP
2933             eptr--;
2934             BACKCHAR(eptr);
2935 #else   /* without SUPPORT_UCP */
2936             eptr -= length;
2937 #endif  /* SUPPORT_UCP */
2938             }
2939           }
2940         /* Control never gets here */
2941         }
2942
2943       /* If the length of a UTF-8 character is 1, we fall through here, and
2944       obey the code as for non-UTF-8 characters below, though in this case the
2945       value of fc will always be < 128. */
2946       }
2947     else
2948 #endif  /* SUPPORT_UTF8 */
2949
2950     /* When not in UTF-8 mode, load a single-byte character. */
2951
2952     fc = *ecode++;
2953
2954     /* The value of fc at this point is always less than 256, though we may or
2955     may not be in UTF-8 mode. The code is duplicated for the caseless and
2956     caseful cases, for speed, since matching characters is likely to be quite
2957     common. First, ensure the minimum number of matches are present. If min =
2958     max, continue at the same level without recursing. Otherwise, if
2959     minimizing, keep trying the rest of the expression and advancing one
2960     matching character if failing, up to the maximum. Alternatively, if
2961     maximizing, find the maximum number of characters and work backwards. */
2962
2963     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2964       max, eptr));
2965
2966     if ((ims & PCRE_CASELESS) != 0)
2967       {
2968       fc = md->lcc[fc];
2969       for (i = 1; i <= min; i++)
2970         {
2971         if (eptr >= md->end_subject)
2972           {
2973           SCHECK_PARTIAL();
2974           MRRETURN(MATCH_NOMATCH);
2975           }
2976         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2977         }
2978       if (min == max) continue;
2979       if (minimize)
2980         {
2981         for (fi = min;; fi++)
2982           {
2983           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2984           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2985           if (fi >= max) MRRETURN(MATCH_NOMATCH);
2986           if (eptr >= md->end_subject)
2987             {
2988             SCHECK_PARTIAL();
2989             MRRETURN(MATCH_NOMATCH);
2990             }
2991           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2992           }
2993         /* Control never gets here */
2994         }
2995       else  /* Maximize */
2996         {
2997         pp = eptr;
2998         for (i = min; i < max; i++)
2999           {
3000           if (eptr >= md->end_subject)
3001             {
3002             SCHECK_PARTIAL();
3003             break;
3004             }
3005           if (fc != md->lcc[*eptr]) break;
3006           eptr++;
3007           }
3008
3009         if (possessive) continue;
3010
3011         while (eptr >= pp)
3012           {
3013           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3014           eptr--;
3015           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3016           }
3017         MRRETURN(MATCH_NOMATCH);
3018         }
3019       /* Control never gets here */
3020       }
3021
3022     /* Caseful comparisons (multi-byte characters were handled above) */
3023
3024     else
3025       {
3026       for (i = 1; i <= min; i++)
3027         {
3028         if (eptr >= md->end_subject)
3029           {
3030           SCHECK_PARTIAL();
3031           MRRETURN(MATCH_NOMATCH);
3032           }
3033         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3034         }
3035
3036       if (min == max) continue;
3037
3038       if (minimize)
3039         {
3040         for (fi = min;; fi++)
3041           {
3042           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3043           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3044           if (fi >= max) MRRETURN(MATCH_NOMATCH);
3045           if (eptr >= md->end_subject)
3046             {
3047             SCHECK_PARTIAL();
3048             MRRETURN(MATCH_NOMATCH);
3049             }
3050           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051           }
3052         /* Control never gets here */
3053         }
3054       else  /* Maximize */
3055         {
3056         pp = eptr;
3057         for (i = min; i < max; i++)
3058           {
3059           if (eptr >= md->end_subject)
3060             {
3061             SCHECK_PARTIAL();
3062             break;
3063             }
3064           if (fc != *eptr) break;
3065           eptr++;
3066           }
3067         if (possessive) continue;
3068
3069         while (eptr >= pp)
3070           {
3071           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3072           eptr--;
3073           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074           }
3075         MRRETURN(MATCH_NOMATCH);
3076         }
3077       }
3078     /* Control never gets here */
3079
3080     /* Match a negated single one-byte pattern character. The subject
3081     character that we are checking against it can be multibyte. */
3082
3083     case OP_NOT:
3084     if (eptr >= md->end_subject)
3085       {
3086       SCHECK_PARTIAL();
3087       MRRETURN(MATCH_NOMATCH);
3088       }
3089     ecode++;
3090     GETCHARINCTEST(c, eptr);
3091     if ((ims & PCRE_CASELESS) != 0)
3092       {
3093 #ifdef SUPPORT_UTF8
3094       if (c < 256)
3095 #endif
3096       c = md->lcc[c];
3097       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3098       }
3099     else
3100       {
3101       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3102       }
3103     break;
3104
3105     /* Match a negated single one-byte character repeatedly. This is almost a
3106     repeat of the code for a repeated single character, but I haven't found a
3107     nice way of commoning these up that doesn't require a test of the
3108     positive/negative option for each character match. Maybe that wouldn't add
3109     very much to the time taken, but character matching *is* what this is all
3110     about... */
3111
3112     case OP_NOTEXACT:
3113     min = max = GET2(ecode, 1);
3114     ecode += 3;
3115     goto REPEATNOTCHAR;
3116
3117     case OP_NOTUPTO:
3118     case OP_NOTMINUPTO:
3119     min = 0;
3120     max = GET2(ecode, 1);
3121     minimize = *ecode == OP_NOTMINUPTO;
3122     ecode += 3;
3123     goto REPEATNOTCHAR;
3124
3125     case OP_NOTPOSSTAR:
3126     possessive = TRUE;
3127     min = 0;
3128     max = INT_MAX;
3129     ecode++;
3130     goto REPEATNOTCHAR;
3131
3132     case OP_NOTPOSPLUS:
3133     possessive = TRUE;
3134     min = 1;
3135     max = INT_MAX;
3136     ecode++;
3137     goto REPEATNOTCHAR;
3138
3139     case OP_NOTPOSQUERY:
3140     possessive = TRUE;
3141     min = 0;
3142     max = 1;
3143     ecode++;
3144     goto REPEATNOTCHAR;
3145
3146     case OP_NOTPOSUPTO:
3147     possessive = TRUE;
3148     min = 0;
3149     max = GET2(ecode, 1);
3150     ecode += 3;
3151     goto REPEATNOTCHAR;
3152
3153     case OP_NOTSTAR:
3154     case OP_NOTMINSTAR:
3155     case OP_NOTPLUS:
3156     case OP_NOTMINPLUS:
3157     case OP_NOTQUERY:
3158     case OP_NOTMINQUERY:
3159     c = *ecode++ - OP_NOTSTAR;
3160     minimize = (c & 1) != 0;
3161     min = rep_min[c];                 /* Pick up values from tables; */
3162     max = rep_max[c];                 /* zero for max => infinity */
3163     if (max == 0) max = INT_MAX;
3164
3165     /* Common code for all repeated single-byte matches. */
3166
3167     REPEATNOTCHAR:
3168     fc = *ecode++;
3169
3170     /* The code is duplicated for the caseless and caseful cases, for speed,
3171     since matching characters is likely to be quite common. First, ensure the
3172     minimum number of matches are present. If min = max, continue at the same
3173     level without recursing. Otherwise, if minimizing, keep trying the rest of
3174     the expression and advancing one matching character if failing, up to the
3175     maximum. Alternatively, if maximizing, find the maximum number of
3176     characters and work backwards. */
3177
3178     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3179       max, eptr));
3180
3181     if ((ims & PCRE_CASELESS) != 0)
3182       {
3183       fc = md->lcc[fc];
3184
3185 #ifdef SUPPORT_UTF8
3186       /* UTF-8 mode */
3187       if (utf8)
3188         {
3189         register unsigned int d;
3190         for (i = 1; i <= min; i++)
3191           {
3192           if (eptr >= md->end_subject)
3193             {
3194             SCHECK_PARTIAL();
3195             MRRETURN(MATCH_NOMATCH);
3196             }
3197           GETCHARINC(d, eptr);
3198           if (d < 256) d = md->lcc[d];
3199           if (fc == d) MRRETURN(MATCH_NOMATCH);
3200           }
3201         }
3202       else
3203 #endif
3204
3205       /* Not UTF-8 mode */
3206         {
3207         for (i = 1; i <= min; i++)
3208           {
3209           if (eptr >= md->end_subject)
3210             {
3211             SCHECK_PARTIAL();
3212             MRRETURN(MATCH_NOMATCH);
3213             }
3214           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3215           }
3216         }
3217
3218       if (min == max) continue;
3219
3220       if (minimize)
3221         {
3222 #ifdef SUPPORT_UTF8
3223         /* UTF-8 mode */
3224         if (utf8)
3225           {
3226           register unsigned int d;
3227           for (fi = min;; fi++)
3228             {
3229             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3230             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3231             if (fi >= max) MRRETURN(MATCH_NOMATCH);
3232             if (eptr >= md->end_subject)
3233               {
3234               SCHECK_PARTIAL();
3235               MRRETURN(MATCH_NOMATCH);
3236               }
3237             GETCHARINC(d, eptr);
3238             if (d < 256) d = md->lcc[d];
3239             if (fc == d) MRRETURN(MATCH_NOMATCH);
3240             }
3241           }
3242         else
3243 #endif
3244         /* Not UTF-8 mode */
3245           {
3246           for (fi = min;; fi++)
3247             {
3248             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3249             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3250             if (fi >= max) MRRETURN(MATCH_NOMATCH);
3251             if (eptr >= md->end_subject)
3252               {
3253               SCHECK_PARTIAL();
3254               MRRETURN(MATCH_NOMATCH);
3255               }
3256             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3257             }
3258           }
3259         /* Control never gets here */
3260         }
3261
3262       /* Maximize case */
3263
3264       else
3265         {
3266         pp = eptr;
3267
3268 #ifdef SUPPORT_UTF8
3269         /* UTF-8 mode */
3270         if (utf8)
3271           {
3272           register unsigned int d;
3273           for (i = min; i < max; i++)
3274             {
3275             int len = 1;
3276             if (eptr >= md->end_subject)
3277               {
3278               SCHECK_PARTIAL();
3279               break;
3280               }
3281             GETCHARLEN(d, eptr, len);
3282             if (d < 256) d = md->lcc[d];
3283             if (fc == d) break;
3284             eptr += len;
3285             }
3286         if (possessive) continue;
3287         for(;;)
3288             {
3289             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3290             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3291             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3292             BACKCHAR(eptr);
3293             }
3294           }
3295         else
3296 #endif
3297         /* Not UTF-8 mode */
3298           {
3299           for (i = min; i < max; i++)
3300             {
3301             if (eptr >= md->end_subject)
3302               {
3303               SCHECK_PARTIAL();
3304               break;
3305               }
3306             if (fc == md->lcc[*eptr]) break;
3307             eptr++;
3308             }
3309           if (possessive) continue;
3310           while (eptr >= pp)
3311             {
3312             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3313             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314             eptr--;
3315             }
3316           }
3317
3318         MRRETURN(MATCH_NOMATCH);
3319         }
3320       /* Control never gets here */
3321       }
3322
3323     /* Caseful comparisons */
3324
3325     else
3326       {
3327 #ifdef SUPPORT_UTF8
3328       /* UTF-8 mode */
3329       if (utf8)
3330         {
3331         register unsigned int d;
3332         for (i = 1; i <= min; i++)
3333           {
3334           if (eptr >= md->end_subject)
3335             {
3336             SCHECK_PARTIAL();
3337             MRRETURN(MATCH_NOMATCH);
3338             }
3339           GETCHARINC(d, eptr);
3340           if (fc == d) MRRETURN(MATCH_NOMATCH);
3341           }
3342         }
3343       else
3344 #endif
3345       /* Not UTF-8 mode */
3346         {
3347         for (i = 1; i <= min; i++)
3348           {
3349           if (eptr >= md->end_subject)
3350             {
3351             SCHECK_PARTIAL();
3352             MRRETURN(MATCH_NOMATCH);
3353             }
3354           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3355           }
3356         }
3357
3358       if (min == max) continue;
3359
3360       if (minimize)
3361         {
3362 #ifdef SUPPORT_UTF8
3363         /* UTF-8 mode */
3364         if (utf8)
3365           {
3366           register unsigned int d;
3367           for (fi = min;; fi++)
3368             {
3369             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3370             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3371             if (fi >= max) MRRETURN(MATCH_NOMATCH);
3372             if (eptr >= md->end_subject)
3373               {
3374               SCHECK_PARTIAL();
3375               MRRETURN(MATCH_NOMATCH);
3376               }
3377             GETCHARINC(d, eptr);
3378             if (fc == d) MRRETURN(MATCH_NOMATCH);
3379             }
3380           }
3381         else
3382 #endif
3383         /* Not UTF-8 mode */
3384           {
3385           for (fi = min;; fi++)
3386             {
3387             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3388             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389             if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390             if (eptr >= md->end_subject)
3391               {
3392               SCHECK_PARTIAL();
3393               MRRETURN(MATCH_NOMATCH);
3394               }
3395             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3396             }
3397           }
3398         /* Control never gets here */
3399         }
3400
3401       /* Maximize case */
3402
3403       else
3404         {
3405         pp = eptr;
3406
3407 #ifdef SUPPORT_UTF8
3408         /* UTF-8 mode */
3409         if (utf8)
3410           {
3411           register unsigned int d;
3412           for (i = min; i < max; i++)
3413             {
3414             int len = 1;
3415             if (eptr >= md->end_subject)
3416               {
3417               SCHECK_PARTIAL();
3418               break;
3419               }
3420             GETCHARLEN(d, eptr, len);
3421             if (fc == d) break;
3422             eptr += len;
3423             }
3424           if (possessive) continue;
3425           for(;;)
3426             {
3427             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3428             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3429             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3430             BACKCHAR(eptr);
3431             }
3432           }
3433         else
3434 #endif
3435         /* Not UTF-8 mode */
3436           {
3437           for (i = min; i < max; i++)
3438             {
3439             if (eptr >= md->end_subject)
3440               {
3441               SCHECK_PARTIAL();
3442               break;
3443               }
3444             if (fc == *eptr) break;
3445             eptr++;
3446             }
3447           if (possessive) continue;
3448           while (eptr >= pp)
3449             {
3450             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3451             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3452             eptr--;
3453             }
3454           }
3455
3456         MRRETURN(MATCH_NOMATCH);
3457         }
3458       }
3459     /* Control never gets here */
3460
3461     /* Match a single character type repeatedly; several different opcodes
3462     share code. This is very similar to the code for single characters, but we
3463     repeat it in the interests of efficiency. */
3464
3465     case OP_TYPEEXACT:
3466     min = max = GET2(ecode, 1);
3467     minimize = TRUE;
3468     ecode += 3;
3469     goto REPEATTYPE;
3470
3471     case OP_TYPEUPTO:
3472     case OP_TYPEMINUPTO:
3473     min = 0;
3474     max = GET2(ecode, 1);
3475     minimize = *ecode == OP_TYPEMINUPTO;
3476     ecode += 3;
3477     goto REPEATTYPE;
3478
3479     case OP_TYPEPOSSTAR:
3480     possessive = TRUE;
3481     min = 0;
3482     max = INT_MAX;
3483     ecode++;
3484     goto REPEATTYPE;
3485
3486     case OP_TYPEPOSPLUS:
3487     possessive = TRUE;
3488     min = 1;
3489     max = INT_MAX;
3490     ecode++;
3491     goto REPEATTYPE;
3492
3493     case OP_TYPEPOSQUERY:
3494     possessive = TRUE;
3495     min = 0;
3496     max = 1;
3497     ecode++;
3498     goto REPEATTYPE;
3499
3500     case OP_TYPEPOSUPTO:
3501     possessive = TRUE;
3502     min = 0;
3503     max = GET2(ecode, 1);
3504     ecode += 3;
3505     goto REPEATTYPE;
3506
3507     case OP_TYPESTAR:
3508     case OP_TYPEMINSTAR:
3509     case OP_TYPEPLUS:
3510     case OP_TYPEMINPLUS:
3511     case OP_TYPEQUERY:
3512     case OP_TYPEMINQUERY:
3513     c = *ecode++ - OP_TYPESTAR;
3514     minimize = (c & 1) != 0;
3515     min = rep_min[c];                 /* Pick up values from tables; */
3516     max = rep_max[c];                 /* zero for max => infinity */
3517     if (max == 0) max = INT_MAX;
3518
3519     /* Common code for all repeated single character type matches. Note that
3520     in UTF-8 mode, '.' matches a character of any length, but for the other
3521     character types, the valid characters are all one-byte long. */
3522
3523     REPEATTYPE:
3524     ctype = *ecode++;      /* Code for the character type */
3525
3526 #ifdef SUPPORT_UCP
3527     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3528       {
3529       prop_fail_result = ctype == OP_NOTPROP;
3530       prop_type = *ecode++;
3531       prop_value = *ecode++;
3532       }
3533     else prop_type = -1;
3534 #endif
3535
3536     /* First, ensure the minimum number of matches are present. Use inline
3537     code for maximizing the speed, and do the type test once at the start
3538     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3539     is tidier. Also separate the UCP code, which can be the same for both UTF-8
3540     and single-bytes. */
3541
3542     if (min > 0)
3543       {
3544 #ifdef SUPPORT_UCP
3545       if (prop_type >= 0)
3546         {
3547         switch(prop_type)
3548           {
3549           case PT_ANY:
3550           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3551           for (i = 1; i <= min; i++)
3552             {
3553             if (eptr >= md->end_subject)
3554               {
3555               SCHECK_PARTIAL();
3556               MRRETURN(MATCH_NOMATCH);
3557               }
3558             GETCHARINCTEST(c, eptr);
3559             }
3560           break;
3561
3562           case PT_LAMP:
3563           for (i = 1; i <= min; i++)
3564             {
3565             if (eptr >= md->end_subject)
3566               {
3567               SCHECK_PARTIAL();
3568               MRRETURN(MATCH_NOMATCH);
3569               }
3570             GETCHARINCTEST(c, eptr);
3571             prop_chartype = UCD_CHARTYPE(c);
3572             if ((prop_chartype == ucp_Lu ||
3573                  prop_chartype == ucp_Ll ||
3574                  prop_chartype == ucp_Lt) == prop_fail_result)
3575               MRRETURN(MATCH_NOMATCH);
3576             }
3577           break;
3578
3579           case PT_GC:
3580           for (i = 1; i <= min; i++)
3581             {
3582             if (eptr >= md->end_subject)
3583               {
3584               SCHECK_PARTIAL();
3585               MRRETURN(MATCH_NOMATCH);
3586               }
3587             GETCHARINCTEST(c, eptr);
3588             prop_category = UCD_CATEGORY(c);
3589             if ((prop_category == prop_value) == prop_fail_result)
3590               MRRETURN(MATCH_NOMATCH);
3591             }
3592           break;
3593
3594           case PT_PC:
3595           for (i = 1; i <= min; i++)
3596             {
3597             if (eptr >= md->end_subject)
3598               {
3599               SCHECK_PARTIAL();
3600               MRRETURN(MATCH_NOMATCH);
3601               }
3602             GETCHARINCTEST(c, eptr);
3603             prop_chartype = UCD_CHARTYPE(c);
3604             if ((prop_chartype == prop_value) == prop_fail_result)
3605               MRRETURN(MATCH_NOMATCH);
3606             }
3607           break;
3608
3609           case PT_SC:
3610           for (i = 1; i <= min; i++)
3611             {
3612             if (eptr >= md->end_subject)
3613               {
3614               SCHECK_PARTIAL();
3615               MRRETURN(MATCH_NOMATCH);
3616               }
3617             GETCHARINCTEST(c, eptr);
3618             prop_script = UCD_SCRIPT(c);
3619             if ((prop_script == prop_value) == prop_fail_result)
3620               MRRETURN(MATCH_NOMATCH);
3621             }
3622           break;
3623
3624           case PT_ALNUM:
3625           for (i = 1; i <= min; i++)
3626             {
3627             if (eptr >= md->end_subject)
3628               {
3629               SCHECK_PARTIAL();
3630               MRRETURN(MATCH_NOMATCH);
3631               }
3632             GETCHARINCTEST(c, eptr);
3633             prop_category = UCD_CATEGORY(c);
3634             if ((prop_category == ucp_L || prop_category == ucp_N)
3635                    == prop_fail_result)
3636               MRRETURN(MATCH_NOMATCH);
3637             }
3638           break;
3639
3640           case PT_SPACE:    /* Perl space */
3641           for (i = 1; i <= min; i++)
3642             {
3643             if (eptr >= md->end_subject)
3644               {
3645               SCHECK_PARTIAL();
3646               MRRETURN(MATCH_NOMATCH);
3647               }
3648             GETCHARINCTEST(c, eptr);
3649             prop_category = UCD_CATEGORY(c);
3650             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651                  c == CHAR_FF || c == CHAR_CR)
3652                    == prop_fail_result)
3653               MRRETURN(MATCH_NOMATCH);
3654             }
3655           break;
3656
3657           case PT_PXSPACE:  /* POSIX space */
3658           for (i = 1; i <= min; i++)
3659             {
3660             if (eptr >= md->end_subject)
3661               {
3662               SCHECK_PARTIAL();
3663               MRRETURN(MATCH_NOMATCH);
3664               }
3665             GETCHARINCTEST(c, eptr);
3666             prop_category = UCD_CATEGORY(c);
3667             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669                    == prop_fail_result)
3670               MRRETURN(MATCH_NOMATCH);
3671             }
3672           break;
3673
3674           case PT_WORD:
3675           for (i = 1; i <= min; i++)
3676             {
3677             if (eptr >= md->end_subject)
3678               {
3679               SCHECK_PARTIAL();
3680               MRRETURN(MATCH_NOMATCH);
3681               }
3682             GETCHARINCTEST(c, eptr);
3683             prop_category = UCD_CATEGORY(c);
3684             if ((prop_category == ucp_L || prop_category == ucp_N ||
3685                  c == CHAR_UNDERSCORE)
3686                    == prop_fail_result)
3687               MRRETURN(MATCH_NOMATCH);
3688             }
3689           break;
3690
3691           /* This should not occur */
3692
3693           default:
3694           RRETURN(PCRE_ERROR_INTERNAL);
3695           }
3696         }
3697
3698       /* Match extended Unicode sequences. We will get here only if the
3699       support is in the binary; otherwise a compile-time error occurs. */
3700
3701       else if (ctype == OP_EXTUNI)
3702         {
3703         for (i = 1; i <= min; i++)
3704           {
3705           if (eptr >= md->end_subject)
3706             {
3707             SCHECK_PARTIAL();
3708             MRRETURN(MATCH_NOMATCH);
3709             }
3710           GETCHARINCTEST(c, eptr);
3711           prop_category = UCD_CATEGORY(c);
3712           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3713           while (eptr < md->end_subject)
3714             {
3715             int len = 1;
3716             if (!utf8) c = *eptr;
3717               else { GETCHARLEN(c, eptr, len); }
3718             prop_category = UCD_CATEGORY(c);
3719             if (prop_category != ucp_M) break;
3720             eptr += len;
3721             }
3722           }
3723         }
3724
3725       else
3726 #endif     /* SUPPORT_UCP */
3727
3728 /* Handle all other cases when the coding is UTF-8 */
3729
3730 #ifdef SUPPORT_UTF8
3731       if (utf8) switch(ctype)
3732         {
3733         case OP_ANY:
3734         for (i = 1; i <= min; i++)
3735           {
3736           if (eptr >= md->end_subject)
3737             {
3738             SCHECK_PARTIAL();
3739             MRRETURN(MATCH_NOMATCH);
3740             }
3741           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3742           eptr++;
3743           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3744           }
3745         break;
3746
3747         case OP_ALLANY:
3748         for (i = 1; i <= min; i++)
3749           {
3750           if (eptr >= md->end_subject)
3751             {
3752             SCHECK_PARTIAL();
3753             MRRETURN(MATCH_NOMATCH);
3754             }
3755           eptr++;
3756           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3757           }
3758         break;
3759
3760         case OP_ANYBYTE:
3761         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3762         eptr += min;
3763         break;
3764
3765         case OP_ANYNL:
3766         for (i = 1; i <= min; i++)
3767           {
3768           if (eptr >= md->end_subject)
3769             {
3770             SCHECK_PARTIAL();
3771             MRRETURN(MATCH_NOMATCH);
3772             }
3773           GETCHARINC(c, eptr);
3774           switch(c)
3775             {
3776             default: MRRETURN(MATCH_NOMATCH);
3777             case 0x000d:
3778             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3779             break;
3780
3781             case 0x000a:
3782             break;
3783
3784             case 0x000b:
3785             case 0x000c:
3786             case 0x0085:
3787             case 0x2028:
3788             case 0x2029:
3789             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3790             break;
3791             }
3792           }
3793         break;
3794
3795         case OP_NOT_HSPACE:
3796         for (i = 1; i <= min; i++)
3797           {
3798           if (eptr >= md->end_subject)
3799             {
3800             SCHECK_PARTIAL();
3801             MRRETURN(MATCH_NOMATCH);
3802             }
3803           GETCHARINC(c, eptr);
3804           switch(c)
3805             {
3806             default: break;
3807             case 0x09:      /* HT */
3808             case 0x20:      /* SPACE */
3809             case 0xa0:      /* NBSP */
3810             case 0x1680:    /* OGHAM SPACE MARK */
3811             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3812             case 0x2000:    /* EN QUAD */
3813             case 0x2001:    /* EM QUAD */
3814             case 0x2002:    /* EN SPACE */
3815             case 0x2003:    /* EM SPACE */
3816             case 0x2004:    /* THREE-PER-EM SPACE */
3817             case 0x2005:    /* FOUR-PER-EM SPACE */
3818             case 0x2006:    /* SIX-PER-EM SPACE */
3819             case 0x2007:    /* FIGURE SPACE */
3820             case 0x2008:    /* PUNCTUATION SPACE */
3821             case 0x2009:    /* THIN SPACE */
3822             case 0x200A:    /* HAIR SPACE */
3823             case 0x202f:    /* NARROW NO-BREAK SPACE */
3824             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3825             case 0x3000:    /* IDEOGRAPHIC SPACE */
3826             MRRETURN(MATCH_NOMATCH);
3827             }
3828           }
3829         break;
3830
3831         case OP_HSPACE:
3832         for (i = 1; i <= min; i++)
3833           {
3834           if (eptr >= md->end_subject)
3835             {
3836             SCHECK_PARTIAL();
3837             MRRETURN(MATCH_NOMATCH);
3838             }
3839           GETCHARINC(c, eptr);
3840           switch(c)
3841             {
3842             default: MRRETURN(MATCH_NOMATCH);
3843             case 0x09:      /* HT */
3844             case 0x20:      /* SPACE */
3845             case 0xa0:      /* NBSP */
3846             case 0x1680:    /* OGHAM SPACE MARK */
3847             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3848             case 0x2000:    /* EN QUAD */
3849             case 0x2001:    /* EM QUAD */
3850             case 0x2002:    /* EN SPACE */
3851             case 0x2003:    /* EM SPACE */
3852             case 0x2004:    /* THREE-PER-EM SPACE */
3853             case 0x2005:    /* FOUR-PER-EM SPACE */
3854             case 0x2006:    /* SIX-PER-EM SPACE */
3855             case 0x2007:    /* FIGURE SPACE */
3856             case 0x2008:    /* PUNCTUATION SPACE */
3857             case 0x2009:    /* THIN SPACE */
3858             case 0x200A:    /* HAIR SPACE */
3859             case 0x202f:    /* NARROW NO-BREAK SPACE */
3860             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3861             case 0x3000:    /* IDEOGRAPHIC SPACE */
3862             break;
3863             }
3864           }
3865         break;
3866
3867         case OP_NOT_VSPACE:
3868         for (i = 1; i <= min; i++)
3869           {
3870           if (eptr >= md->end_subject)
3871             {
3872             SCHECK_PARTIAL();
3873             MRRETURN(MATCH_NOMATCH);
3874             }
3875           GETCHARINC(c, eptr);
3876           switch(c)
3877             {
3878             default: break;
3879             case 0x0a:      /* LF */
3880             case 0x0b:      /* VT */
3881             case 0x0c:      /* FF */
3882             case 0x0d:      /* CR */
3883             case 0x85:      /* NEL */
3884             case 0x2028:    /* LINE SEPARATOR */
3885             case 0x2029:    /* PARAGRAPH SEPARATOR */
3886             MRRETURN(MATCH_NOMATCH);
3887             }
3888           }
3889         break;
3890
3891         case OP_VSPACE:
3892         for (i = 1; i <= min; i++)
3893           {
3894           if (eptr >= md->end_subject)
3895             {
3896             SCHECK_PARTIAL();
3897             MRRETURN(MATCH_NOMATCH);
3898             }
3899           GETCHARINC(c, eptr);
3900           switch(c)
3901             {
3902             default: MRRETURN(MATCH_NOMATCH);
3903             case 0x0a:      /* LF */
3904             case 0x0b:      /* VT */
3905             case 0x0c:      /* FF */
3906             case 0x0d:      /* CR */
3907             case 0x85:      /* NEL */
3908             case 0x2028:    /* LINE SEPARATOR */
3909             case 0x2029:    /* PARAGRAPH SEPARATOR */
3910             break;
3911             }
3912           }
3913         break;
3914
3915         case OP_NOT_DIGIT:
3916         for (i = 1; i <= min; i++)
3917           {
3918           if (eptr >= md->end_subject)
3919             {
3920             SCHECK_PARTIAL();
3921             MRRETURN(MATCH_NOMATCH);
3922             }
3923           GETCHARINC(c, eptr);
3924           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3925             MRRETURN(MATCH_NOMATCH);
3926           }
3927         break;
3928
3929         case OP_DIGIT:
3930         for (i = 1; i <= min; i++)
3931           {
3932           if (eptr >= md->end_subject)
3933             {
3934             SCHECK_PARTIAL();
3935             MRRETURN(MATCH_NOMATCH);
3936             }
3937           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3938             MRRETURN(MATCH_NOMATCH);
3939           /* No need to skip more bytes - we know it's a 1-byte character */
3940           }
3941         break;
3942
3943         case OP_NOT_WHITESPACE:
3944         for (i = 1; i <= min; i++)
3945           {
3946           if (eptr >= md->end_subject)
3947             {
3948             SCHECK_PARTIAL();
3949             MRRETURN(MATCH_NOMATCH);
3950             }
3951           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3952             MRRETURN(MATCH_NOMATCH);
3953           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3954           }
3955         break;
3956
3957         case OP_WHITESPACE:
3958         for (i = 1; i <= min; i++)
3959           {
3960           if (eptr >= md->end_subject)
3961             {
3962             SCHECK_PARTIAL();
3963             MRRETURN(MATCH_NOMATCH);
3964             }
3965           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3966             MRRETURN(MATCH_NOMATCH);
3967           /* No need to skip more bytes - we know it's a 1-byte character */
3968           }
3969         break;
3970
3971         case OP_NOT_WORDCHAR:
3972         for (i = 1; i <= min; i++)
3973           {
3974           if (eptr >= md->end_subject)
3975             {
3976             SCHECK_PARTIAL();
3977             MRRETURN(MATCH_NOMATCH);
3978             }
3979           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3980             MRRETURN(MATCH_NOMATCH);
3981           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3982           }
3983         break;
3984
3985         case OP_WORDCHAR:
3986         for (i = 1; i <= min; i++)
3987           {
3988           if (eptr >= md->end_subject)
3989             {
3990             SCHECK_PARTIAL();
3991             MRRETURN(MATCH_NOMATCH);
3992             }
3993           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3994             MRRETURN(MATCH_NOMATCH);
3995           /* No need to skip more bytes - we know it's a 1-byte character */
3996           }
3997         break;
3998
3999         default:
4000         RRETURN(PCRE_ERROR_INTERNAL);
4001         }  /* End switch(ctype) */
4002
4003       else
4004 #endif     /* SUPPORT_UTF8 */
4005
4006       /* Code for the non-UTF-8 case for minimum matching of operators other
4007       than OP_PROP and OP_NOTPROP. */
4008
4009       switch(ctype)
4010         {
4011         case OP_ANY:
4012         for (i = 1; i <= min; i++)
4013           {
4014           if (eptr >= md->end_subject)
4015             {
4016             SCHECK_PARTIAL();
4017             MRRETURN(MATCH_NOMATCH);
4018             }
4019           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4020           eptr++;
4021           }
4022         break;
4023
4024         case OP_ALLANY:
4025         if (eptr > md->end_subject - min)
4026           {
4027           SCHECK_PARTIAL();
4028           MRRETURN(MATCH_NOMATCH);
4029           }
4030         eptr += min;
4031         break;
4032
4033         case OP_ANYBYTE:
4034         if (eptr > md->end_subject - min)
4035           {
4036           SCHECK_PARTIAL();
4037           MRRETURN(MATCH_NOMATCH);
4038           }
4039         eptr += min;
4040         break;
4041
4042         case OP_ANYNL:
4043         for (i = 1; i <= min; i++)
4044           {
4045           if (eptr >= md->end_subject)
4046             {
4047             SCHECK_PARTIAL();
4048             MRRETURN(MATCH_NOMATCH);
4049             }
4050           switch(*eptr++)
4051             {
4052             default: MRRETURN(MATCH_NOMATCH);
4053             case 0x000d:
4054             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4055             break;
4056             case 0x000a:
4057             break;
4058
4059             case 0x000b:
4060             case 0x000c:
4061             case 0x0085:
4062             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4063             break;
4064             }
4065           }
4066         break;
4067
4068         case OP_NOT_HSPACE:
4069         for (i = 1; i <= min; i++)
4070           {
4071           if (eptr >= md->end_subject)
4072             {
4073             SCHECK_PARTIAL();
4074             MRRETURN(MATCH_NOMATCH);
4075             }
4076           switch(*eptr++)
4077             {
4078             default: break;
4079             case 0x09:      /* HT */
4080             case 0x20:      /* SPACE */
4081             case 0xa0:      /* NBSP */
4082             MRRETURN(MATCH_NOMATCH);
4083             }
4084           }
4085         break;
4086
4087         case OP_HSPACE:
4088         for (i = 1; i <= min; i++)
4089           {
4090           if (eptr >= md->end_subject)
4091             {
4092             SCHECK_PARTIAL();
4093             MRRETURN(MATCH_NOMATCH);
4094             }
4095           switch(*eptr++)
4096             {
4097             default: MRRETURN(MATCH_NOMATCH);
4098             case 0x09:      /* HT */
4099             case 0x20:      /* SPACE */
4100             case 0xa0:      /* NBSP */
4101             break;
4102             }
4103           }
4104         break;
4105
4106         case OP_NOT_VSPACE:
4107         for (i = 1; i <= min; i++)
4108           {
4109           if (eptr >= md->end_subject)
4110             {
4111             SCHECK_PARTIAL();
4112             MRRETURN(MATCH_NOMATCH);
4113             }
4114           switch(*eptr++)
4115             {
4116             default: break;
4117             case 0x0a:      /* LF */
4118             case 0x0b:      /* VT */
4119             case 0x0c:      /* FF */
4120             case 0x0d:      /* CR */
4121             case 0x85:      /* NEL */
4122             MRRETURN(MATCH_NOMATCH);
4123             }
4124           }
4125         break;
4126
4127         case OP_VSPACE:
4128         for (i = 1; i <= min; i++)
4129           {
4130           if (eptr >= md->end_subject)
4131             {
4132             SCHECK_PARTIAL();
4133             MRRETURN(MATCH_NOMATCH);
4134             }
4135           switch(*eptr++)
4136             {
4137             default: MRRETURN(MATCH_NOMATCH);
4138             case 0x0a:      /* LF */
4139             case 0x0b:      /* VT */
4140             case 0x0c:      /* FF */
4141             case 0x0d:      /* CR */
4142             case 0x85:      /* NEL */
4143             break;
4144             }
4145           }
4146         break;
4147
4148         case OP_NOT_DIGIT:
4149         for (i = 1; i <= min; i++)
4150           {
4151           if (eptr >= md->end_subject)
4152             {
4153             SCHECK_PARTIAL();
4154             MRRETURN(MATCH_NOMATCH);
4155             }
4156           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4157           }
4158         break;
4159
4160         case OP_DIGIT:
4161         for (i = 1; i <= min; i++)
4162           {
4163           if (eptr >= md->end_subject)
4164             {
4165             SCHECK_PARTIAL();
4166             MRRETURN(MATCH_NOMATCH);
4167             }
4168           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4169           }
4170         break;
4171
4172         case OP_NOT_WHITESPACE:
4173         for (i = 1; i <= min; i++)
4174           {
4175           if (eptr >= md->end_subject)
4176             {
4177             SCHECK_PARTIAL();
4178             MRRETURN(MATCH_NOMATCH);
4179             }
4180           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4181           }
4182         break;
4183
4184         case OP_WHITESPACE:
4185         for (i = 1; i <= min; i++)
4186           {
4187           if (eptr >= md->end_subject)
4188             {
4189             SCHECK_PARTIAL();
4190             MRRETURN(MATCH_NOMATCH);
4191             }
4192           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4193           }
4194         break;
4195
4196         case OP_NOT_WORDCHAR:
4197         for (i = 1; i <= min; i++)
4198           {
4199           if (eptr >= md->end_subject)
4200             {
4201             SCHECK_PARTIAL();
4202             MRRETURN(MATCH_NOMATCH);
4203             }
4204           if ((md->ctypes[*eptr++] & ctype_word) != 0)
4205             MRRETURN(MATCH_NOMATCH);
4206           }
4207         break;
4208
4209         case OP_WORDCHAR:
4210         for (i = 1; i <= min; i++)
4211           {
4212           if (eptr >= md->end_subject)
4213             {
4214             SCHECK_PARTIAL();
4215             MRRETURN(MATCH_NOMATCH);
4216             }
4217           if ((md->ctypes[*eptr++] & ctype_word) == 0)
4218             MRRETURN(MATCH_NOMATCH);
4219           }
4220         break;
4221
4222         default:
4223         RRETURN(PCRE_ERROR_INTERNAL);
4224         }
4225       }
4226
4227     /* If min = max, continue at the same level without recursing */
4228
4229     if (min == max) continue;
4230
4231     /* If minimizing, we have to test the rest of the pattern before each
4232     subsequent match. Again, separate the UTF-8 case for speed, and also
4233     separate the UCP cases. */
4234
4235     if (minimize)
4236       {
4237 #ifdef SUPPORT_UCP
4238       if (prop_type >= 0)
4239         {
4240         switch(prop_type)
4241           {
4242           case PT_ANY:
4243           for (fi = min;; fi++)
4244             {
4245             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4246             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4247             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4248             if (eptr >= md->end_subject)
4249               {
4250               SCHECK_PARTIAL();
4251               MRRETURN(MATCH_NOMATCH);
4252               }
4253             GETCHARINCTEST(c, eptr);
4254             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4255             }
4256           /* Control never gets here */
4257
4258           case PT_LAMP:
4259           for (fi = min;; fi++)
4260             {
4261             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4262             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4263             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4264             if (eptr >= md->end_subject)
4265               {
4266               SCHECK_PARTIAL();
4267               MRRETURN(MATCH_NOMATCH);
4268               }
4269             GETCHARINCTEST(c, eptr);
4270             prop_chartype = UCD_CHARTYPE(c);
4271             if ((prop_chartype == ucp_Lu ||
4272                  prop_chartype == ucp_Ll ||
4273                  prop_chartype == ucp_Lt) == prop_fail_result)
4274               MRRETURN(MATCH_NOMATCH);
4275             }
4276           /* Control never gets here */
4277
4278           case PT_GC:
4279           for (fi = min;; fi++)
4280             {
4281             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4282             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4283             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4284             if (eptr >= md->end_subject)
4285               {
4286               SCHECK_PARTIAL();
4287               MRRETURN(MATCH_NOMATCH);
4288               }
4289             GETCHARINCTEST(c, eptr);
4290             prop_category = UCD_CATEGORY(c);
4291             if ((prop_category == prop_value) == prop_fail_result)
4292               MRRETURN(MATCH_NOMATCH);
4293             }
4294           /* Control never gets here */
4295
4296           case PT_PC:
4297           for (fi = min;; fi++)
4298             {
4299             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4300             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4301             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4302             if (eptr >= md->end_subject)
4303               {
4304               SCHECK_PARTIAL();
4305               MRRETURN(MATCH_NOMATCH);
4306               }
4307             GETCHARINCTEST(c, eptr);
4308             prop_chartype = UCD_CHARTYPE(c);
4309             if ((prop_chartype == prop_value) == prop_fail_result)
4310               MRRETURN(MATCH_NOMATCH);
4311             }
4312           /* Control never gets here */
4313
4314           case PT_SC:
4315           for (fi = min;; fi++)
4316             {
4317             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4318             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4319             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4320             if (eptr >= md->end_subject)
4321               {
4322               SCHECK_PARTIAL();
4323               MRRETURN(MATCH_NOMATCH);
4324               }
4325             GETCHARINCTEST(c, eptr);
4326             prop_script = UCD_SCRIPT(c);
4327             if ((prop_script == prop_value) == prop_fail_result)
4328               MRRETURN(MATCH_NOMATCH);
4329             }
4330           /* Control never gets here */
4331
4332           case PT_ALNUM:
4333           for (fi = min;; fi++)
4334             {
4335             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4336             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4338             if (eptr >= md->end_subject)
4339               {
4340               SCHECK_PARTIAL();
4341               MRRETURN(MATCH_NOMATCH);
4342               }
4343             GETCHARINCTEST(c, eptr);
4344             prop_category = UCD_CATEGORY(c);
4345             if ((prop_category == ucp_L || prop_category == ucp_N)
4346                    == prop_fail_result)
4347               MRRETURN(MATCH_NOMATCH);
4348             }
4349           /* Control never gets here */
4350
4351           case PT_SPACE:    /* Perl space */
4352           for (fi = min;; fi++)
4353             {
4354             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4355             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4357             if (eptr >= md->end_subject)
4358               {
4359               SCHECK_PARTIAL();
4360               MRRETURN(MATCH_NOMATCH);
4361               }
4362             GETCHARINCTEST(c, eptr);
4363             prop_category = UCD_CATEGORY(c);
4364             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365                  c == CHAR_FF || c == CHAR_CR)
4366                    == prop_fail_result)
4367               MRRETURN(MATCH_NOMATCH);
4368             }
4369           /* Control never gets here */
4370
4371           case PT_PXSPACE:  /* POSIX space */
4372           for (fi = min;; fi++)
4373             {
4374             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4375             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4377             if (eptr >= md->end_subject)
4378               {
4379               SCHECK_PARTIAL();
4380               MRRETURN(MATCH_NOMATCH);
4381               }
4382             GETCHARINCTEST(c, eptr);
4383             prop_category = UCD_CATEGORY(c);
4384             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386                    == prop_fail_result)
4387               MRRETURN(MATCH_NOMATCH);
4388             }
4389           /* Control never gets here */
4390
4391           case PT_WORD:
4392           for (fi = min;; fi++)
4393             {
4394             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4395             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396             if (fi >= max) MRRETURN(MATCH_NOMATCH);
4397             if (eptr >= md->end_subject)
4398               {
4399               SCHECK_PARTIAL();
4400               MRRETURN(MATCH_NOMATCH);
4401               }
4402             GETCHARINCTEST(c, eptr);
4403             prop_category = UCD_CATEGORY(c);
4404             if ((prop_category == ucp_L ||
4405                  prop_category == ucp_N ||
4406                  c == CHAR_UNDERSCORE)
4407                    == prop_fail_result)
4408               MRRETURN(MATCH_NOMATCH);
4409             }
4410           /* Control never gets here */
4411
4412           /* This should never occur */
4413
4414           default:
4415           RRETURN(PCRE_ERROR_INTERNAL);
4416           }
4417         }
4418
4419       /* Match extended Unicode sequences. We will get here only if the
4420       support is in the binary; otherwise a compile-time error occurs. */
4421
4422       else if (ctype == OP_EXTUNI)
4423         {
4424         for (fi = min;; fi++)
4425           {
4426           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4427           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4428           if (fi >= max) MRRETURN(MATCH_NOMATCH);
4429           if (eptr >= md->end_subject)
4430             {
4431             SCHECK_PARTIAL();
4432             MRRETURN(MATCH_NOMATCH);
4433             }
4434           GETCHARINCTEST(c, eptr);
4435           prop_category = UCD_CATEGORY(c);
4436           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4437           while (eptr < md->end_subject)
4438             {
4439             int len = 1;
4440             if (!utf8) c = *eptr;
4441               else { GETCHARLEN(c, eptr, len); }
4442             prop_category = UCD_CATEGORY(c);
4443             if (prop_category != ucp_M) break;
4444             eptr += len;
4445             }
4446           }
4447         }
4448
4449       else
4450 #endif     /* SUPPORT_UCP */
4451
4452 #ifdef SUPPORT_UTF8
4453       /* UTF-8 mode */
4454       if (utf8)
4455         {
4456         for (fi = min;; fi++)
4457           {
4458           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4459           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4460           if (fi >= max) MRRETURN(MATCH_NOMATCH);
4461           if (eptr >= md->end_subject)
4462             {
4463             SCHECK_PARTIAL();
4464             MRRETURN(MATCH_NOMATCH);
4465             }
4466           if (ctype == OP_ANY && IS_NEWLINE(eptr))
4467             MRRETURN(MATCH_NOMATCH);
4468           GETCHARINC(c, eptr);
4469           switch(ctype)
4470             {
4471             case OP_ANY:        /* This is the non-NL case */
4472             case OP_ALLANY:
4473             case OP_ANYBYTE:
4474             break;
4475
4476             case OP_ANYNL:
4477             switch(c)
4478               {
4479               default: MRRETURN(MATCH_NOMATCH);
4480               case 0x000d:
4481               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4482               break;
4483               case 0x000a:
4484               break;
4485
4486               case 0x000b:
4487               case 0x000c:
4488               case 0x0085:
4489               case 0x2028:
4490               case 0x2029:
4491               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4492               break;
4493               }
4494             break;
4495
4496             case OP_NOT_HSPACE:
4497             switch(c)
4498               {
4499               default: break;
4500               case 0x09:      /* HT */
4501               case 0x20:      /* SPACE */
4502               case 0xa0:      /* NBSP */
4503               case 0x1680:    /* OGHAM SPACE MARK */
4504               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4505               case 0x2000:    /* EN QUAD */
4506               case 0x2001:    /* EM QUAD */
4507               case 0x2002:    /* EN SPACE */
4508               case 0x2003:    /* EM SPACE */
4509               case 0x2004:    /* THREE-PER-EM SPACE */
4510               case 0x2005:    /* FOUR-PER-EM SPACE */
4511               case 0x2006:    /* SIX-PER-EM SPACE */
4512               case 0x2007:    /* FIGURE SPACE */
4513               case 0x2008:    /* PUNCTUATION SPACE */
4514               case 0x2009:    /* THIN SPACE */
4515               case 0x200A:    /* HAIR SPACE */
4516               case 0x202f:    /* NARROW NO-BREAK SPACE */
4517               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4518               case 0x3000:    /* IDEOGRAPHIC SPACE */
4519               MRRETURN(MATCH_NOMATCH);
4520               }
4521             break;
4522
4523             case OP_HSPACE:
4524             switch(c)
4525               {
4526               default: MRRETURN(MATCH_NOMATCH);
4527               case 0x09:      /* HT */
4528               case 0x20:      /* SPACE */
4529               case 0xa0:      /* NBSP */
4530               case 0x1680:    /* OGHAM SPACE MARK */
4531               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4532               case 0x2000:    /* EN QUAD */
4533               case 0x2001:    /* EM QUAD */
4534               case 0x2002:    /* EN SPACE */
4535               case 0x2003:    /* EM SPACE */
4536               case 0x2004:    /* THREE-PER-EM SPACE */
4537               case 0x2005:    /* FOUR-PER-EM SPACE */
4538               case 0x2006:    /* SIX-PER-EM SPACE */
4539               case 0x2007:    /* FIGURE SPACE */
4540               case 0x2008:    /* PUNCTUATION SPACE */
4541               case 0x2009:    /* THIN SPACE */
4542               case 0x200A:    /* HAIR SPACE */
4543               case 0x202f:    /* NARROW NO-BREAK SPACE */
4544               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4545               case 0x3000:    /* IDEOGRAPHIC SPACE */
4546               break;
4547               }
4548             break;
4549
4550             case OP_NOT_VSPACE:
4551             switch(c)
4552               {
4553               default: break;
4554               case 0x0a:      /* LF */
4555               case 0x0b:      /* VT */
4556               case 0x0c:      /* FF */
4557               case 0x0d:      /* CR */
4558               case 0x85:      /* NEL */
4559               case 0x2028:    /* LINE SEPARATOR */
4560               case 0x2029:    /* PARAGRAPH SEPARATOR */
4561               MRRETURN(MATCH_NOMATCH);
4562               }
4563             break;
4564
4565             case OP_VSPACE:
4566             switch(c)
4567               {
4568               default: MRRETURN(MATCH_NOMATCH);
4569               case 0x0a:      /* LF */
4570               case 0x0b:      /* VT */
4571               case 0x0c:      /* FF */
4572               case 0x0d:      /* CR */
4573               case 0x85:      /* NEL */
4574               case 0x2028:    /* LINE SEPARATOR */
4575               case 0x2029:    /* PARAGRAPH SEPARATOR */
4576               break;
4577               }
4578             break;
4579
4580             case OP_NOT_DIGIT:
4581             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4582               MRRETURN(MATCH_NOMATCH);
4583             break;
4584
4585             case OP_DIGIT:
4586             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4587               MRRETURN(MATCH_NOMATCH);
4588             break;
4589
4590             case OP_NOT_WHITESPACE:
4591             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4592               MRRETURN(MATCH_NOMATCH);
4593             break;
4594
4595             case OP_WHITESPACE:
4596             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4597               MRRETURN(MATCH_NOMATCH);
4598             break;
4599
4600             case OP_NOT_WORDCHAR:
4601             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4602               MRRETURN(MATCH_NOMATCH);
4603             break;
4604
4605             case OP_WORDCHAR:
4606             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4607               MRRETURN(MATCH_NOMATCH);
4608             break;
4609
4610             default:
4611             RRETURN(PCRE_ERROR_INTERNAL);
4612             }
4613           }
4614         }
4615       else
4616 #endif
4617       /* Not UTF-8 mode */
4618         {
4619         for (fi = min;; fi++)
4620           {
4621           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4622           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4623           if (fi >= max) MRRETURN(MATCH_NOMATCH);
4624           if (eptr >= md->end_subject)
4625             {
4626             SCHECK_PARTIAL();
4627             MRRETURN(MATCH_NOMATCH);
4628             }
4629           if (ctype == OP_ANY && IS_NEWLINE(eptr))
4630             MRRETURN(MATCH_NOMATCH);
4631           c = *eptr++;
4632           switch(ctype)
4633             {
4634             case OP_ANY:     /* This is the non-NL case */
4635             case OP_ALLANY:
4636             case OP_ANYBYTE:
4637             break;
4638
4639             case OP_ANYNL:
4640             switch(c)
4641               {
4642               default: MRRETURN(MATCH_NOMATCH);
4643               case 0x000d:
4644               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4645               break;
4646
4647               case 0x000a:
4648               break;
4649
4650               case 0x000b:
4651               case 0x000c:
4652               case 0x0085:
4653               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4654               break;
4655               }
4656             break;
4657
4658             case OP_NOT_HSPACE:
4659             switch(c)
4660               {
4661               default: break;
4662               case 0x09:      /* HT */
4663               case 0x20:      /* SPACE */
4664               case 0xa0:      /* NBSP */
4665               MRRETURN(MATCH_NOMATCH);
4666               }
4667             break;
4668
4669             case OP_HSPACE:
4670             switch(c)
4671               {
4672               default: MRRETURN(MATCH_NOMATCH);
4673               case 0x09:      /* HT */
4674               case 0x20:      /* SPACE */
4675               case 0xa0:      /* NBSP */
4676               break;
4677               }
4678             break;
4679
4680             case OP_NOT_VSPACE:
4681             switch(c)
4682               {
4683               default: break;
4684               case 0x0a:      /* LF */
4685               case 0x0b:      /* VT */
4686               case 0x0c:      /* FF */
4687               case 0x0d:      /* CR */
4688               case 0x85:      /* NEL */
4689               MRRETURN(MATCH_NOMATCH);
4690               }
4691             break;
4692
4693             case OP_VSPACE:
4694             switch(c)
4695               {
4696               default: MRRETURN(MATCH_NOMATCH);
4697               case 0x0a:      /* LF */
4698               case 0x0b:      /* VT */
4699               case 0x0c:      /* FF */
4700               case 0x0d:      /* CR */
4701               case 0x85:      /* NEL */
4702               break;
4703               }
4704             break;
4705
4706             case OP_NOT_DIGIT:
4707             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4708             break;
4709
4710             case OP_DIGIT:
4711             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4712             break;
4713
4714             case OP_NOT_WHITESPACE:
4715             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4716             break;
4717
4718             case OP_WHITESPACE:
4719             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4720             break;
4721
4722             case OP_NOT_WORDCHAR:
4723             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4724             break;
4725
4726             case OP_WORDCHAR:
4727             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4728             break;
4729
4730             default:
4731             RRETURN(PCRE_ERROR_INTERNAL);
4732             }
4733           }
4734         }
4735       /* Control never gets here */
4736       }
4737
4738     /* If maximizing, it is worth using inline code for speed, doing the type
4739     test once at the start (i.e. keep it out of the loop). Again, keep the
4740     UTF-8 and UCP stuff separate. */
4741
4742     else
4743       {
4744       pp = eptr;  /* Remember where we started */
4745
4746 #ifdef SUPPORT_UCP
4747       if (prop_type >= 0)
4748         {
4749         switch(prop_type)
4750           {
4751           case PT_ANY:
4752           for (i = min; i < max; i++)
4753             {
4754             int len = 1;
4755             if (eptr >= md->end_subject)
4756               {
4757               SCHECK_PARTIAL();
4758               break;
4759               }
4760             GETCHARLENTEST(c, eptr, len);
4761             if (prop_fail_result) break;
4762             eptr+= len;
4763             }
4764           break;
4765
4766           case PT_LAMP:
4767           for (i = min; i < max; i++)
4768             {
4769             int len = 1;
4770             if (eptr >= md->end_subject)
4771               {
4772               SCHECK_PARTIAL();
4773               break;
4774               }
4775             GETCHARLENTEST(c, eptr, len);
4776             prop_chartype = UCD_CHARTYPE(c);
4777             if ((prop_chartype == ucp_Lu ||
4778                  prop_chartype == ucp_Ll ||
4779                  prop_chartype == ucp_Lt) == prop_fail_result)
4780               break;
4781             eptr+= len;
4782             }
4783           break;
4784
4785           case PT_GC:
4786           for (i = min; i < max; i++)
4787             {
4788             int len = 1;
4789             if (eptr >= md->end_subject)
4790               {
4791               SCHECK_PARTIAL();
4792               break;
4793               }
4794             GETCHARLENTEST(c, eptr, len);
4795             prop_category = UCD_CATEGORY(c);
4796             if ((prop_category == prop_value) == prop_fail_result)
4797               break;
4798             eptr+= len;
4799             }
4800           break;
4801
4802           case PT_PC:
4803           for (i = min; i < max; i++)
4804             {
4805             int len = 1;
4806             if (eptr >= md->end_subject)
4807               {
4808               SCHECK_PARTIAL();
4809               break;
4810               }
4811             GETCHARLENTEST(c, eptr, len);
4812             prop_chartype = UCD_CHARTYPE(c);
4813             if ((prop_chartype == prop_value) == prop_fail_result)
4814               break;
4815             eptr+= len;
4816             }
4817           break;
4818
4819           case PT_SC:
4820           for (i = min; i < max; i++)
4821             {
4822             int len = 1;
4823             if (eptr >= md->end_subject)
4824               {
4825               SCHECK_PARTIAL();
4826               break;
4827               }
4828             GETCHARLENTEST(c, eptr, len);
4829             prop_script = UCD_SCRIPT(c);
4830             if ((prop_script == prop_value) == prop_fail_result)
4831               break;
4832             eptr+= len;
4833             }
4834           break;
4835
4836           case PT_ALNUM:
4837           for (i = min; i < max; i++)
4838             {
4839             int len = 1;
4840             if (eptr >= md->end_subject)
4841               {
4842               SCHECK_PARTIAL();
4843               break;
4844               }
4845             GETCHARLENTEST(c, eptr, len);
4846             prop_category = UCD_CATEGORY(c);
4847             if ((prop_category == ucp_L || prop_category == ucp_N)
4848                  == prop_fail_result)
4849               break;
4850             eptr+= len;
4851             }
4852           break;
4853
4854           case PT_SPACE:    /* Perl space */
4855           for (i = min; i < max; i++)
4856             {
4857             int len = 1;
4858             if (eptr >= md->end_subject)
4859               {
4860               SCHECK_PARTIAL();
4861               break;
4862               }
4863             GETCHARLENTEST(c, eptr, len);
4864             prop_category = UCD_CATEGORY(c);
4865             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4866                  c == CHAR_FF || c == CHAR_CR)
4867                  == prop_fail_result)
4868               break;
4869             eptr+= len;
4870             }
4871           break;
4872
4873           case PT_PXSPACE:  /* POSIX space */
4874           for (i = min; i < max; i++)
4875             {
4876             int len = 1;
4877             if (eptr >= md->end_subject)
4878               {
4879               SCHECK_PARTIAL();
4880               break;
4881               }
4882             GETCHARLENTEST(c, eptr, len);
4883             prop_category = UCD_CATEGORY(c);
4884             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4885                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4886                  == prop_fail_result)
4887               break;
4888             eptr+= len;
4889             }
4890           break;
4891
4892           case PT_WORD:
4893           for (i = min; i < max; i++)
4894             {
4895             int len = 1;
4896             if (eptr >= md->end_subject)
4897               {
4898               SCHECK_PARTIAL();
4899               break;
4900               }
4901             GETCHARLENTEST(c, eptr, len);
4902             prop_category = UCD_CATEGORY(c);
4903             if ((prop_category == ucp_L || prop_category == ucp_N ||
4904                  c == CHAR_UNDERSCORE) == prop_fail_result)
4905               break;
4906             eptr+= len;
4907             }
4908           break;
4909
4910           default:
4911           RRETURN(PCRE_ERROR_INTERNAL);
4912           }
4913
4914         /* eptr is now past the end of the maximum run */
4915
4916         if (possessive) continue;
4917         for(;;)
4918           {
4919           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4920           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4921           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4922           if (utf8) BACKCHAR(eptr);
4923           }
4924         }
4925
4926       /* Match extended Unicode sequences. We will get here only if the
4927       support is in the binary; otherwise a compile-time error occurs. */
4928
4929       else if (ctype == OP_EXTUNI)
4930         {
4931         for (i = min; i < max; i++)
4932           {
4933           if (eptr >= md->end_subject)
4934             {
4935             SCHECK_PARTIAL();
4936             break;
4937             }
4938           GETCHARINCTEST(c, eptr);
4939           prop_category = UCD_CATEGORY(c);
4940           if (prop_category == ucp_M) break;
4941           while (eptr < md->end_subject)
4942             {
4943             int len = 1;
4944             if (!utf8) c = *eptr; else
4945               {
4946               GETCHARLEN(c, eptr, len);
4947               }
4948             prop_category = UCD_CATEGORY(c);
4949             if (prop_category != ucp_M) break;
4950             eptr += len;
4951             }
4952           }
4953
4954         /* eptr is now past the end of the maximum run */
4955
4956         if (possessive) continue;
4957
4958         for(;;)
4959           {
4960           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4961           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4962           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4963           for (;;)                        /* Move back over one extended */
4964             {
4965             int len = 1;
4966             if (!utf8) c = *eptr; else
4967               {
4968               BACKCHAR(eptr);
4969               GETCHARLEN(c, eptr, len);
4970               }
4971             prop_category = UCD_CATEGORY(c);
4972             if (prop_category != ucp_M) break;
4973             eptr--;
4974             }
4975           }
4976         }
4977
4978       else
4979 #endif   /* SUPPORT_UCP */
4980
4981 #ifdef SUPPORT_UTF8
4982       /* UTF-8 mode */
4983
4984       if (utf8)
4985         {
4986         switch(ctype)
4987           {
4988           case OP_ANY:
4989           if (max < INT_MAX)
4990             {
4991             for (i = min; i < max; i++)
4992               {
4993               if (eptr >= md->end_subject)
4994                 {
4995                 SCHECK_PARTIAL();
4996                 break;
4997                 }
4998               if (IS_NEWLINE(eptr)) break;
4999               eptr++;
5000               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5001               }
5002             }
5003
5004           /* Handle unlimited UTF-8 repeat */
5005
5006           else
5007             {
5008             for (i = min; i < max; i++)
5009               {
5010               if (eptr >= md->end_subject)
5011                 {
5012                 SCHECK_PARTIAL();
5013                 break;
5014                 }
5015               if (IS_NEWLINE(eptr)) break;
5016               eptr++;
5017               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5018               }
5019             }
5020           break;
5021
5022           case OP_ALLANY:
5023           if (max < INT_MAX)
5024             {
5025             for (i = min; i < max; i++)
5026               {
5027               if (eptr >= md->end_subject)
5028                 {
5029                 SCHECK_PARTIAL();
5030                 break;
5031                 }
5032               eptr++;
5033               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5034               }
5035             }
5036           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5037           break;
5038
5039           /* The byte case is the same as non-UTF8 */
5040
5041           case OP_ANYBYTE:
5042           c = max - min;
5043           if (c > (unsigned int)(md->end_subject - eptr))
5044             {
5045             eptr = md->end_subject;
5046             SCHECK_PARTIAL();
5047             }
5048           else eptr += c;
5049           break;
5050
5051           case OP_ANYNL:
5052           for (i = min; i < max; i++)
5053             {
5054             int len = 1;
5055             if (eptr >= md->end_subject)
5056               {
5057               SCHECK_PARTIAL();
5058               break;
5059               }
5060             GETCHARLEN(c, eptr, len);
5061             if (c == 0x000d)
5062               {
5063               if (++eptr >= md->end_subject) break;
5064               if (*eptr == 0x000a) eptr++;
5065               }
5066             else
5067               {
5068               if (c != 0x000a &&
5069                   (md->bsr_anycrlf ||
5070                    (c != 0x000b && c != 0x000c &&
5071                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
5072                 break;
5073               eptr += len;
5074               }
5075             }
5076           break;
5077
5078           case OP_NOT_HSPACE:
5079           case OP_HSPACE:
5080           for (i = min; i < max; i++)
5081             {
5082             BOOL gotspace;
5083             int len = 1;
5084             if (eptr >= md->end_subject)
5085               {
5086               SCHECK_PARTIAL();
5087               break;
5088               }
5089             GETCHARLEN(c, eptr, len);
5090             switch(c)
5091               {
5092               default: gotspace = FALSE; break;
5093               case 0x09:      /* HT */
5094               case 0x20:      /* SPACE */
5095               case 0xa0:      /* NBSP */
5096               case 0x1680:    /* OGHAM SPACE MARK */
5097               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5098               case 0x2000:    /* EN QUAD */
5099               case 0x2001:    /* EM QUAD */
5100               case 0x2002:    /* EN SPACE */
5101               case 0x2003:    /* EM SPACE */
5102               case 0x2004:    /* THREE-PER-EM SPACE */
5103               case 0x2005:    /* FOUR-PER-EM SPACE */
5104               case 0x2006:    /* SIX-PER-EM SPACE */
5105               case 0x2007:    /* FIGURE SPACE */
5106               case 0x2008:    /* PUNCTUATION SPACE */
5107               case 0x2009:    /* THIN SPACE */
5108               case 0x200A:    /* HAIR SPACE */
5109               case 0x202f:    /* NARROW NO-BREAK SPACE */
5110               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5111               case 0x3000:    /* IDEOGRAPHIC SPACE */
5112               gotspace = TRUE;
5113               break;
5114               }
5115             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5116             eptr += len;
5117             }
5118           break;
5119
5120           case OP_NOT_VSPACE:
5121           case OP_VSPACE:
5122           for (i = min; i < max; i++)
5123             {
5124             BOOL gotspace;
5125             int len = 1;
5126             if (eptr >= md->end_subject)
5127               {
5128               SCHECK_PARTIAL();
5129               break;
5130               }
5131             GETCHARLEN(c, eptr, len);
5132             switch(c)
5133               {
5134               default: gotspace = FALSE; break;
5135               case 0x0a:      /* LF */
5136               case 0x0b:      /* VT */
5137               case 0x0c:      /* FF */
5138               case 0x0d:      /* CR */
5139               case 0x85:      /* NEL */
5140               case 0x2028:    /* LINE SEPARATOR */
5141               case 0x2029:    /* PARAGRAPH SEPARATOR */
5142               gotspace = TRUE;
5143               break;
5144               }
5145             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5146             eptr += len;
5147             }
5148           break;
5149
5150           case OP_NOT_DIGIT:
5151           for (i = min; i < max; i++)
5152             {
5153             int len = 1;
5154             if (eptr >= md->end_subject)
5155               {
5156               SCHECK_PARTIAL();
5157               break;
5158               }
5159             GETCHARLEN(c, eptr, len);
5160             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5161             eptr+= len;
5162             }
5163           break;
5164
5165           case OP_DIGIT:
5166           for (i = min; i < max; i++)
5167             {
5168             int len = 1;
5169             if (eptr >= md->end_subject)
5170               {
5171               SCHECK_PARTIAL();
5172               break;
5173               }
5174             GETCHARLEN(c, eptr, len);
5175             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5176             eptr+= len;
5177             }
5178           break;
5179
5180           case OP_NOT_WHITESPACE:
5181           for (i = min; i < max; i++)
5182             {
5183             int len = 1;
5184             if (eptr >= md->end_subject)
5185               {
5186               SCHECK_PARTIAL();
5187               break;
5188               }
5189             GETCHARLEN(c, eptr, len);
5190             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5191             eptr+= len;
5192             }
5193           break;
5194
5195           case OP_WHITESPACE:
5196           for (i = min; i < max; i++)
5197             {
5198             int len = 1;
5199             if (eptr >= md->end_subject)
5200               {
5201               SCHECK_PARTIAL();
5202               break;
5203               }
5204             GETCHARLEN(c, eptr, len);
5205             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5206             eptr+= len;
5207             }
5208           break;
5209
5210           case OP_NOT_WORDCHAR:
5211           for (i = min; i < max; i++)
5212             {
5213             int len = 1;
5214             if (eptr >= md->end_subject)
5215               {
5216               SCHECK_PARTIAL();
5217               break;
5218               }
5219             GETCHARLEN(c, eptr, len);
5220             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5221             eptr+= len;
5222             }
5223           break;
5224
5225           case OP_WORDCHAR:
5226           for (i = min; i < max; i++)
5227             {
5228             int len = 1;
5229             if (eptr >= md->end_subject)
5230               {
5231               SCHECK_PARTIAL();
5232               break;
5233               }
5234             GETCHARLEN(c, eptr, len);
5235             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5236             eptr+= len;
5237             }
5238           break;
5239
5240           default:
5241           RRETURN(PCRE_ERROR_INTERNAL);
5242           }
5243
5244         /* eptr is now past the end of the maximum run */
5245
5246         if (possessive) continue;
5247         for(;;)
5248           {
5249           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5250           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5251           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5252           BACKCHAR(eptr);
5253           }
5254         }
5255       else
5256 #endif  /* SUPPORT_UTF8 */
5257
5258       /* Not UTF-8 mode */
5259         {
5260         switch(ctype)
5261           {
5262           case OP_ANY:
5263           for (i = min; i < max; i++)
5264             {
5265             if (eptr >= md->end_subject)
5266               {
5267               SCHECK_PARTIAL();
5268               break;
5269               }
5270             if (IS_NEWLINE(eptr)) break;
5271             eptr++;
5272             }
5273           break;
5274
5275           case OP_ALLANY:
5276           case OP_ANYBYTE:
5277           c = max - min;
5278           if (c > (unsigned int)(md->end_subject - eptr))
5279             {
5280             eptr = md->end_subject;
5281             SCHECK_PARTIAL();
5282             }
5283           else eptr += c;
5284           break;
5285
5286           case OP_ANYNL:
5287           for (i = min; i < max; i++)
5288             {
5289             if (eptr >= md->end_subject)
5290               {
5291               SCHECK_PARTIAL();
5292               break;
5293               }
5294             c = *eptr;
5295             if (c == 0x000d)
5296               {
5297               if (++eptr >= md->end_subject) break;
5298               if (*eptr == 0x000a) eptr++;
5299               }
5300             else
5301               {
5302               if (c != 0x000a &&
5303                   (md->bsr_anycrlf ||
5304                     (c != 0x000b && c != 0x000c && c != 0x0085)))
5305                 break;
5306               eptr++;
5307               }
5308             }
5309           break;
5310
5311           case OP_NOT_HSPACE:
5312           for (i = min; i < max; i++)
5313             {
5314             if (eptr >= md->end_subject)
5315               {
5316               SCHECK_PARTIAL();
5317               break;
5318               }
5319             c = *eptr;
5320             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5321             eptr++;
5322             }
5323           break;
5324
5325           case OP_HSPACE:
5326           for (i = min; i < max; i++)
5327             {
5328             if (eptr >= md->end_subject)
5329               {
5330               SCHECK_PARTIAL();
5331               break;
5332               }
5333             c = *eptr;
5334             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5335             eptr++;
5336             }
5337           break;
5338
5339           case OP_NOT_VSPACE:
5340           for (i = min; i < max; i++)
5341             {
5342             if (eptr >= md->end_subject)
5343               {
5344               SCHECK_PARTIAL();
5345               break;
5346               }
5347             c = *eptr;
5348             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5349               break;
5350             eptr++;
5351             }
5352           break;
5353
5354           case OP_VSPACE:
5355           for (i = min; i < max; i++)
5356             {
5357             if (eptr >= md->end_subject)
5358               {
5359               SCHECK_PARTIAL();
5360               break;
5361               }
5362             c = *eptr;
5363             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5364               break;
5365             eptr++;
5366             }
5367           break;
5368
5369           case OP_NOT_DIGIT:
5370           for (i = min; i < max; i++)
5371             {
5372             if (eptr >= md->end_subject)
5373               {
5374               SCHECK_PARTIAL();
5375               break;
5376               }
5377             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5378             eptr++;
5379             }
5380           break;
5381
5382           case OP_DIGIT:
5383           for (i = min; i < max; i++)
5384             {
5385             if (eptr >= md->end_subject)
5386               {
5387               SCHECK_PARTIAL();
5388               break;
5389               }
5390             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5391             eptr++;
5392             }
5393           break;
5394
5395           case OP_NOT_WHITESPACE:
5396           for (i = min; i < max; i++)
5397             {
5398             if (eptr >= md->end_subject)
5399               {
5400               SCHECK_PARTIAL();
5401               break;
5402               }
5403             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5404             eptr++;
5405             }
5406           break;
5407
5408           case OP_WHITESPACE:
5409           for (i = min; i < max; i++)
5410             {
5411             if (eptr >= md->end_subject)
5412               {
5413               SCHECK_PARTIAL();
5414               break;
5415               }
5416             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5417             eptr++;
5418             }
5419           break;
5420
5421           case OP_NOT_WORDCHAR:
5422           for (i = min; i < max; i++)
5423             {
5424             if (eptr >= md->end_subject)
5425               {
5426               SCHECK_PARTIAL();
5427               break;
5428               }
5429             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5430             eptr++;
5431             }
5432           break;
5433
5434           case OP_WORDCHAR:
5435           for (i = min; i < max; i++)
5436             {
5437             if (eptr >= md->end_subject)
5438               {
5439               SCHECK_PARTIAL();
5440               break;
5441               }
5442             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5443             eptr++;
5444             }
5445           break;
5446
5447           default:
5448           RRETURN(PCRE_ERROR_INTERNAL);
5449           }
5450
5451         /* eptr is now past the end of the maximum run */
5452
5453         if (possessive) continue;
5454         while (eptr >= pp)
5455           {
5456           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5457           eptr--;
5458           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5459           }
5460         }
5461
5462       /* Get here if we can't make it match with any permitted repetitions */
5463
5464       MRRETURN(MATCH_NOMATCH);
5465       }
5466     /* Control never gets here */
5467
5468     /* There's been some horrible disaster. Arrival here can only mean there is
5469     something seriously wrong in the code above or the OP_xxx definitions. */
5470
5471     default:
5472     DPRINTF(("Unknown opcode %d\n", *ecode));
5473     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5474     }
5475
5476   /* Do not stick any code in here without much thought; it is assumed
5477   that "continue" in the code above comes out to here to repeat the main
5478   loop. */
5479
5480   }             /* End of main loop */
5481 /* Control never reaches here */
5482
5483
5484 /* When compiling to use the heap rather than the stack for recursive calls to
5485 match(), the RRETURN() macro jumps here. The number that is saved in
5486 frame->Xwhere indicates which label we actually want to return to. */
5487
5488 #ifdef NO_RECURSE
5489 #define LBL(val) case val: goto L_RM##val;
5490 HEAP_RETURN:
5491 switch (frame->Xwhere)
5492   {
5493   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5494   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5495   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5496   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5497   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5498 #ifdef SUPPORT_UTF8
5499   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5500   LBL(32) LBL(34) LBL(42) LBL(46)
5501 #ifdef SUPPORT_UCP
5502   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5503   LBL(59) LBL(60) LBL(61) LBL(62)
5504 #endif  /* SUPPORT_UCP */
5505 #endif  /* SUPPORT_UTF8 */
5506   default:
5507   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5508   return PCRE_ERROR_INTERNAL;
5509   }
5510 #undef LBL
5511 #endif  /* NO_RECURSE */
5512 }
5513
5514
5515 /***************************************************************************
5516 ****************************************************************************
5517                    RECURSION IN THE match() FUNCTION
5518
5519 Undefine all the macros that were defined above to handle this. The first
5520 group is undefined only when NO_RECURSE is defined; fc and fi always are. */
5521 #ifdef NO_RECURSE
5522 #undef eptr
5523 #undef ecode
5524 #undef mstart
5525 #undef offset_top
5526 #undef ims          /* pcre_exec() below declares a local with this name */
5527 #undef eptrb
5528 #undef flags
5529
5530 #undef callpat
5531 #undef charptr
5532 #undef data
5533 #undef next
5534 #undef pp
5535 #undef prev
5536 #undef saved_eptr
5537
5538 #undef new_recursive
5539
5540 #undef cur_is_word
5541 #undef condition
5542 #undef prev_is_word
5543
5544 #undef original_ims
5545
5546 #undef ctype
5547 #undef length       /* pcre_exec() below has a parameter with this name */
5548 #undef max
5549 #undef min
5550 #undef number
5551 #undef offset
5552 #undef op
5553 #undef save_capture_last
5554 #undef save_offset1
5555 #undef save_offset2
5556 #undef save_offset3
5557 #undef stacksave
5558
5559 #undef newptrb
5560
5561 #endif  /* NO_RECURSE */
5562
5563 /* These two are defined as macros in both cases (with or without NO_RECURSE) */
5564
5565 #undef fc
5566 #undef fi
5567
5568 /***************************************************************************
5569 ***************************************************************************/
5570
5571
5572
5573 /*************************************************
5574 *         Execute a Regular Expression           *
5575 *************************************************/
5576
5577 /* This function applies a compiled re to a subject string and picks out
5578 portions of the string if it matches. Two elements in the vector are set for
5579 each substring: the offsets to the start and end of the substring.
5580
5581 Arguments:
5582   argument_re     points to the compiled expression
5583   extra_data      points to extra data or is NULL
5584   subject         points to the subject string
5585   length          length of subject string (may contain binary zeros)
5586   start_offset    where to start in the subject string
5587   options         option bits
5588   offsets         points to a vector of ints to be filled in with offsets
5589   offsetcount     the number of elements in the vector
5590
5591 Returns:          > 0 => success; value is the number of offset pairs filled in
5592                   = 0 => success, but offsets is not big enough
5593                    -1 => failed to match
5594                  < -1 => some kind of unexpected problem
5595 */
5596
5597 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5598 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5599   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5600   int offsetcount)
5601 {
5602 int rc, resetcount, ocount;
5603 int first_byte = -1;
5604 int req_byte = -1;
5605 int req_byte2 = -1;
5606 int newline;
5607 unsigned long int ims;
5608 BOOL using_temporary_offsets = FALSE;
5609 BOOL anchored;
5610 BOOL startline;
5611 BOOL firstline;
5612 BOOL first_byte_caseless = FALSE;
5613 BOOL req_byte_caseless = FALSE;
5614 BOOL utf8;
5615 match_data match_block;
5616 match_data *md = &match_block;
5617 const uschar *tables;
5618 const uschar *start_bits = NULL;
5619 USPTR start_match = (USPTR)subject + start_offset;
5620 USPTR end_subject;
5621 USPTR start_partial = NULL;
5622 USPTR req_byte_ptr = start_match - 1;
5623
5624 pcre_study_data internal_study;
5625 const pcre_study_data *study;
5626
5627 real_pcre internal_re;
5628 const real_pcre *external_re = (const real_pcre *)argument_re;
5629 const real_pcre *re = external_re;
5630
5631 /* Plausibility checks */
5632 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5633 if (re == NULL || subject == NULL ||
5634    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5635 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5636 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5637
5638 /* This information is for finding all the numbers associated with a given
5639 name, for condition testing. */
5640
5641 md->name_table = (uschar *)re + re->name_table_offset;
5642 md->name_count = re->name_count;
5643 md->name_entry_size = re->name_entry_size;
5644
5645 /* Fish out the optional data from the extra_data structure, first setting
5646 the default values. */
5647
5648 study = NULL;
5649 md->match_limit = MATCH_LIMIT;
5650 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5651 md->callout_data = NULL;
5652
5653 /* The table pointer is always in native byte order. */
5654
5655 tables = external_re->tables;
5656
5657 if (extra_data != NULL)
5658   {
5659   register unsigned int flags = extra_data->flags;
5660   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5661     study = (const pcre_study_data *)extra_data->study_data;
5662   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5663     md->match_limit = extra_data->match_limit;
5664   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5665     md->match_limit_recursion = extra_data->match_limit_recursion;
5666   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5667     md->callout_data = extra_data->callout_data;
5668   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5669   }
5670
5671 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5672 is a feature that makes it possible to save compiled regex and re-use them
5673 in other programs later. */
5674
5675 if (tables == NULL) tables = _pcre_default_tables;
5676
5677 /* Check that the first field in the block is the magic number. If it is not,
5678 test for a regex that was compiled on a host of opposite endianness. If this is
5679 the case, flipped values are put in internal_re and internal_study if there was
5680 study data too. */
5681
5682 if (re->magic_number != MAGIC_NUMBER)
5683   {
5684   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5685   if (re == NULL) return PCRE_ERROR_BADMAGIC;
5686   if (study != NULL) study = &internal_study;
5687   }
5688
5689 /* Set up other data */
5690
5691 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5692 startline = (re->flags & PCRE_STARTLINE) != 0;
5693 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5694
5695 /* The code starts after the real_pcre block and the capture name table. */
5696
5697 md->start_code = (const uschar *)external_re + re->name_table_offset +
5698   re->name_count * re->name_entry_size;
5699
5700 md->start_subject = (USPTR)subject;
5701 md->start_offset = start_offset;
5702 md->end_subject = md->start_subject + length;
5703 end_subject = md->end_subject;
5704
5705 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5706 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5707 md->use_ucp = (re->options & PCRE_UCP) != 0;
5708 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5709
5710 md->notbol = (options & PCRE_NOTBOL) != 0;
5711 md->noteol = (options & PCRE_NOTEOL) != 0;
5712 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5713 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5714 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5715               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5716 md->hitend = FALSE;
5717 md->mark = NULL;                        /* In case never set */
5718
5719 md->recursive = NULL;                   /* No recursion at top level */
5720
5721 md->lcc = tables + lcc_offset;
5722 md->ctypes = tables + ctypes_offset;
5723
5724 /* Handle different \R options. */
5725
5726 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5727   {
5728   case 0:
5729   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5730     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5731   else
5732 #ifdef BSR_ANYCRLF
5733   md->bsr_anycrlf = TRUE;
5734 #else
5735   md->bsr_anycrlf = FALSE;
5736 #endif
5737   break;
5738
5739   case PCRE_BSR_ANYCRLF:
5740   md->bsr_anycrlf = TRUE;
5741   break;
5742
5743   case PCRE_BSR_UNICODE:
5744   md->bsr_anycrlf = FALSE;
5745   break;
5746
5747   default: return PCRE_ERROR_BADNEWLINE;
5748   }
5749
5750 /* Handle different types of newline. The three bits give eight cases. If
5751 nothing is set at run time, whatever was used at compile time applies. */
5752
5753 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5754         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5755   {
5756   case 0: newline = NEWLINE; break;   /* Compile-time default */
5757   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5758   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5759   case PCRE_NEWLINE_CR+
5760        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5761   case PCRE_NEWLINE_ANY: newline = -1; break;
5762   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5763   default: return PCRE_ERROR_BADNEWLINE;
5764   }
5765
5766 if (newline == -2)
5767   {
5768   md->nltype = NLTYPE_ANYCRLF;
5769   }
5770 else if (newline < 0)
5771   {
5772   md->nltype = NLTYPE_ANY;
5773   }
5774 else
5775   {
5776   md->nltype = NLTYPE_FIXED;
5777   if (newline > 255)
5778     {
5779     md->nllen = 2;
5780     md->nl[0] = (newline >> 8) & 255;
5781     md->nl[1] = newline & 255;
5782     }
5783   else
5784     {
5785     md->nllen = 1;
5786     md->nl[0] = newline;
5787     }
5788   }
5789
5790 /* Partial matching was originally supported only for a restricted set of
5791 regexes; from release 8.00 there are no restrictions, but the bits are still
5792 defined (though never set). So there's no harm in leaving this code. */
5793
5794 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5795   return PCRE_ERROR_BADPARTIAL;
5796
5797 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5798 back the character offset. */
5799
5800 #ifdef SUPPORT_UTF8
5801 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5802   {
5803   int tb;
5804   if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5805     return (tb == length && md->partial > 1)?
5806       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5807   if (start_offset > 0 && start_offset < length)
5808     {
5809     tb = ((USPTR)subject)[start_offset] & 0xc0;
5810     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5811     }
5812   }
5813 #endif
5814
5815 /* The ims options can vary during the matching as a result of the presence
5816 of (?ims) items in the pattern. They are kept in a local variable so that
5817 restoring at the exit of a group is easy. */
5818
5819 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5820
5821 /* If the expression has got more back references than the offsets supplied can
5822 hold, we get a temporary chunk of working store to use during the matching.
5823 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5824 of 3. */
5825
5826 ocount = offsetcount - (offsetcount % 3);
5827
5828 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5829   {
5830   ocount = re->top_backref * 3 + 3;
5831   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5832   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5833   using_temporary_offsets = TRUE;
5834   DPRINTF(("Got memory to hold back references\n"));
5835   }
5836 else md->offset_vector = offsets;
5837
5838 md->offset_end = ocount;
5839 md->offset_max = (2*ocount)/3;
5840 md->offset_overflow = FALSE;
5841 md->capture_last = -1;
5842
5843 /* Compute the minimum number of offsets that we need to reset each time. Doing
5844 this makes a huge difference to execution time when there aren't many brackets
5845 in the pattern. */
5846
5847 resetcount = 2 + re->top_bracket * 2;
5848 if (resetcount > offsetcount) resetcount = ocount;
5849
5850 /* Reset the working variable associated with each extraction. These should
5851 never be used unless previously set, but they get saved and restored, and so we
5852 initialize them to avoid reading uninitialized locations. */
5853
5854 if (md->offset_vector != NULL)
5855   {
5856   register int *iptr = md->offset_vector + ocount;
5857   register int *iend = iptr - resetcount/2 + 1;
5858   while (--iptr >= iend) *iptr = -1;
5859   }
5860
5861 /* Set up the first character to match, if available. The first_byte value is
5862 never set for an anchored regular expression, but the anchoring may be forced
5863 at run time, so we have to test for anchoring. The first char may be unset for
5864 an unanchored pattern, of course. If there's no first char and the pattern was
5865 studied, there may be a bitmap of possible first characters. */
5866
5867 if (!anchored)
5868   {
5869   if ((re->flags & PCRE_FIRSTSET) != 0)
5870     {
5871     first_byte = re->first_byte & 255;
5872     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5873       first_byte = md->lcc[first_byte];
5874     }
5875   else
5876     if (!startline && study != NULL &&
5877       (study->flags & PCRE_STUDY_MAPPED) != 0)
5878         start_bits = study->start_bits;
5879   }
5880
5881 /* For anchored or unanchored matches, there may be a "last known required
5882 character" set. */
5883
5884 if ((re->flags & PCRE_REQCHSET) != 0)
5885   {
5886   req_byte = re->req_byte & 255;
5887   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5888   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
5889   }
5890
5891
5892 /* ==========================================================================*/
5893
5894 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5895 the loop runs just once. */
5896
5897 for(;;)
5898   {
5899   USPTR save_end_subject = end_subject;
5900   USPTR new_start_match;
5901
5902   /* Reset the maximum number of extractions we might see. */
5903
5904   if (md->offset_vector != NULL)
5905     {
5906     register int *iptr = md->offset_vector;
5907     register int *iend = iptr + resetcount;
5908     while (iptr < iend) *iptr++ = -1;
5909     }
5910
5911   /* If firstline is TRUE, the start of the match is constrained to the first
5912   line of a multiline string. That is, the match must be before or at the first
5913   newline. Implement this by temporarily adjusting end_subject so that we stop
5914   scanning at a newline. If the match fails at the newline, later code breaks
5915   this loop. */
5916
5917   if (firstline)
5918     {
5919     USPTR t = start_match;
5920 #ifdef SUPPORT_UTF8
5921     if (utf8)
5922       {
5923       while (t < md->end_subject && !IS_NEWLINE(t))
5924         {
5925         t++;
5926         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5927         }
5928       }
5929     else
5930 #endif
5931     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5932     end_subject = t;
5933     }
5934
5935   /* There are some optimizations that avoid running the match if a known
5936   starting point is not found, or if a known later character is not present.
5937   However, there is an option that disables these, for testing and for ensuring
5938   that all callouts do actually occur. The option can be set in the regex by
5939   (*NO_START_OPT) or passed in match-time options. */
5940
5941   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5942     {
5943     /* Advance to a unique first byte if there is one. */
5944
5945     if (first_byte >= 0)
5946       {
5947       if (first_byte_caseless)
5948         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5949           start_match++;
5950       else
5951         while (start_match < end_subject && *start_match != first_byte)
5952           start_match++;
5953       }
5954
5955     /* Or to just after a linebreak for a multiline match */
5956
5957     else if (startline)
5958       {
5959       if (start_match > md->start_subject + start_offset)
5960         {
5961 #ifdef SUPPORT_UTF8
5962         if (utf8)
5963           {
5964           while (start_match < end_subject && !WAS_NEWLINE(start_match))
5965             {
5966             start_match++;
5967             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5968               start_match++;
5969             }
5970           }
5971         else
5972 #endif
5973         while (start_match < end_subject && !WAS_NEWLINE(start_match))
5974           start_match++;
5975
5976         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5977         and we are now at a LF, advance the match position by one more character.
5978         */
5979
5980         if (start_match[-1] == CHAR_CR &&
5981              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5982              start_match < end_subject &&
5983              *start_match == CHAR_NL)
5984           start_match++;
5985         }
5986       }
5987
5988     /* Or to a non-unique first byte after study */
5989
5990     else if (start_bits != NULL)
5991       {
5992       while (start_match < end_subject)
5993         {
5994         register unsigned int c = *start_match;
5995         if ((start_bits[c/8] & (1 << (c&7))) == 0)
5996           {
5997           start_match++;
5998 #ifdef SUPPORT_UTF8
5999           if (utf8)
6000             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6001               start_match++;
6002 #endif
6003           }
6004         else break;
6005         }
6006       }
6007     }   /* Starting optimizations */
6008
6009   /* Restore fudged end_subject */
6010
6011   end_subject = save_end_subject;
6012
6013   /* The next two optimizations are skipped for partial matching, or if they
6014   are disabled by (*NO_START_OPT) in the pattern or by a match-time option. */
6015
6016   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6017     {
6018     /* If the pattern was studied, a minimum subject length may be set. This is
6019     a lower bound; no actual string of that length may actually match the
6020     pattern. Although the value is, strictly, in characters, we treat it as
6021     bytes to avoid spending too much time in this optimization. */
6022
6023     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6024         (pcre_uint32)(end_subject - start_match) < study->minlength)
6025       {
6026       rc = MATCH_NOMATCH;
6027       break;
6028       }
6029
6030     /* If req_byte is set, we know that that character must appear in the
6031     subject for the match to succeed. If the first character is set, req_byte
6032     must be later in the subject; otherwise the test starts at the match point.
6033     This optimization can save a huge amount of backtracking in patterns with
6034     nested unlimited repeats that aren't going to match. Writing separate code
6035     for cased/caseless versions makes it go faster, as does using an
6036     autoincrement and backing off on a match.
6037
6038     HOWEVER: when the subject string is very, very long, searching to its end
6039     can take a long time, and give bad performance on quite ordinary patterns.
6040     This showed up when somebody was matching something like /^\d+C/ on a
6041     32-megabyte string... so we don't do this when the string is sufficiently
6042     long. */
6043
6044     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6045       {
6046       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6047
6048       /* We don't need to repeat the search if we haven't yet reached the
6049       place we found it at last time. */
6050
6051       if (p > req_byte_ptr)
6052         {
6053         if (req_byte_caseless)
6054           {
6055           while (p < end_subject)
6056             {
6057             register int pp = *p++;
6058             if (pp == req_byte || pp == req_byte2) { p--; break; }
6059             }
6060           }
6061         else
6062           {
6063           while (p < end_subject)
6064             {
6065             if (*p++ == req_byte) { p--; break; }
6066             }
6067           }
6068
6069         /* If we can't find the required character, break the matching loop,
6070         forcing a match failure. */
6071
6072         if (p >= end_subject)
6073           {
6074           rc = MATCH_NOMATCH;
6075           break;
6076           }
6077
6078         /* If we have found the required character, save the point where we
6079         found it, so that we don't search again next time round the loop if
6080         the start hasn't passed this character yet. */
6081
6082         req_byte_ptr = p;
6083         }
6084       }
6085     }
6086
6087 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6088   printf(">>>> Match against: ");
6089   pchars(start_match, end_subject - start_match, TRUE, md);
6090   printf("\n");
6091 #endif
6092
6093   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6094   first starting point for which a partial match was found. */
6095
6096   md->start_match_ptr = start_match;
6097   md->start_used_ptr = start_match;
6098   md->match_call_count = 0;
6099   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6100     0, 0);
6101   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6102
6103   switch(rc)
6104     {
6105     /* SKIP passes back the next starting point explicitly, but if it is the
6106     same as the match we have just done, treat it as NOMATCH. */
6107
6108     case MATCH_SKIP:
6109     if (md->start_match_ptr != start_match)
6110       {
6111       new_start_match = md->start_match_ptr;
6112       break;
6113       }
6114     /* Fall through */
6115
6116     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6117     the SKIP's arg was not found. We also treat this as NOMATCH. */
6118
6119     case MATCH_SKIP_ARG:
6120     /* Fall through */
6121
6122     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6123     exactly like PRUNE. */
6124
6125     case MATCH_NOMATCH:
6126     case MATCH_PRUNE:
6127     case MATCH_THEN:
6128     new_start_match = start_match + 1;
6129 #ifdef SUPPORT_UTF8
6130     if (utf8)
6131       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6132         new_start_match++;
6133 #endif
6134     break;
6135
6136     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6137
6138     case MATCH_COMMIT:
6139     rc = MATCH_NOMATCH;
6140     goto ENDLOOP;
6141
6142     /* Any other return is either a match, or some kind of error. */
6143
6144     default:
6145     goto ENDLOOP;
6146     }
6147
6148   /* Control reaches here for the various types of "no match at this point"
6149   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6150
6151   rc = MATCH_NOMATCH;
6152
6153   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6154   newline in the subject (though it may continue over the newline). Therefore,
6155   if we have just failed to match, starting at a newline, do not continue. */
6156
6157   if (firstline && IS_NEWLINE(start_match)) break;
6158
6159   /* Advance to new matching position */
6160
6161   start_match = new_start_match;
6162
6163   /* Break the loop if the pattern is anchored or if we have passed the end of
6164   the subject. */
6165
6166   if (anchored || start_match > end_subject) break;
6167
6168   /* If we have just passed a CR and are now at a LF, the pattern contains no
6169   explicit matches for \r or \n, and the newline option is CRLF, ANY or ANYCRLF,
6170   advance one more character. Never look behind the start of the subject. */
6171
6172   if (start_match > md->start_subject && start_match[-1] == CHAR_CR &&
6173       start_match < end_subject &&
6174       *start_match == CHAR_NL &&
6175       (re->flags & PCRE_HASCRORLF) == 0 &&
6176         (md->nltype == NLTYPE_ANY ||
6177          md->nltype == NLTYPE_ANYCRLF ||
6178          md->nllen == 2))
6179     start_match++;
6180
6181   md->mark = NULL;   /* Reset for start of next match attempt */
6182   }                  /* End of for(;;) "bumpalong" loop */
6183
6184 /* ==========================================================================*/
6185
6186 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6187 conditions is true:
6188
6189 (1) The pattern is anchored or the match was failed by (*COMMIT);
6190
6191 (2) We are past the end of the subject;
6192
6193 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6194     this option requests that a match occur at or before the first newline in
6195     the subject.
6196
6197 When we have a match and the offset vector is big enough to deal with any
6198 backreferences, captured substring offsets will already be set up. In the case
6199 where we had to get some local store to hold offsets for backreference
6200 processing, copy those that we can. In this case there need not be overflow if
6201 certain parts of the pattern were not used, even though there are more
6202 capturing parentheses than vector slots. */
6203
6204 ENDLOOP:
6205
6206 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6207   {
6208   if (using_temporary_offsets)
6209     {
6210     if (offsetcount >= 4)
6211       {
6212       memcpy(offsets + 2, md->offset_vector + 2,
6213         (offsetcount - 2) * sizeof(int));
6214       DPRINTF(("Copied offsets from temporary memory\n"));
6215       }
6216     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6217     DPRINTF(("Freeing temporary memory\n"));
6218     (pcre_free)(md->offset_vector);
6219     }
6220
6221   /* Set the return code to the number of captured strings, or 0 if there are
6222   too many to fit into the vector. */
6223
6224   rc = md->offset_overflow? 0 : md->end_offset_top/2;
6225
6226   /* If there is space, set up the whole thing as substring 0. The value of
6227   md->start_match_ptr might be modified if \K was encountered on the successful
6228   matching path. */
6229
6230   if (offsetcount < 2) rc = 0; else
6231     {
6232     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6233     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6234     }
6235
6236   DPRINTF((">>>> returning %d\n", rc));
6237   goto RETURN_MARK;
6238   }
6239
6240 /* Control gets here if there has been an error, or if the overall match
6241 attempt has failed at all permitted starting positions. */
6242
6243 if (using_temporary_offsets)
6244   {
6245   DPRINTF(("Freeing temporary memory\n"));
6246   (pcre_free)(md->offset_vector);
6247   }
6248
6249 /* For anything other than nomatch or partial match, just return the code. */
6250
6251 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6252   {
6253   DPRINTF((">>>> error: returning %d\n", rc));
6254   return rc;
6255   }
6256
6257 /* Handle partial matches - disable any mark data */
6258
6259 if (start_partial != NULL)
6260   {
6261   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6262   md->mark = NULL;
6263   if (offsetcount > 1)
6264     {
6265     offsets[0] = (int)(start_partial - (USPTR)subject);
6266     offsets[1] = (int)(end_subject - (USPTR)subject);
6267     }
6268   rc = PCRE_ERROR_PARTIAL;
6269   }
6270
6271 /* This is the classic nomatch case */
6272
6273 else
6274   {
6275   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6276   rc = PCRE_ERROR_NOMATCH;
6277   }
6278
6279 /* Return the MARK data if it has been requested. */
6280
6281 RETURN_MARK:
6282
6283 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6284   *(extra_data->mark) = (unsigned char *)(md->mark);
6285 return rc;
6286 }
6287
6288 /* End of pcre_exec.c */