Merge branch '976-disable-assert-checks' into 'master'
[glib.git] / glib / pcre / pcre_study.c
blobee0930718fdb11646e3427138dd5bbd9fa70ece5
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
41 /* This module contains the external function pcre_study(), along with local
42 supporting functions. */
45 #include "config.h"
47 #include "pcre_internal.h"
/* Set the bit for value c in the bit map named start_bits, which must be in
scope at the point of use (the functions below document it as a 32-byte table).
The argument and the whole expansion are parenthesized so that an expression
argument such as "base + 1" is treated as a unit, and so that the macro can be
used wherever an expression is allowed. Note that c is evaluated twice, so it
must not have side effects. */

#define SET_BIT(c) (start_bits[(c)/8] |= (1 << ((c)&7)))

/* Returns from set_start_bits() */

enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
57 /*************************************************
58 * Find the minimum subject length for a group *
59 *************************************************/
61 /* Scan a parenthesized group and compute the minimum length of subject that
62 is needed to match it. This is a lower bound; it does not mean there is a
63 string of that length that matches. In UTF8 mode, the result is in characters
64 rather than bytes.
66 Arguments:
67 code pointer to start of group (the bracket)
68 startcode pointer to start of the whole pattern
69 options the compiling options
70 int RECURSE depth
72 Returns: the minimum length
73 -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
74 -2 internal error (missing capturing bracket)
75 -3 internal error (opcode not listed)
78 static int
79 find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
80 int recurse_depth)
82 int length = -1;
83 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
84 BOOL utf = (options & PCRE_UTF8) != 0;
85 BOOL had_recurse = FALSE;
86 int branchlength = 0;
87 pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
89 if (*code == OP_CBRA || *code == OP_SCBRA ||
90 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
92 /* Scan along the opcodes for this branch. If we get to the end of the
93 branch, check the length against that of the other branches. */
95 for (;;)
97 int d, min;
98 pcre_uchar *cs, *ce;
99 int op = *cc;
101 switch (op)
103 case OP_COND:
104 case OP_SCOND:
106 /* If there is only one branch in a condition, the implied branch has zero
107 length, so we don't add anything. This covers the DEFINE "condition"
108 automatically. */
110 cs = cc + GET(cc, 1);
111 if (*cs != OP_ALT)
113 cc = cs + 1 + LINK_SIZE;
114 break;
117 /* Otherwise we can fall through and treat it the same as any other
118 subpattern. */
120 case OP_CBRA:
121 case OP_SCBRA:
122 case OP_BRA:
123 case OP_SBRA:
124 case OP_CBRAPOS:
125 case OP_SCBRAPOS:
126 case OP_BRAPOS:
127 case OP_SBRAPOS:
128 case OP_ONCE:
129 case OP_ONCE_NC:
130 d = find_minlength(cc, startcode, options, recurse_depth);
131 if (d < 0) return d;
132 branchlength += d;
133 do cc += GET(cc, 1); while (*cc == OP_ALT);
134 cc += 1 + LINK_SIZE;
135 break;
137 /* ACCEPT makes things far too complicated; we have to give up. */
139 case OP_ACCEPT:
140 case OP_ASSERT_ACCEPT:
141 return -1;
143 /* Reached end of a branch; if it's a ket it is the end of a nested
144 call. If it's ALT it is an alternation in a nested call. If it is END it's
145 the end of the outer call. All can be handled by the same code. If an
146 ACCEPT was previously encountered, use the length that was in force at that
147 time, and pass back the shortest ACCEPT length. */
149 case OP_ALT:
150 case OP_KET:
151 case OP_KETRMAX:
152 case OP_KETRMIN:
153 case OP_KETRPOS:
154 case OP_END:
155 if (length < 0 || (!had_recurse && branchlength < length))
156 length = branchlength;
157 if (op != OP_ALT) return length;
158 cc += 1 + LINK_SIZE;
159 branchlength = 0;
160 had_recurse = FALSE;
161 break;
163 /* Skip over assertive subpatterns */
165 case OP_ASSERT:
166 case OP_ASSERT_NOT:
167 case OP_ASSERTBACK:
168 case OP_ASSERTBACK_NOT:
169 do cc += GET(cc, 1); while (*cc == OP_ALT);
170 /* Fall through */
172 /* Skip over things that don't match chars */
174 case OP_REVERSE:
175 case OP_CREF:
176 case OP_NCREF:
177 case OP_RREF:
178 case OP_NRREF:
179 case OP_DEF:
180 case OP_CALLOUT:
181 case OP_SOD:
182 case OP_SOM:
183 case OP_EOD:
184 case OP_EODN:
185 case OP_CIRC:
186 case OP_CIRCM:
187 case OP_DOLL:
188 case OP_DOLLM:
189 case OP_NOT_WORD_BOUNDARY:
190 case OP_WORD_BOUNDARY:
191 cc += PRIV(OP_lengths)[*cc];
192 break;
194 /* Skip over a subpattern that has a {0} or {0,x} quantifier */
196 case OP_BRAZERO:
197 case OP_BRAMINZERO:
198 case OP_BRAPOSZERO:
199 case OP_SKIPZERO:
200 cc += PRIV(OP_lengths)[*cc];
201 do cc += GET(cc, 1); while (*cc == OP_ALT);
202 cc += 1 + LINK_SIZE;
203 break;
205 /* Handle literal characters and + repetitions */
207 case OP_CHAR:
208 case OP_CHARI:
209 case OP_NOT:
210 case OP_NOTI:
211 case OP_PLUS:
212 case OP_PLUSI:
213 case OP_MINPLUS:
214 case OP_MINPLUSI:
215 case OP_POSPLUS:
216 case OP_POSPLUSI:
217 case OP_NOTPLUS:
218 case OP_NOTPLUSI:
219 case OP_NOTMINPLUS:
220 case OP_NOTMINPLUSI:
221 case OP_NOTPOSPLUS:
222 case OP_NOTPOSPLUSI:
223 branchlength++;
224 cc += 2;
225 #ifdef SUPPORT_UTF
226 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
227 #endif
228 break;
230 case OP_TYPEPLUS:
231 case OP_TYPEMINPLUS:
232 case OP_TYPEPOSPLUS:
233 branchlength++;
234 cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
235 break;
237 /* Handle exact repetitions. The count is already in characters, but we
238 need to skip over a multibyte character in UTF8 mode. */
240 case OP_EXACT:
241 case OP_EXACTI:
242 case OP_NOTEXACT:
243 case OP_NOTEXACTI:
244 branchlength += GET2(cc,1);
245 cc += 2 + IMM2_SIZE;
246 #ifdef SUPPORT_UTF
247 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
248 #endif
249 break;
251 case OP_TYPEEXACT:
252 branchlength += GET2(cc,1);
253 cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
254 || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
255 break;
257 /* Handle single-char non-literal matchers */
259 case OP_PROP:
260 case OP_NOTPROP:
261 cc += 2;
262 /* Fall through */
264 case OP_NOT_DIGIT:
265 case OP_DIGIT:
266 case OP_NOT_WHITESPACE:
267 case OP_WHITESPACE:
268 case OP_NOT_WORDCHAR:
269 case OP_WORDCHAR:
270 case OP_ANY:
271 case OP_ALLANY:
272 case OP_EXTUNI:
273 case OP_HSPACE:
274 case OP_NOT_HSPACE:
275 case OP_VSPACE:
276 case OP_NOT_VSPACE:
277 branchlength++;
278 cc++;
279 break;
281 /* "Any newline" might match two characters, but it also might match just
282 one. */
284 case OP_ANYNL:
285 branchlength += 1;
286 cc++;
287 break;
289 /* The single-byte matcher means we can't proceed in UTF-8 mode. (In
290 non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever
291 appear, but leave the code, just in case.) */
293 case OP_ANYBYTE:
294 #ifdef SUPPORT_UTF
295 if (utf) return -1;
296 #endif
297 branchlength++;
298 cc++;
299 break;
301 /* For repeated character types, we have to test for \p and \P, which have
302 an extra two bytes of parameters. */
304 case OP_TYPESTAR:
305 case OP_TYPEMINSTAR:
306 case OP_TYPEQUERY:
307 case OP_TYPEMINQUERY:
308 case OP_TYPEPOSSTAR:
309 case OP_TYPEPOSQUERY:
310 if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
311 cc += PRIV(OP_lengths)[op];
312 break;
314 case OP_TYPEUPTO:
315 case OP_TYPEMINUPTO:
316 case OP_TYPEPOSUPTO:
317 if (cc[1 + IMM2_SIZE] == OP_PROP
318 || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
319 cc += PRIV(OP_lengths)[op];
320 break;
322 /* Check a class for variable quantification */
324 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
325 case OP_XCLASS:
326 cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
327 /* Fall through */
328 #endif
330 case OP_CLASS:
331 case OP_NCLASS:
332 cc += PRIV(OP_lengths)[OP_CLASS];
334 switch (*cc)
336 case OP_CRPLUS:
337 case OP_CRMINPLUS:
338 branchlength++;
339 /* Fall through */
341 case OP_CRSTAR:
342 case OP_CRMINSTAR:
343 case OP_CRQUERY:
344 case OP_CRMINQUERY:
345 cc++;
346 break;
348 case OP_CRRANGE:
349 case OP_CRMINRANGE:
350 branchlength += GET2(cc,1);
351 cc += 1 + 2 * IMM2_SIZE;
352 break;
354 default:
355 branchlength++;
356 break;
358 break;
360 /* Backreferences and subroutine calls are treated in the same way: we find
361 the minimum length for the subpattern. A recursion, however, causes an
362 a flag to be set that causes the length of this branch to be ignored. The
363 logic is that a recursion can only make sense if there is another
364 alternation that stops the recursing. That will provide the minimum length
365 (when no recursion happens). A backreference within the group that it is
366 referencing behaves in the same way.
368 If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
369 matches an empty string (by default it causes a matching failure), so in
370 that case we must set the minimum length to zero. */
372 case OP_REF:
373 case OP_REFI:
374 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
376 ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
377 if (cs == NULL) return -2;
378 do ce += GET(ce, 1); while (*ce == OP_ALT);
379 if (cc > cs && cc < ce)
381 d = 0;
382 had_recurse = TRUE;
384 else
386 d = find_minlength(cs, startcode, options, recurse_depth);
389 else d = 0;
390 cc += 1 + IMM2_SIZE;
392 /* Handle repeated back references */
394 switch (*cc)
396 case OP_CRSTAR:
397 case OP_CRMINSTAR:
398 case OP_CRQUERY:
399 case OP_CRMINQUERY:
400 min = 0;
401 cc++;
402 break;
404 case OP_CRPLUS:
405 case OP_CRMINPLUS:
406 min = 1;
407 cc++;
408 break;
410 case OP_CRRANGE:
411 case OP_CRMINRANGE:
412 min = GET2(cc, 1);
413 cc += 1 + 2 * IMM2_SIZE;
414 break;
416 default:
417 min = 1;
418 break;
421 branchlength += min * d;
422 break;
424 /* We can easily detect direct recursion, but not mutual recursion. This is
425 caught by a recursion depth count. */
427 case OP_RECURSE:
428 cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
429 do ce += GET(ce, 1); while (*ce == OP_ALT);
430 if ((cc > cs && cc < ce) || recurse_depth > 10)
431 had_recurse = TRUE;
432 else
434 branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
436 cc += 1 + LINK_SIZE;
437 break;
439 /* Anything else does not or need not match a character. We can get the
440 item's length from the table, but for those that can match zero occurrences
441 of a character, we must take special action for UTF-8 characters. As it
442 happens, the "NOT" versions of these opcodes are used at present only for
443 ASCII characters, so they could be omitted from this list. However, in
444 future that may change, so we include them here so as not to leave a
445 gotcha for a future maintainer. */
447 case OP_UPTO:
448 case OP_UPTOI:
449 case OP_NOTUPTO:
450 case OP_NOTUPTOI:
451 case OP_MINUPTO:
452 case OP_MINUPTOI:
453 case OP_NOTMINUPTO:
454 case OP_NOTMINUPTOI:
455 case OP_POSUPTO:
456 case OP_POSUPTOI:
457 case OP_NOTPOSUPTO:
458 case OP_NOTPOSUPTOI:
460 case OP_STAR:
461 case OP_STARI:
462 case OP_NOTSTAR:
463 case OP_NOTSTARI:
464 case OP_MINSTAR:
465 case OP_MINSTARI:
466 case OP_NOTMINSTAR:
467 case OP_NOTMINSTARI:
468 case OP_POSSTAR:
469 case OP_POSSTARI:
470 case OP_NOTPOSSTAR:
471 case OP_NOTPOSSTARI:
473 case OP_QUERY:
474 case OP_QUERYI:
475 case OP_NOTQUERY:
476 case OP_NOTQUERYI:
477 case OP_MINQUERY:
478 case OP_MINQUERYI:
479 case OP_NOTMINQUERY:
480 case OP_NOTMINQUERYI:
481 case OP_POSQUERY:
482 case OP_POSQUERYI:
483 case OP_NOTPOSQUERY:
484 case OP_NOTPOSQUERYI:
486 cc += PRIV(OP_lengths)[op];
487 #ifdef SUPPORT_UTF
488 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
489 #endif
490 break;
492 /* Skip these, but we need to add in the name length. */
494 case OP_MARK:
495 case OP_PRUNE_ARG:
496 case OP_SKIP_ARG:
497 case OP_THEN_ARG:
498 cc += PRIV(OP_lengths)[op] + cc[1];
499 break;
501 /* The remaining opcodes are just skipped over. */
503 case OP_CLOSE:
504 case OP_COMMIT:
505 case OP_FAIL:
506 case OP_PRUNE:
507 case OP_SET_SOM:
508 case OP_SKIP:
509 case OP_THEN:
510 cc += PRIV(OP_lengths)[op];
511 break;
513 /* This should not occur: we list all opcodes explicitly so that when
514 new ones get added they are properly considered. */
516 default:
517 return -3;
520 /* Control never gets here */
525 /*************************************************
526 * Set a bit and maybe its alternate case *
527 *************************************************/
529 /* Given a character, set its first byte's bit in the table, and also the
530 corresponding bit for the other version of a letter if we are caseless. In
531 UTF-8 mode, for characters greater than 127, we can only do the caseless thing
532 when Unicode property support is available.
534 Arguments:
535 start_bits points to the bit map
536 p points to the character
537 caseless the caseless flag
538 cd the block with char table pointers
539 utf TRUE for UTF-8 / UTF-16 mode
541 Returns: pointer after the character
544 static const pcre_uchar *
545 set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
546 compile_data *cd, BOOL utf)
548 unsigned int c = *p;
550 #ifdef COMPILE_PCRE8
551 SET_BIT(c);
553 #ifdef SUPPORT_UTF
554 if (utf && c > 127)
556 GETCHARINC(c, p);
557 #ifdef SUPPORT_UCP
558 if (caseless)
560 pcre_uchar buff[6];
561 c = UCD_OTHERCASE(c);
562 (void)PRIV(ord2utf)(c, buff);
563 SET_BIT(buff[0]);
565 #endif
566 return p;
568 #endif
570 /* Not UTF-8 mode, or character is less than 127. */
572 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
573 return p + 1;
574 #endif
576 #ifdef COMPILE_PCRE16
577 if (c > 0xff)
579 c = 0xff;
580 caseless = FALSE;
582 SET_BIT(c);
584 #ifdef SUPPORT_UTF
585 if (utf && c > 127)
587 GETCHARINC(c, p);
588 #ifdef SUPPORT_UCP
589 if (caseless)
591 c = UCD_OTHERCASE(c);
592 if (c > 0xff)
593 c = 0xff;
594 SET_BIT(c);
596 #endif
597 return p;
599 #endif
601 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
602 return p + 1;
603 #endif
608 /*************************************************
609 * Set bits for a positive character type *
610 *************************************************/
612 /* This function sets starting bits for a character type. In UTF-8 mode, we can
613 only do a direct setting for bytes less than 128, as otherwise there can be
614 confusion with bytes in the middle of UTF-8 characters. In a "traditional"
615 environment, the tables will only recognize ASCII characters anyway, but in at
616 least one Windows environment, some higher bytes bits were set in the tables.
617 So we deal with that case by considering the UTF-8 encoding.
619 Arguments:
620 start_bits the starting bitmap
621 cbit type the type of character wanted
622 table_limit 32 for non-UTF-8; 16 for UTF-8
623 cd the block with char table pointers
625 Returns: nothing
628 static void
629 set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
630 compile_data *cd)
632 int c;
633 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
634 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
635 if (table_limit == 32) return;
636 for (c = 128; c < 256; c++)
638 if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
640 pcre_uchar buff[6];
641 (void)PRIV(ord2utf)(c, buff);
642 SET_BIT(buff[0]);
645 #endif
649 /*************************************************
650 * Set bits for a negative character type *
651 *************************************************/
653 /* This function sets starting bits for a negative character type such as \D.
654 In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
655 otherwise there can be confusion with bytes in the middle of UTF-8 characters.
656 Unlike in the positive case, where we can set appropriate starting bits for
657 specific high-valued UTF-8 characters, in this case we have to set the bits for
658 all high-valued characters. The lowest is 0xc2, but we overkill by starting at
659 0xc0 (192) for simplicity.
661 Arguments:
662 start_bits the starting bitmap
663 cbit type the type of character wanted
664 table_limit 32 for non-UTF-8; 16 for UTF-8
665 cd the block with char table pointers
667 Returns: nothing
670 static void
671 set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
672 compile_data *cd)
674 int c;
675 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
676 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
677 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
678 #endif
683 /*************************************************
684 * Create bitmap of starting bytes *
685 *************************************************/
687 /* This function scans a compiled unanchored expression recursively and
688 attempts to build a bitmap of the set of possible starting bytes. As time goes
689 by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
690 useful for parenthesized groups in patterns such as (a*)b where the group
691 provides some optional starting bytes but scanning must continue at the outer
692 level to find at least one mandatory byte. At the outermost level, this
693 function fails unless the result is SSB_DONE.
695 Arguments:
696 code points to an expression
697 start_bits points to a 32-byte table, initialized to 0
698 utf TRUE if in UTF-8 / UTF-16 mode
699 cd the block with char table pointers
701 Returns: SSB_FAIL => Failed to find any starting bytes
702 SSB_DONE => Found mandatory starting bytes
703 SSB_CONTINUE => Found optional starting bytes
704 SSB_UNKNOWN => Hit an unrecognized opcode
707 static int
708 set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
709 compile_data *cd)
711 int c;
712 int yield = SSB_DONE;
713 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
714 int table_limit = utf? 16:32;
715 #else
716 int table_limit = 32;
717 #endif
719 #if 0
720 /* ========================================================================= */
721 /* The following comment and code was inserted in January 1999. In May 2006,
722 when it was observed to cause compiler warnings about unused values, I took it
723 out again. If anybody is still using OS/2, they will have to put it back
724 manually. */
726 /* This next statement and the later reference to dummy are here in order to
727 trick the optimizer of the IBM C compiler for OS/2 into generating correct
728 code. Apparently IBM isn't going to fix the problem, and we would rather not
729 disable optimization (in this module it actually makes a big difference, and
730 the pcre module can use all the optimization it can get). */
732 volatile int dummy;
733 /* ========================================================================= */
734 #endif
738 BOOL try_next = TRUE;
739 const pcre_uchar *tcode = code + 1 + LINK_SIZE;
741 if (*code == OP_CBRA || *code == OP_SCBRA ||
742 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
744 while (try_next) /* Loop for items in this branch */
746 int rc;
748 switch(*tcode)
750 /* If we reach something we don't understand, it means a new opcode has
751 been created that hasn't been added to this code. Hopefully this problem
752 will be discovered during testing. */
754 default:
755 return SSB_UNKNOWN;
757 /* Fail for a valid opcode that implies no starting bits. */
759 case OP_ACCEPT:
760 case OP_ASSERT_ACCEPT:
761 case OP_ALLANY:
762 case OP_ANY:
763 case OP_ANYBYTE:
764 case OP_CIRC:
765 case OP_CIRCM:
766 case OP_CLOSE:
767 case OP_COMMIT:
768 case OP_COND:
769 case OP_CREF:
770 case OP_DEF:
771 case OP_DOLL:
772 case OP_DOLLM:
773 case OP_END:
774 case OP_EOD:
775 case OP_EODN:
776 case OP_EXTUNI:
777 case OP_FAIL:
778 case OP_MARK:
779 case OP_NCREF:
780 case OP_NOT:
781 case OP_NOTEXACT:
782 case OP_NOTEXACTI:
783 case OP_NOTI:
784 case OP_NOTMINPLUS:
785 case OP_NOTMINPLUSI:
786 case OP_NOTMINQUERY:
787 case OP_NOTMINQUERYI:
788 case OP_NOTMINSTAR:
789 case OP_NOTMINSTARI:
790 case OP_NOTMINUPTO:
791 case OP_NOTMINUPTOI:
792 case OP_NOTPLUS:
793 case OP_NOTPLUSI:
794 case OP_NOTPOSPLUS:
795 case OP_NOTPOSPLUSI:
796 case OP_NOTPOSQUERY:
797 case OP_NOTPOSQUERYI:
798 case OP_NOTPOSSTAR:
799 case OP_NOTPOSSTARI:
800 case OP_NOTPOSUPTO:
801 case OP_NOTPOSUPTOI:
802 case OP_NOTPROP:
803 case OP_NOTQUERY:
804 case OP_NOTQUERYI:
805 case OP_NOTSTAR:
806 case OP_NOTSTARI:
807 case OP_NOTUPTO:
808 case OP_NOTUPTOI:
809 case OP_NOT_HSPACE:
810 case OP_NOT_VSPACE:
811 case OP_NRREF:
812 case OP_PROP:
813 case OP_PRUNE:
814 case OP_PRUNE_ARG:
815 case OP_RECURSE:
816 case OP_REF:
817 case OP_REFI:
818 case OP_REVERSE:
819 case OP_RREF:
820 case OP_SCOND:
821 case OP_SET_SOM:
822 case OP_SKIP:
823 case OP_SKIP_ARG:
824 case OP_SOD:
825 case OP_SOM:
826 case OP_THEN:
827 case OP_THEN_ARG:
828 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
829 case OP_XCLASS:
830 #endif
831 return SSB_FAIL;
833 /* We can ignore word boundary tests. */
835 case OP_WORD_BOUNDARY:
836 case OP_NOT_WORD_BOUNDARY:
837 tcode++;
838 break;
840 /* If we hit a bracket or a positive lookahead assertion, recurse to set
841 bits from within the subpattern. If it can't find anything, we have to
842 give up. If it finds some mandatory character(s), we are done for this
843 branch. Otherwise, carry on scanning after the subpattern. */
845 case OP_BRA:
846 case OP_SBRA:
847 case OP_CBRA:
848 case OP_SCBRA:
849 case OP_BRAPOS:
850 case OP_SBRAPOS:
851 case OP_CBRAPOS:
852 case OP_SCBRAPOS:
853 case OP_ONCE:
854 case OP_ONCE_NC:
855 case OP_ASSERT:
856 rc = set_start_bits(tcode, start_bits, utf, cd);
857 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
858 if (rc == SSB_DONE) try_next = FALSE; else
860 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
861 tcode += 1 + LINK_SIZE;
863 break;
865 /* If we hit ALT or KET, it means we haven't found anything mandatory in
866 this branch, though we might have found something optional. For ALT, we
867 continue with the next alternative, but we have to arrange that the final
868 result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
869 return SSB_CONTINUE: if this is the top level, that indicates failure,
870 but after a nested subpattern, it causes scanning to continue. */
872 case OP_ALT:
873 yield = SSB_CONTINUE;
874 try_next = FALSE;
875 break;
877 case OP_KET:
878 case OP_KETRMAX:
879 case OP_KETRMIN:
880 case OP_KETRPOS:
881 return SSB_CONTINUE;
883 /* Skip over callout */
885 case OP_CALLOUT:
886 tcode += 2 + 2*LINK_SIZE;
887 break;
889 /* Skip over lookbehind and negative lookahead assertions */
891 case OP_ASSERT_NOT:
892 case OP_ASSERTBACK:
893 case OP_ASSERTBACK_NOT:
894 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
895 tcode += 1 + LINK_SIZE;
896 break;
898 /* BRAZERO does the bracket, but carries on. */
900 case OP_BRAZERO:
901 case OP_BRAMINZERO:
902 case OP_BRAPOSZERO:
903 rc = set_start_bits(++tcode, start_bits, utf, cd);
904 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
905 /* =========================================================================
906 See the comment at the head of this function concerning the next line,
907 which was an old fudge for the benefit of OS/2.
908 dummy = 1;
909 ========================================================================= */
910 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
911 tcode += 1 + LINK_SIZE;
912 break;
914 /* SKIPZERO skips the bracket. */
916 case OP_SKIPZERO:
917 tcode++;
918 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
919 tcode += 1 + LINK_SIZE;
920 break;
922 /* Single-char * or ? sets the bit and tries the next item */
924 case OP_STAR:
925 case OP_MINSTAR:
926 case OP_POSSTAR:
927 case OP_QUERY:
928 case OP_MINQUERY:
929 case OP_POSQUERY:
930 tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
931 break;
933 case OP_STARI:
934 case OP_MINSTARI:
935 case OP_POSSTARI:
936 case OP_QUERYI:
937 case OP_MINQUERYI:
938 case OP_POSQUERYI:
939 tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
940 break;
942 /* Single-char upto sets the bit and tries the next */
944 case OP_UPTO:
945 case OP_MINUPTO:
946 case OP_POSUPTO:
947 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
948 break;
950 case OP_UPTOI:
951 case OP_MINUPTOI:
952 case OP_POSUPTOI:
953 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
954 break;
956 /* At least one single char sets the bit and stops */
958 case OP_EXACT:
959 tcode += IMM2_SIZE;
960 /* Fall through */
961 case OP_CHAR:
962 case OP_PLUS:
963 case OP_MINPLUS:
964 case OP_POSPLUS:
965 (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
966 try_next = FALSE;
967 break;
969 case OP_EXACTI:
970 tcode += IMM2_SIZE;
971 /* Fall through */
972 case OP_CHARI:
973 case OP_PLUSI:
974 case OP_MINPLUSI:
975 case OP_POSPLUSI:
976 (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
977 try_next = FALSE;
978 break;
980 /* Special spacing and line-terminating items. These recognize specific
981 lists of characters. The difference between VSPACE and ANYNL is that the
982 latter can match the two-character CRLF sequence, but that is not
983 relevant for finding the first character, so their code here is
984 identical. */
986 case OP_HSPACE:
987 SET_BIT(0x09);
988 SET_BIT(0x20);
989 #ifdef SUPPORT_UTF
990 if (utf)
992 #ifdef COMPILE_PCRE8
993 SET_BIT(0xC2); /* For U+00A0 */
994 SET_BIT(0xE1); /* For U+1680, U+180E */
995 SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
996 SET_BIT(0xE3); /* For U+3000 */
997 #endif
998 #ifdef COMPILE_PCRE16
999 SET_BIT(0xA0);
1000 SET_BIT(0xFF); /* For characters > 255 */
1001 #endif
1003 else
1004 #endif /* SUPPORT_UTF */
1006 SET_BIT(0xA0);
1007 #ifdef COMPILE_PCRE16
1008 SET_BIT(0xFF); /* For characters > 255 */
1009 #endif
1011 try_next = FALSE;
1012 break;
1014 case OP_ANYNL:
1015 case OP_VSPACE:
1016 SET_BIT(0x0A);
1017 SET_BIT(0x0B);
1018 SET_BIT(0x0C);
1019 SET_BIT(0x0D);
1020 #ifdef SUPPORT_UTF
1021 if (utf)
1023 #ifdef COMPILE_PCRE8
1024 SET_BIT(0xC2); /* For U+0085 */
1025 SET_BIT(0xE2); /* For U+2028, U+2029 */
1026 #endif
1027 #ifdef COMPILE_PCRE16
1028 SET_BIT(0x85);
1029 SET_BIT(0xFF); /* For characters > 255 */
1030 #endif
1032 else
1033 #endif /* SUPPORT_UTF */
1035 SET_BIT(0x85);
1036 #ifdef COMPILE_PCRE16
1037 SET_BIT(0xFF); /* For characters > 255 */
1038 #endif
1040 try_next = FALSE;
1041 break;
1043 /* Single character types set the bits and stop. Note that if PCRE_UCP
1044 is set, we do not see these op codes because \d etc are converted to
1045 properties. Therefore, these apply in the case when only characters less
1046 than 256 are recognized to match the types. */
1048 case OP_NOT_DIGIT:
1049 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1050 try_next = FALSE;
1051 break;
1053 case OP_DIGIT:
1054 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1055 try_next = FALSE;
1056 break;
1058 /* The cbit_space table has vertical tab as whitespace; we have to
1059 ensure it is set as not whitespace. */
1061 case OP_NOT_WHITESPACE:
1062 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1063 start_bits[1] |= 0x08;
1064 try_next = FALSE;
1065 break;
1067 /* The cbit_space table has vertical tab as whitespace; we have to
1068 not set it from the table. */
1070 case OP_WHITESPACE:
1071 c = start_bits[1]; /* Save in case it was already set */
1072 set_type_bits(start_bits, cbit_space, table_limit, cd);
1073 start_bits[1] = (start_bits[1] & ~0x08) | c;
1074 try_next = FALSE;
1075 break;
1077 case OP_NOT_WORDCHAR:
1078 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1079 try_next = FALSE;
1080 break;
1082 case OP_WORDCHAR:
1083 set_type_bits(start_bits, cbit_word, table_limit, cd);
1084 try_next = FALSE;
1085 break;
1087 /* One or more character type fudges the pointer and restarts, knowing
1088 it will hit a single character type and stop there. */
1090 case OP_TYPEPLUS:
1091 case OP_TYPEMINPLUS:
1092 case OP_TYPEPOSPLUS:
1093 tcode++;
1094 break;
1096 case OP_TYPEEXACT:
1097 tcode += 1 + IMM2_SIZE;
1098 break;
1100 /* Zero or more repeats of character types set the bits and then
1101 try again. */
1103 case OP_TYPEUPTO:
1104 case OP_TYPEMINUPTO:
1105 case OP_TYPEPOSUPTO:
1106 tcode += IMM2_SIZE; /* Fall through */
1108 case OP_TYPESTAR:
1109 case OP_TYPEMINSTAR:
1110 case OP_TYPEPOSSTAR:
1111 case OP_TYPEQUERY:
1112 case OP_TYPEMINQUERY:
1113 case OP_TYPEPOSQUERY:
1114 switch(tcode[1])
1116 default:
1117 case OP_ANY:
1118 case OP_ALLANY:
1119 return SSB_FAIL;
1121 case OP_HSPACE:
1122 SET_BIT(0x09);
1123 SET_BIT(0x20);
1124 #ifdef SUPPORT_UTF
1125 if (utf)
1127 #ifdef COMPILE_PCRE8
1128 SET_BIT(0xC2); /* For U+00A0 */
1129 SET_BIT(0xE1); /* For U+1680, U+180E */
1130 SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
1131 SET_BIT(0xE3); /* For U+3000 */
1132 #endif
1133 #ifdef COMPILE_PCRE16
1134 SET_BIT(0xA0);
1135 SET_BIT(0xFF); /* For characters > 255 */
1136 #endif
1138 else
1139 #endif /* SUPPORT_UTF */
1140 SET_BIT(0xA0);
1141 break;
1143 case OP_ANYNL:
1144 case OP_VSPACE:
1145 SET_BIT(0x0A);
1146 SET_BIT(0x0B);
1147 SET_BIT(0x0C);
1148 SET_BIT(0x0D);
1149 #ifdef SUPPORT_UTF
1150 if (utf)
1152 #ifdef COMPILE_PCRE8
1153 SET_BIT(0xC2); /* For U+0085 */
1154 SET_BIT(0xE2); /* For U+2028, U+2029 */
1155 #endif
1156 #ifdef COMPILE_PCRE16
1157 SET_BIT(0x85);
1158 SET_BIT(0xFF); /* For characters > 255 */
1159 #endif
1161 else
1162 #endif /* SUPPORT_UTF */
1163 SET_BIT(0x85);
1164 break;
1166 case OP_NOT_DIGIT:
1167 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1168 break;
1170 case OP_DIGIT:
1171 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1172 break;
1174 /* The cbit_space table has vertical tab as whitespace; we have to
1175 ensure it gets set as not whitespace. */
1177 case OP_NOT_WHITESPACE:
1178 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1179 start_bits[1] |= 0x08;
1180 break;
1182 /* The cbit_space table has vertical tab as whitespace; we have to
1183 avoid setting it. */
1185 case OP_WHITESPACE:
1186 c = start_bits[1]; /* Save in case it was already set */
1187 set_type_bits(start_bits, cbit_space, table_limit, cd);
1188 start_bits[1] = (start_bits[1] & ~0x08) | c;
1189 break;
1191 case OP_NOT_WORDCHAR:
1192 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1193 break;
1195 case OP_WORDCHAR:
1196 set_type_bits(start_bits, cbit_word, table_limit, cd);
1197 break;
1200 tcode += 2;
1201 break;
1203 /* Character class where all the information is in a bit map: set the
1204 bits and either carry on or not, according to the repeat count. If it was
1205 a negative class, and we are operating with UTF-8 characters, any byte
1206 with a value >= 0xc4 is a potentially valid starter because it starts a
1207 character with a value > 255. */
1209 case OP_NCLASS:
1210 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1211 if (utf)
1213 start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
1214 memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
1216 #endif
1217 #ifdef COMPILE_PCRE16
1218 SET_BIT(0xFF); /* For characters > 255 */
1219 #endif
1220 /* Fall through */
1222 case OP_CLASS:
1224 pcre_uint8 *map;
1225 tcode++;
1226 map = (pcre_uint8 *)tcode;
1228 /* In UTF-8 mode, the bits in a bit map correspond to character
1229 values, not to byte values. However, the bit map we are constructing is
1230 for byte values. So we have to do a conversion for characters whose
1231 value is > 127. In fact, there are only two possible starting bytes for
1232 characters in the range 128 - 255. */
1234 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1235 if (utf)
1237 for (c = 0; c < 16; c++) start_bits[c] |= map[c];
1238 for (c = 128; c < 256; c++)
1240 if ((map[c/8] && (1 << (c&7))) != 0)
1242 int d = (c >> 6) | 0xc0; /* Set bit for this starter */
1243 start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
1244 c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
1248 else
1249 #endif
1251 /* In non-UTF-8 mode, the two bit maps are completely compatible. */
1252 for (c = 0; c < 32; c++) start_bits[c] |= map[c];
1255 /* Advance past the bit map, and act on what follows. For a zero
1256 minimum repeat, continue; otherwise stop processing. */
1258 tcode += 32 / sizeof(pcre_uchar);
1259 switch (*tcode)
1261 case OP_CRSTAR:
1262 case OP_CRMINSTAR:
1263 case OP_CRQUERY:
1264 case OP_CRMINQUERY:
1265 tcode++;
1266 break;
1268 case OP_CRRANGE:
1269 case OP_CRMINRANGE:
1270 if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
1271 else try_next = FALSE;
1272 break;
1274 default:
1275 try_next = FALSE;
1276 break;
1279 break; /* End of bitmap class handling */
1281 } /* End of switch */
1282 } /* End of try_next loop */
1284 code += GET(code, 1); /* Advance to next branch */
1286 while (*code == OP_ALT);
1287 return yield;
1294 /*************************************************
1295 * Study a compiled expression *
1296 *************************************************/
1298 /* This function is handed a compiled expression that it must study to produce
1299 information that will speed up the matching. It returns a pcre[16]_extra block
1300 which then gets handed back to pcre_exec().
1302 Arguments:
1303 re points to the compiled expression
1304 options contains option bits
1305 errorptr points to where to place error messages;
1306 set NULL unless error
1308 Returns: pointer to a pcre[16]_extra block, with study_data filled in and
1309 the appropriate flags set;
1310 NULL on error or if no optimization possible
1313 #ifdef COMPILE_PCRE8
1314 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1315 pcre_study(const pcre *external_re, int options, const char **errorptr)
1316 #else
1317 PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
1318 pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
1319 #endif
1321 int min;
1322 BOOL bits_set = FALSE;
1323 pcre_uint8 start_bits[32];
1324 PUBL(extra) *extra = NULL;
1325 pcre_study_data *study;
1326 const pcre_uint8 *tables;
1327 pcre_uchar *code;
1328 compile_data compile_block;
1329 const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1331 *errorptr = NULL;
1333 if (re == NULL || re->magic_number != MAGIC_NUMBER)
1335 *errorptr = "argument is not a compiled regular expression";
1336 return NULL;
1339 if ((re->flags & PCRE_MODE) == 0)
1341 #ifdef COMPILE_PCRE8
1342 *errorptr = "argument is compiled in 16 bit mode";
1343 #else
1344 *errorptr = "argument is compiled in 8 bit mode";
1345 #endif
1346 return NULL;
1349 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
1351 *errorptr = "unknown or incorrect option bit(s) set";
1352 return NULL;
1355 code = (pcre_uchar *)re + re->name_table_offset +
1356 (re->name_count * re->name_entry_size);
1358 /* For an anchored pattern, or an unanchored pattern that has a first char, or
1359 a multiline pattern that matches only at "line starts", there is no point in
1360 seeking a list of starting bytes. */
1362 if ((re->options & PCRE_ANCHORED) == 0 &&
1363 (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
1365 int rc;
1367 /* Set the character tables in the block that is passed around */
1369 tables = re->tables;
1371 #ifdef COMPILE_PCRE8
1372 if (tables == NULL)
1373 (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1374 (void *)(&tables));
1375 #else
1376 if (tables == NULL)
1377 (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1378 (void *)(&tables));
1379 #endif
1381 compile_block.lcc = tables + lcc_offset;
1382 compile_block.fcc = tables + fcc_offset;
1383 compile_block.cbits = tables + cbits_offset;
1384 compile_block.ctypes = tables + ctypes_offset;
1386 /* See if we can find a fixed set of initial characters for the pattern. */
1388 memset(start_bits, 0, 32 * sizeof(pcre_uint8));
1389 rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1390 &compile_block);
1391 bits_set = rc == SSB_DONE;
1392 if (rc == SSB_UNKNOWN)
1394 *errorptr = "internal error: opcode not recognized";
1395 return NULL;
1399 /* Find the minimum length of subject string. */
1401 switch(min = find_minlength(code, code, re->options, 0))
1403 case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1404 case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1405 default: break;
1408 /* If a set of starting bytes has been identified, or if the minimum length is
1409 greater than zero, or if JIT optimization has been requested, get a
1410 pcre[16]_extra block and a pcre_study_data block. The study data is put in the
1411 latter, which is pointed to by the former, which may also get additional data
1412 set later by the calling program. At the moment, the size of pcre_study_data
1413 is fixed. We nevertheless save it in a field for returning via the
1414 pcre_fullinfo() function so that if it becomes variable in the future,
1415 we don't have to change that code. */
1417 if (bits_set || min > 0
1418 #ifdef SUPPORT_JIT
1419 || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
1420 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
1421 #endif
1424 extra = (PUBL(extra) *)(PUBL(malloc))
1425 (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
1426 if (extra == NULL)
1428 *errorptr = "failed to get memory";
1429 return NULL;
1432 study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
1433 extra->flags = PCRE_EXTRA_STUDY_DATA;
1434 extra->study_data = study;
1436 study->size = sizeof(pcre_study_data);
1437 study->flags = 0;
1439 /* Set the start bits always, to avoid unset memory errors if the
1440 study data is written to a file, but set the flag only if any of the bits
1441 are set, to save time looking when none are. */
1443 if (bits_set)
1445 study->flags |= PCRE_STUDY_MAPPED;
1446 memcpy(study->start_bits, start_bits, sizeof(start_bits));
1448 else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
1450 #ifdef PCRE_DEBUG
1451 if (bits_set)
1453 pcre_uint8 *ptr = start_bits;
1454 int i;
1456 printf("Start bits:\n");
1457 for (i = 0; i < 32; i++)
1458 printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
1460 #endif
1462 /* Always set the minlength value in the block, because the JIT compiler
1463 makes use of it. However, don't set the bit unless the length is greater than
1464 zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
1465 checking the zero case. */
1467 if (min > 0)
1469 study->flags |= PCRE_STUDY_MINLEN;
1470 study->minlength = min;
1472 else study->minlength = 0;
1474 /* If JIT support was compiled and requested, attempt the JIT compilation.
1475 If no starting bytes were found, and the minimum length is zero, and JIT
1476 compilation fails, abandon the extra block and return NULL. */
1478 #ifdef SUPPORT_JIT
1479 extra->executable_jit = NULL;
1480 if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
1481 PRIV(jit_compile)(re, extra, JIT_COMPILE);
1482 if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
1483 PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
1484 if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
1485 PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
1487 if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
1489 #ifdef COMPILE_PCRE8
1490 pcre_free_study(extra);
1491 #endif
1492 #ifdef COMPILE_PCRE16
1493 pcre16_free_study(extra);
1494 #endif
1495 extra = NULL;
1497 #endif
1500 return extra;
1504 /*************************************************
1505 * Free the study data *
1506 *************************************************/
1508 /* This function frees the memory that was obtained by pcre_study().
1510 Argument: a pointer to the pcre[16]_extra block
1511 Returns: nothing
1514 #ifdef COMPILE_PCRE8
1515 PCRE_EXP_DEFN void
1516 pcre_free_study(pcre_extra *extra)
1517 #else
1518 PCRE_EXP_DEFN void
1519 pcre16_free_study(pcre16_extra *extra)
1520 #endif
1522 if (extra == NULL)
1523 return;
1524 #ifdef SUPPORT_JIT
1525 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1526 extra->executable_jit != NULL)
1527 PRIV(jit_free)(extra->executable_jit);
1528 #endif
1529 PUBL(free)(extra);
1532 /* End of pcre_study.c */