1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla Communicator client code, released
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS regular expressions, after Perl.
49 #include "jsarena.h" /* Added by JSIFY */
50 #include "jsutil.h" /* Added by JSIFY */
55 #include "jsversion.h"
69 #define REOP_DEF(opcode, name) opcode,
70 #include "jsreops.tbl"
72 REOP_LIMIT
/* META: no operator >= to this */
75 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
78 const char *reop_names
[] = {
79 #define REOP_DEF(opcode, name) name,
80 #include "jsreops.tbl"
88 re_debug(const char *fmt
, ...) __attribute__ ((format(printf
, 1, 2)));
93 re_debug(const char *fmt
, ...)
99 retval
= vprintf(fmt
, ap
);
105 re_debug_chars(const jschar
*chrs
, size_t length
)
110 while (*chrs
&& i
++ < length
) {
111 putchar((char)*chrs
++);
115 #else /* !REGEXP_DEBUG */
116 /* This should be optimized to a no-op by our tier-1 compilers. */
118 re_debug(const char *fmt
, ...)
124 re_debug_chars(const jschar
*chrs
, size_t length
)
127 #endif /* !REGEXP_DEBUG */
130 REOp op
; /* r.e. op bytecode */
131 RENode
*next
; /* next in concatenation order */
132 void *kid
; /* first operand */
134 void *kid2
; /* second operand */
135 jsint num
; /* could be a number */
136 size_t parenIndex
; /* or a parenthesis index */
137 struct { /* or a quantifier range */
142 struct { /* or a character class */
144 size_t kidlen
; /* length of string at kid, in jschars */
145 size_t index
; /* index into class list */
146 uint16 bmsize
; /* bitmap size, based on max char code */
149 struct { /* or a literal sequence */
150 jschar chr
; /* of one character */
151 size_t length
; /* or many (via the kid) */
154 RENode
*kid2
; /* second operand from ALT */
155 jschar ch1
; /* match char for ALTPREREQ */
156 jschar ch2
; /* ditto, or class index for ALTPREREQ2 */
161 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
162 ((c >= 'a') && (c <= 'z')) )
163 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
164 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
166 #define CLASS_CACHE_SIZE 4
168 typedef struct CompilerState
{
170 JSTokenStream
*tokenStream
; /* For reporting errors */
171 const jschar
*cpbegin
;
175 size_t classCount
; /* number of [] encountered */
176 size_t treeDepth
; /* maximum depth of parse tree */
177 size_t progLength
; /* estimated bytecode length */
179 size_t classBitmapsMem
; /* memory to hold all class bitmaps */
181 const jschar
*start
; /* small cache of class strings */
182 size_t length
; /* since they're often the same */
184 } classCache
[CLASS_CACHE_SIZE
];
188 typedef struct EmitStateStackEntry
{
189 jsbytecode
*altHead
; /* start of REOP_ALT* opcode */
190 jsbytecode
*nextAltFixup
; /* fixup pointer to next-alt offset */
191 jsbytecode
*nextTermFixup
; /* fixup ptr. to REOP_JUMP offset */
192 jsbytecode
*endTermFixup
; /* fixup ptr. to REOPT_ALTPREREQ* offset */
193 RENode
*continueNode
; /* original REOP_ALT* node being stacked */
194 jsbytecode continueOp
; /* REOP_JUMP or REOP_ENDALT continuation */
195 JSPackedBool jumpToJumpFlag
; /* true if we've patched jump-to-jump to
196 avoid 16-bit unsigned offset overflow */
197 } EmitStateStackEntry
;
200 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
201 * the getters and setters take the pc of the offset, not of the opcode before
205 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
206 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
207 (pc)[1] = (jsbytecode) (arg))
209 #define OFFSET_LEN ARG_LEN
210 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
211 #define GET_OFFSET(pc) GET_ARG(pc)
214 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
215 * For sanity, we limit it to 2^24 bytes.
217 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
220 * The maximum memory that can be allocated for class bitmaps.
221 * For sanity, we limit it to 2^24 bytes.
223 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
226 * Functions to get size and write/read bytecode that represent small indexes
228 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
229 * indicates that the following byte brings more bits to the index. Otherwise
230 * this is the last byte in the index bytecode representing highest index bits.
233 GetCompactIndexWidth(size_t index
)
237 for (width
= 1; (index
>>= 7) != 0; ++width
) { }
241 static JS_ALWAYS_INLINE jsbytecode
*
242 WriteCompactIndex(jsbytecode
*pc
, size_t index
)
246 while ((next
= index
>> 7) != 0) {
247 *pc
++ = (jsbytecode
)(index
| 0x80);
250 *pc
++ = (jsbytecode
)index
;
254 static JS_ALWAYS_INLINE jsbytecode
*
255 ReadCompactIndex(jsbytecode
*pc
, size_t *result
)
260 if ((nextByte
& 0x80) == 0) {
262 * Short-circuit the most common case when compact index <= 127.
267 *result
= 0x7F & nextByte
;
270 *result
|= (nextByte
& 0x7F) << shift
;
272 } while ((nextByte
& 0x80) != 0);
277 typedef struct RECapture
{
278 ptrdiff_t index
; /* start of contents, -1 for empty */
279 size_t length
; /* length of capture */
282 typedef struct REMatchState
{
284 RECapture parens
[1]; /* first of 're->parenCount' captures,
285 allocated at end of this struct */
288 struct REBackTrackData
;
290 typedef struct REProgState
{
291 jsbytecode
*continue_pc
; /* current continuation data */
292 jsbytecode continue_op
;
293 ptrdiff_t index
; /* progress in text */
294 size_t parenSoFar
; /* highest indexed paren started */
297 uintN min
; /* current quantifier limits */
301 size_t top
; /* backtrack stack state */
307 typedef struct REBackTrackData
{
308 size_t sz
; /* size of previous stack entry */
309 jsbytecode
*backtrack_pc
; /* where to backtrack to */
310 jsbytecode backtrack_op
;
311 const jschar
*cp
; /* index in text of match at backtrack */
312 size_t parenIndex
; /* start index of saved paren contents */
313 size_t parenCount
; /* # of saved paren contents */
314 size_t saveStateStackTop
; /* number of parent states */
315 /* saved parent states follow */
316 /* saved paren contents follow */
319 #define INITIAL_STATESTACK 100
320 #define INITIAL_BACKTRACK 8000
322 typedef struct REGlobalData
{
324 JSRegExp
*regexp
; /* the RE in execution */
325 JSBool ok
; /* runtime error (out_of_memory only?) */
326 size_t start
; /* offset to start at */
327 ptrdiff_t skipped
; /* chars skipped anchoring this r.e. */
328 const jschar
*cpbegin
; /* text base address */
329 const jschar
*cpend
; /* text limit address */
331 REProgState
*stateStack
; /* stack of state of current parents */
332 size_t stateStackTop
;
333 size_t stateStackLimit
;
335 REBackTrackData
*backTrackStack
;/* stack of matched-so-far positions */
336 REBackTrackData
*backTrackSP
;
337 size_t backTrackStackSize
;
338 size_t cursz
; /* size of current stack entry */
339 size_t backTrackCount
; /* how many times we've backtracked */
340 size_t backTrackLimit
; /* upper limit on backtrack states */
344 * 1. If IgnoreCase is false, return ch.
345 * 2. Let u be ch converted to upper case as if by calling
346 * String.prototype.toUpperCase on the one-character string ch.
347 * 3. If u does not consist of a single character, return ch.
348 * 4. Let cu be u's character.
349 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
350 * code point value is less than decimal 128, then return ch.
353 static JS_ALWAYS_INLINE uintN
358 JS_ASSERT((uintN
) (jschar
) ch
== ch
);
360 if (ch
- (uintN
) 'a' <= (uintN
) ('z' - 'a'))
361 ch
-= (uintN
) ('a' - 'A');
366 return (cu
< 128) ? ch
: cu
;
369 static JS_ALWAYS_INLINE uintN
372 JS_ASSERT((uintN
) (jschar
) ch
== ch
);
374 if (ch
- (uintN
) 'A' <= (uintN
) ('Z' - 'A'))
375 ch
+= (uintN
) ('a' - 'A');
379 return JS_TOLOWER(ch
);
382 /* Construct and initialize an RENode, returning NULL for out-of-memory */
384 NewRENode(CompilerState
*state
, REOp op
)
390 JS_ARENA_ALLOCATE_CAST(ren
, RENode
*, &cx
->tempPool
, sizeof *ren
);
392 js_ReportOutOfScriptQuota(cx
);
402 * Validates and converts hex ascii value.
405 isASCIIHexDigit(jschar c
, uintN
*digit
)
416 if (cv
>= 'a' && cv
<= 'f') {
417 *digit
= cv
- 'a' + 10;
426 const jschar
*errPos
;
431 ReportRegExpErrorHelper(CompilerState
*state
, uintN flags
, uintN errorNumber
,
434 if (state
->tokenStream
) {
435 return js_ReportCompileErrorNumber(state
->context
, state
->tokenStream
,
436 NULL
, JSREPORT_UC
| flags
,
439 return JS_ReportErrorFlagsAndNumberUC(state
->context
, flags
,
440 js_GetErrorMessage
, NULL
,
445 ReportRegExpError(CompilerState
*state
, uintN flags
, uintN errorNumber
)
447 return ReportRegExpErrorHelper(state
, flags
, errorNumber
, NULL
);
451 * Process the op against the two top operands, reducing them to a single
452 * operand in the penultimate slot. Update progLength and treeDepth.
455 ProcessOp(CompilerState
*state
, REOpData
*opData
, RENode
**operandStack
,
460 switch (opData
->op
) {
462 result
= NewRENode(state
, REOP_ALT
);
465 result
->kid
= operandStack
[operandSP
- 2];
466 result
->u
.kid2
= operandStack
[operandSP
- 1];
467 operandStack
[operandSP
- 2] = result
;
469 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
470 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
476 * Look at both alternates to see if there's a FLAT or a CLASS at
477 * the start of each. If so, use a prerequisite match.
479 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
480 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
481 (state
->flags
& JSREG_FOLD
) == 0) {
482 result
->op
= REOP_ALTPREREQ
;
483 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
484 result
->u
.altprereq
.ch2
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
485 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
486 JUMP, <end> ... ENDALT */
487 state
->progLength
+= 13;
490 if (((RENode
*) result
->kid
)->op
== REOP_CLASS
&&
491 ((RENode
*) result
->kid
)->u
.ucclass
.index
< 256 &&
492 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
493 (state
->flags
& JSREG_FOLD
) == 0) {
494 result
->op
= REOP_ALTPREREQ2
;
495 result
->u
.altprereq
.ch1
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
496 result
->u
.altprereq
.ch2
= ((RENode
*) result
->kid
)->u
.ucclass
.index
;
497 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
498 JUMP, <end> ... ENDALT */
499 state
->progLength
+= 13;
502 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
503 ((RENode
*) result
->u
.kid2
)->op
== REOP_CLASS
&&
504 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
< 256 &&
505 (state
->flags
& JSREG_FOLD
) == 0) {
506 result
->op
= REOP_ALTPREREQ2
;
507 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
508 result
->u
.altprereq
.ch2
=
509 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
;
510 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
511 JUMP, <end> ... ENDALT */
512 state
->progLength
+= 13;
515 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
516 state
->progLength
+= 7;
521 result
= operandStack
[operandSP
- 2];
523 result
= result
->next
;
524 result
->next
= operandStack
[operandSP
- 1];
528 case REOP_ASSERT_NOT
:
531 /* These should have been processed by a close paren. */
532 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_MISSING_PAREN
,
542 * Parser forward declarations.
544 static JSBool
ParseTerm(CompilerState
*state
);
545 static JSBool
ParseQuantifier(CompilerState
*state
);
546 static intN
ParseMinMaxQuantifier(CompilerState
*state
, JSBool ignoreValues
);
549 * Top-down regular expression grammar, based closely on Perl4.
551 * regexp: altern A regular expression is one or more
552 * altern '|' regexp alternatives separated by vertical bar.
554 #define INITIAL_STACK_SIZE 128
557 ParseRegExp(CompilerState
*state
)
561 REOpData
*operatorStack
;
562 RENode
**operandStack
;
565 JSBool result
= JS_FALSE
;
567 intN operatorSP
= 0, operatorStackSize
= INITIAL_STACK_SIZE
;
568 intN operandSP
= 0, operandStackSize
= INITIAL_STACK_SIZE
;
570 /* Watch out for empty regexp */
571 if (state
->cp
== state
->cpend
) {
572 state
->result
= NewRENode(state
, REOP_EMPTY
);
573 return (state
->result
!= NULL
);
576 operatorStack
= (REOpData
*)
577 JS_malloc(state
->context
, sizeof(REOpData
) * operatorStackSize
);
581 operandStack
= (RENode
**)
582 JS_malloc(state
->context
, sizeof(RENode
*) * operandStackSize
);
587 parenIndex
= state
->parenCount
;
588 if (state
->cp
== state
->cpend
) {
590 * If we are at the end of the regexp and we're short one or more
591 * operands, the regexp must have the form /x|/ or some such, with
592 * left parentheses making us short more than one operand.
594 if (operatorSP
>= operandSP
) {
595 operand
= NewRENode(state
, REOP_EMPTY
);
601 switch (*state
->cp
) {
604 if (state
->cp
+ 1 < state
->cpend
&&
606 (state
->cp
[1] == '=' ||
607 state
->cp
[1] == '!' ||
608 state
->cp
[1] == ':')) {
609 switch (state
->cp
[1]) {
612 /* ASSERT, <next>, ... ASSERTTEST */
613 state
->progLength
+= 4;
616 op
= REOP_ASSERT_NOT
;
617 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
618 state
->progLength
+= 4;
627 /* LPAREN, <index>, ... RPAREN, <index> */
629 += 2 * (1 + GetCompactIndexWidth(parenIndex
));
631 if (state
->parenCount
== 65535) {
632 ReportRegExpError(state
, JSREPORT_ERROR
,
633 JSMSG_TOO_MANY_PARENS
);
641 * If there's no stacked open parenthesis, throw syntax error.
643 for (i
= operatorSP
- 1; ; i
--) {
645 ReportRegExpError(state
, JSREPORT_ERROR
,
646 JSMSG_UNMATCHED_RIGHT_PAREN
);
649 if (operatorStack
[i
].op
== REOP_ASSERT
||
650 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
651 operatorStack
[i
].op
== REOP_LPARENNON
||
652 operatorStack
[i
].op
== REOP_LPAREN
) {
659 /* Expected an operand before these, so make an empty one */
660 operand
= NewRENode(state
, REOP_EMPTY
);
666 if (!ParseTerm(state
))
668 operand
= state
->result
;
670 if (operandSP
== operandStackSize
) {
672 operandStackSize
+= operandStackSize
;
674 JS_realloc(state
->context
, operandStack
,
675 sizeof(RENode
*) * operandStackSize
);
680 operandStack
[operandSP
++] = operand
;
685 /* At the end; process remaining operators. */
687 if (state
->cp
== state
->cpend
) {
690 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
691 operandStack
, operandSP
))
695 JS_ASSERT(operandSP
== 1);
696 state
->result
= operandStack
[0];
701 switch (*state
->cp
) {
703 /* Process any stacked 'concat' operators */
706 operatorStack
[operatorSP
- 1].op
== REOP_CONCAT
) {
708 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
709 operandStack
, operandSP
)) {
719 * If there's no stacked open parenthesis, throw syntax error.
721 for (i
= operatorSP
- 1; ; i
--) {
723 ReportRegExpError(state
, JSREPORT_ERROR
,
724 JSMSG_UNMATCHED_RIGHT_PAREN
);
727 if (operatorStack
[i
].op
== REOP_ASSERT
||
728 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
729 operatorStack
[i
].op
== REOP_LPARENNON
||
730 operatorStack
[i
].op
== REOP_LPAREN
) {
736 /* Process everything on the stack until the open parenthesis. */
738 JS_ASSERT(operatorSP
);
740 switch (operatorStack
[operatorSP
].op
) {
742 case REOP_ASSERT_NOT
:
744 operand
= NewRENode(state
, operatorStack
[operatorSP
].op
);
747 operand
->u
.parenIndex
=
748 operatorStack
[operatorSP
].parenIndex
;
749 JS_ASSERT(operandSP
);
750 operand
->kid
= operandStack
[operandSP
- 1];
751 operandStack
[operandSP
- 1] = operand
;
752 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
753 ReportRegExpError(state
, JSREPORT_ERROR
,
754 JSMSG_REGEXP_TOO_COMPLEX
);
761 state
->result
= operandStack
[operandSP
- 1];
762 if (!ParseQuantifier(state
))
764 operandStack
[operandSP
- 1] = state
->result
;
765 goto restartOperator
;
767 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
768 operandStack
, operandSP
))
778 const jschar
*errp
= state
->cp
;
780 if (ParseMinMaxQuantifier(state
, JS_TRUE
) < 0) {
782 * This didn't even scan correctly as a quantifier, so we should
796 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_BAD_QUANTIFIER
,
802 /* Anything else is the start of the next term. */
805 if (operatorSP
== operatorStackSize
) {
807 operatorStackSize
+= operatorStackSize
;
809 JS_realloc(state
->context
, operatorStack
,
810 sizeof(REOpData
) * operatorStackSize
);
815 operatorStack
[operatorSP
].op
= op
;
816 operatorStack
[operatorSP
].errPos
= state
->cp
;
817 operatorStack
[operatorSP
++].parenIndex
= parenIndex
;
823 JS_free(state
->context
, operatorStack
);
825 JS_free(state
->context
, operandStack
);
830 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
831 * its being on the stack, and to propagate errors to its callers.
833 #define JSREG_FIND_PAREN_COUNT 0x8000
834 #define JSREG_FIND_PAREN_ERROR 0x4000
837 * Magic return value from FindParenCount and GetDecimalValue, to indicate
838 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
839 * its findMax parameter is non-null.
841 #define OVERFLOW_VALUE ((uintN)-1)
844 FindParenCount(CompilerState
*state
)
849 if (state
->flags
& JSREG_FIND_PAREN_COUNT
)
850 return OVERFLOW_VALUE
;
853 * Copy state into temp, flag it so we never report an invalid backref,
854 * and reset its members to parse the entire regexp. This is obviously
855 * suboptimal, but GetDecimalValue calls us only if a backref appears to
856 * refer to a forward parenthetical, which is rare.
859 temp
.flags
|= JSREG_FIND_PAREN_COUNT
;
860 temp
.cp
= temp
.cpbegin
;
865 temp
.classBitmapsMem
= 0;
866 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
867 temp
.classCache
[i
].start
= NULL
;
869 if (!ParseRegExp(&temp
)) {
870 state
->flags
|= JSREG_FIND_PAREN_ERROR
;
871 return OVERFLOW_VALUE
;
873 return temp
.parenCount
;
877 * Extract and return a decimal value at state->cp. The initial character c
878 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
879 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
880 * state->flags to discover whether an error occurred under findMax.
883 GetDecimalValue(jschar c
, uintN max
, uintN (*findMax
)(CompilerState
*state
),
884 CompilerState
*state
)
886 uintN value
= JS7_UNDEC(c
);
887 JSBool overflow
= (value
> max
&& (!findMax
|| value
> findMax(state
)));
889 /* The following restriction allows simpler overflow checks. */
890 JS_ASSERT(max
<= ((uintN
)-1 - 9) / 10);
891 while (state
->cp
< state
->cpend
) {
895 value
= 10 * value
+ JS7_UNDEC(c
);
896 if (!overflow
&& value
> max
&& (!findMax
|| value
> findMax(state
)))
900 return overflow
? OVERFLOW_VALUE
: value
;
904 * Calculate the total size of the bitmap required for a class expression.
907 CalculateBitmapSize(CompilerState
*state
, RENode
*target
, const jschar
*src
,
911 JSBool inRange
= JS_FALSE
;
912 jschar c
, rangeStart
= 0;
913 uintN n
, digit
, nDigits
, i
;
915 target
->u
.ucclass
.bmsize
= 0;
916 target
->u
.ucclass
.sense
= JS_TRUE
;
923 target
->u
.ucclass
.sense
= JS_FALSE
;
927 JSBool canStartRange
= JS_TRUE
;
954 if (src
< end
&& RE_IS_LETTER(*src
)) {
955 localMax
= (uintN
) (*src
++) & 0x1F;
968 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
970 if (!isASCIIHexDigit(c
, &digit
)) {
972 * Back off to accepting the original
979 n
= (n
<< 4) | digit
;
984 canStartRange
= JS_FALSE
;
986 JS_ReportErrorNumber(state
->context
,
987 js_GetErrorMessage
, NULL
,
988 JSMSG_BAD_CLASS_RANGE
);
998 canStartRange
= JS_FALSE
;
1000 JS_ReportErrorNumber(state
->context
,
1001 js_GetErrorMessage
, NULL
,
1002 JSMSG_BAD_CLASS_RANGE
);
1008 * If this is the start of a range, ensure that it's less than
1022 * This is a non-ECMA extension - decimal escapes (in this
1023 * case, octal!) are supposed to be an error inside class
1024 * ranges, but supported here for backwards compatibility.
1029 if ('0' <= c
&& c
<= '7') {
1031 n
= 8 * n
+ JS7_UNDEC(c
);
1033 if ('0' <= c
&& c
<= '7') {
1035 i
= 8 * n
+ JS7_UNDEC(c
);
1056 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1057 if (rangeStart
> localMax
) {
1058 JS_ReportErrorNumber(state
->context
,
1059 js_GetErrorMessage
, NULL
,
1060 JSMSG_BAD_CLASS_RANGE
);
1065 if (canStartRange
&& src
< end
- 1) {
1069 rangeStart
= (jschar
)localMax
;
1073 if (state
->flags
& JSREG_FOLD
)
1074 rangeStart
= localMax
; /* one run of the uc/dc loop below */
1077 if (state
->flags
& JSREG_FOLD
) {
1078 jschar maxch
= localMax
;
1080 for (i
= rangeStart
; i
<= localMax
; i
++) {
1085 maxch
= JS_MAX(maxch
, uch
);
1086 maxch
= JS_MAX(maxch
, dch
);
1094 target
->u
.ucclass
.bmsize
= max
;
1099 * item: assertion An item is either an assertion or
1100 * quantatom a quantified atom.
1102 * assertion: '^' Assertions match beginning of string
1103 * (or line if the class static property
1104 * RegExp.multiline is true).
1105 * '$' End of string (or line if the class
1106 * static property RegExp.multiline is
1108 * '\b' Word boundary (between \w and \W).
1109 * '\B' Word non-boundary.
1111 * quantatom: atom An unquantified atom.
1112 * quantatom '{' n ',' m '}'
1113 * Atom must occur between n and m times.
1114 * quantatom '{' n ',' '}' Atom must occur at least n times.
1115 * quantatom '{' n '}' Atom must occur exactly n times.
1116 * quantatom '*' Zero or more times (same as {0,}).
1117 * quantatom '+' One or more times (same as {1,}).
1118 * quantatom '?' Zero or one time (same as {0,1}).
1120 * any of which can be optionally followed by '?' for ungreedy
1122 * atom: '(' regexp ')' A parenthesized regexp (what matched
1123 * can be addressed using a backreference,
1125 * '.' Matches any char except '\n'.
1126 * '[' classlist ']' A character class.
1127 * '[' '^' classlist ']' A negated character class.
1129 * '\n' Newline (Line Feed).
1130 * '\r' Carriage Return.
1131 * '\t' Horizontal Tab.
1132 * '\v' Vertical Tab.
1133 * '\d' A digit (same as [0-9]).
1135 * '\w' A word character, [0-9a-z_A-Z].
1136 * '\W' A non-word character.
1137 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1138 * '\S' A non-whitespace character.
1139 * '\' n A backreference to the nth (n decimal
1140 * and positive) parenthesized expression.
1141 * '\' octal An octal escape sequence (octal must be
1142 * two or three digits long, unless it is
1143 * 0 for the null character).
1144 * '\x' hex A hex escape (hex must be two digits).
1145 * '\u' unicode A unicode escape (must be four digits).
1146 * '\c' ctrl A control character, ctrl is a letter.
1147 * '\' literalatomchar Any character except one of the above
1148 * that follow '\' in an atom.
1149 * otheratomchar Any character not first among the other
1150 * atom right-hand sides.
1153 ParseTerm(CompilerState
*state
)
1155 jschar c
= *state
->cp
++;
1157 uintN num
, tmp
, n
, i
;
1158 const jschar
*termStart
;
1161 /* assertions and atoms */
1163 state
->result
= NewRENode(state
, REOP_BOL
);
1166 state
->progLength
++;
1169 state
->result
= NewRENode(state
, REOP_EOL
);
1172 state
->progLength
++;
1175 if (state
->cp
>= state
->cpend
) {
1176 /* a trailing '\' is an error */
1177 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_TRAILING_SLASH
);
1182 /* assertion escapes */
1184 state
->result
= NewRENode(state
, REOP_WBDRY
);
1187 state
->progLength
++;
1190 state
->result
= NewRENode(state
, REOP_WNONBDRY
);
1193 state
->progLength
++;
1195 /* Decimal escape */
1197 /* Give a strict warning. See also the note below. */
1198 if (!ReportRegExpError(state
, JSREPORT_WARNING
| JSREPORT_STRICT
,
1199 JSMSG_INVALID_BACKREF
)) {
1204 while (state
->cp
< state
->cpend
) {
1206 if (c
< '0' || '7' < c
)
1209 tmp
= 8 * num
+ (uintN
)JS7_UNDEC(c
);
1216 state
->result
= NewRENode(state
, REOP_FLAT
);
1219 state
->result
->u
.flat
.chr
= c
;
1220 state
->result
->u
.flat
.length
= 1;
1221 state
->progLength
+= 3;
1232 termStart
= state
->cp
- 1;
1233 num
= GetDecimalValue(c
, state
->parenCount
, FindParenCount
, state
);
1234 if (state
->flags
& JSREG_FIND_PAREN_ERROR
)
1236 if (num
== OVERFLOW_VALUE
) {
1237 /* Give a strict mode warning. */
1238 if (!ReportRegExpError(state
,
1239 JSREPORT_WARNING
| JSREPORT_STRICT
,
1241 ? JSMSG_INVALID_BACKREF
1242 : JSMSG_BAD_BACKREF
)) {
1247 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1248 * error here. However, for compatibility with IE, we treat the
1249 * whole backref as flat if the first character in it is not a
1250 * valid octal character, and as an octal escape otherwise.
1252 state
->cp
= termStart
;
1254 /* Treat this as flat. termStart - 1 is the \. */
1259 /* Treat this as an octal escape. */
1262 JS_ASSERT(1 <= num
&& num
<= 0x10000);
1263 state
->result
= NewRENode(state
, REOP_BACKREF
);
1266 state
->result
->u
.parenIndex
= num
- 1;
1268 += 1 + GetCompactIndexWidth(state
->result
->u
.parenIndex
);
1270 /* Control escape */
1286 /* Control letter */
1288 if (state
->cp
< state
->cpend
&& RE_IS_LETTER(*state
->cp
)) {
1289 c
= (jschar
) (*state
->cp
++ & 0x1F);
1291 /* back off to accepting the original '\' as a literal */
1296 /* HexEscapeSequence */
1300 /* UnicodeEscapeSequence */
1305 for (i
= 0; i
< nDigits
&& state
->cp
< state
->cpend
; i
++) {
1308 if (!isASCIIHexDigit(c
, &digit
)) {
1310 * Back off to accepting the original 'u' or 'x' as a
1317 n
= (n
<< 4) | digit
;
1321 /* Character class escapes */
1323 state
->result
= NewRENode(state
, REOP_DIGIT
);
1327 state
->progLength
++;
1330 state
->result
= NewRENode(state
, REOP_NONDIGIT
);
1333 state
->result
= NewRENode(state
, REOP_SPACE
);
1336 state
->result
= NewRENode(state
, REOP_NONSPACE
);
1339 state
->result
= NewRENode(state
, REOP_ALNUM
);
1342 state
->result
= NewRENode(state
, REOP_NONALNUM
);
1344 /* IdentityEscape */
1346 state
->result
= NewRENode(state
, REOP_FLAT
);
1349 state
->result
->u
.flat
.chr
= c
;
1350 state
->result
->u
.flat
.length
= 1;
1351 state
->result
->kid
= (void *) (state
->cp
- 1);
1352 state
->progLength
+= 3;
1357 state
->result
= NewRENode(state
, REOP_CLASS
);
1360 termStart
= state
->cp
;
1361 state
->result
->u
.ucclass
.startIndex
= termStart
- state
->cpbegin
;
1363 if (state
->cp
== state
->cpend
) {
1364 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1365 JSMSG_UNTERM_CLASS
, termStart
);
1369 if (*state
->cp
== '\\') {
1371 if (state
->cp
!= state
->cpend
)
1375 if (*state
->cp
== ']') {
1376 state
->result
->u
.ucclass
.kidlen
= state
->cp
- termStart
;
1381 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++) {
1382 if (!state
->classCache
[i
].start
) {
1383 state
->classCache
[i
].start
= termStart
;
1384 state
->classCache
[i
].length
= state
->result
->u
.ucclass
.kidlen
;
1385 state
->classCache
[i
].index
= state
->classCount
;
1388 if (state
->classCache
[i
].length
==
1389 state
->result
->u
.ucclass
.kidlen
) {
1390 for (n
= 0; ; n
++) {
1391 if (n
== state
->classCache
[i
].length
) {
1392 state
->result
->u
.ucclass
.index
1393 = state
->classCache
[i
].index
;
1396 if (state
->classCache
[i
].start
[n
] != termStart
[n
])
1401 state
->result
->u
.ucclass
.index
= state
->classCount
++;
1405 * Call CalculateBitmapSize now as we want any errors it finds
1406 * to be reported during the parse phase, not at execution.
1408 if (!CalculateBitmapSize(state
, state
->result
, termStart
, state
->cp
++))
1411 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1412 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1413 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1415 n
= (state
->result
->u
.ucclass
.bmsize
>> 3) + 1;
1416 if (n
> CLASS_BITMAPS_MEM_LIMIT
- state
->classBitmapsMem
) {
1417 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1420 state
->classBitmapsMem
+= n
;
1421 /* CLASS, <index> */
1423 += 1 + GetCompactIndexWidth(state
->result
->u
.ucclass
.index
);
1427 state
->result
= NewRENode(state
, REOP_DOT
);
1432 const jschar
*errp
= state
->cp
--;
1435 err
= ParseMinMaxQuantifier(state
, JS_TRUE
);
1446 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1447 JSMSG_BAD_QUANTIFIER
, state
->cp
- 1);
1451 state
->result
= NewRENode(state
, REOP_FLAT
);
1454 state
->result
->u
.flat
.chr
= c
;
1455 state
->result
->u
.flat
.length
= 1;
1456 state
->result
->kid
= (void *) (state
->cp
- 1);
1457 state
->progLength
+= 3;
1460 return ParseQuantifier(state
);
1464 ParseQuantifier(CompilerState
*state
)
1467 term
= state
->result
;
1468 if (state
->cp
< state
->cpend
) {
1469 switch (*state
->cp
) {
1471 state
->result
= NewRENode(state
, REOP_QUANT
);
1474 state
->result
->u
.range
.min
= 1;
1475 state
->result
->u
.range
.max
= (uintN
)-1;
1476 /* <PLUS>, <next> ... <ENDCHILD> */
1477 state
->progLength
+= 4;
1480 state
->result
= NewRENode(state
, REOP_QUANT
);
1483 state
->result
->u
.range
.min
= 0;
1484 state
->result
->u
.range
.max
= (uintN
)-1;
1485 /* <STAR>, <next> ... <ENDCHILD> */
1486 state
->progLength
+= 4;
1489 state
->result
= NewRENode(state
, REOP_QUANT
);
1492 state
->result
->u
.range
.min
= 0;
1493 state
->result
->u
.range
.max
= 1;
1494 /* <OPT>, <next> ... <ENDCHILD> */
1495 state
->progLength
+= 4;
1497 case '{': /* balance '}' */
1500 const jschar
*errp
= state
->cp
;
1502 err
= ParseMinMaxQuantifier(state
, JS_FALSE
);
1508 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, err
, errp
);
1517 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1518 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1524 state
->result
->kid
= term
;
1525 if (state
->cp
< state
->cpend
&& *state
->cp
== '?') {
1527 state
->result
->u
.range
.greedy
= JS_FALSE
;
1529 state
->result
->u
.range
.greedy
= JS_TRUE
;
1535 ParseMinMaxQuantifier(CompilerState
*state
, JSBool ignoreValues
)
1539 const jschar
*errp
= state
->cp
++;
1544 min
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1547 if (!ignoreValues
&& min
== OVERFLOW_VALUE
)
1548 return JSMSG_MIN_TOO_BIG
;
1554 max
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1556 if (!ignoreValues
&& max
== OVERFLOW_VALUE
)
1557 return JSMSG_MAX_TOO_BIG
;
1558 if (!ignoreValues
&& min
> max
)
1559 return JSMSG_OUT_OF_ORDER
;
1567 state
->result
= NewRENode(state
, REOP_QUANT
);
1569 return JSMSG_OUT_OF_MEMORY
;
1570 state
->result
->u
.range
.min
= min
;
1571 state
->result
->u
.range
.max
= max
;
1573 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1574 * where <max> is written as compact(max+1) to make
1575 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1577 state
->progLength
+= (1 + GetCompactIndexWidth(min
)
1578 + GetCompactIndexWidth(max
+ 1)
1589 SetForwardJumpOffset(jsbytecode
*jump
, jsbytecode
*target
)
1591 ptrdiff_t offset
= target
- jump
;
1593 /* Check that target really points forward. */
1594 JS_ASSERT(offset
>= 2);
1595 if ((size_t)offset
> OFFSET_MAX
)
1598 jump
[0] = JUMP_OFFSET_HI(offset
);
1599 jump
[1] = JUMP_OFFSET_LO(offset
);
1604 * Generate bytecode for the tree rooted at t using an explicit stack instead
1608 EmitREBytecode(CompilerState
*state
, JSRegExp
*re
, size_t treeDepth
,
1609 jsbytecode
*pc
, RENode
*t
)
1611 EmitStateStackEntry
*emitStateSP
, *emitStateStack
;
1615 if (treeDepth
== 0) {
1616 emitStateStack
= NULL
;
1619 (EmitStateStackEntry
*)JS_malloc(state
->context
,
1620 sizeof(EmitStateStackEntry
) *
1622 if (!emitStateStack
)
1625 emitStateSP
= emitStateStack
;
1627 JS_ASSERT(op
< REOP_LIMIT
);
1636 case REOP_ALTPREREQ2
:
1637 case REOP_ALTPREREQ
:
1638 JS_ASSERT(emitStateSP
);
1639 emitStateSP
->altHead
= pc
- 1;
1640 emitStateSP
->endTermFixup
= pc
;
1642 SET_ARG(pc
, t
->u
.altprereq
.ch1
);
1644 SET_ARG(pc
, t
->u
.altprereq
.ch2
);
1647 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
1650 emitStateSP
->continueNode
= t
;
1651 emitStateSP
->continueOp
= REOP_JUMP
;
1652 emitStateSP
->jumpToJumpFlag
= JS_FALSE
;
1654 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1655 t
= (RENode
*) t
->kid
;
1657 JS_ASSERT(op
< REOP_LIMIT
);
1661 emitStateSP
->nextTermFixup
= pc
; /* offset to following term */
1663 if (!SetForwardJumpOffset(emitStateSP
->nextAltFixup
, pc
))
1665 emitStateSP
->continueOp
= REOP_ENDALT
;
1667 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1668 t
= (RENode
*) t
->u
.kid2
;
1670 JS_ASSERT(op
< REOP_LIMIT
);
1675 * If we already patched emitStateSP->nextTermFixup to jump to
1676 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1679 if (emitStateSP
->jumpToJumpFlag
)
1683 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1684 * REOP_ENDALT is executed only on successful match of the last
1685 * alternate in a group.
1687 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1689 if (t
->op
!= REOP_ALT
) {
1690 if (!SetForwardJumpOffset(emitStateSP
->endTermFixup
, pc
))
1695 * If the program is bigger than the REOP_JUMP offset range, then
1696 * we must check for alternates before this one that are part of
1697 * the same group, and fix up their jump offsets to target jumps
1698 * close enough to fit in a 16-bit unsigned offset immediate.
1700 if ((size_t)(pc
- re
->program
) > OFFSET_MAX
&&
1701 emitStateSP
> emitStateStack
) {
1702 EmitStateStackEntry
*esp
, *esp2
;
1703 jsbytecode
*alt
, *jump
;
1704 ptrdiff_t span
, header
;
1707 alt
= esp2
->altHead
;
1708 for (esp
= esp2
- 1; esp
>= emitStateStack
; --esp
) {
1709 if (esp
->continueOp
== REOP_ENDALT
&&
1710 !esp
->jumpToJumpFlag
&&
1711 esp
->nextTermFixup
+ OFFSET_LEN
== alt
&&
1712 (size_t)(pc
- ((esp
->continueNode
->op
!= REOP_ALT
)
1714 : esp
->nextTermFixup
)) > OFFSET_MAX
) {
1716 jump
= esp
->nextTermFixup
;
1719 * The span must be 1 less than the distance from
1720 * jump offset to jump offset, so we actually jump
1721 * to a REOP_JUMP bytecode, not to its offset!
1724 JS_ASSERT(jump
< esp2
->nextTermFixup
);
1725 span
= esp2
->nextTermFixup
- jump
- 1;
1726 if ((size_t)span
<= OFFSET_MAX
)
1731 } while (esp2
->continueOp
!= REOP_ENDALT
);
1734 jump
[0] = JUMP_OFFSET_HI(span
);
1735 jump
[1] = JUMP_OFFSET_LO(span
);
1737 if (esp
->continueNode
->op
!= REOP_ALT
) {
1739 * We must patch the offset at esp->endTermFixup
1740 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1741 * If we're unlucky and endTermFixup is more than
1742 * OFFSET_MAX bytes from its target, we cheat by
1743 * jumping 6 bytes to the jump whose offset is at
1744 * esp->nextTermFixup, which has the same target.
1746 jump
= esp
->endTermFixup
;
1747 header
= esp
->nextTermFixup
- jump
;
1749 if ((size_t)span
> OFFSET_MAX
)
1752 jump
[0] = JUMP_OFFSET_HI(span
);
1753 jump
[1] = JUMP_OFFSET_LO(span
);
1756 esp
->jumpToJumpFlag
= JS_TRUE
;
1763 JS_ASSERT(emitStateSP
);
1764 emitStateSP
->altHead
= pc
- 1;
1765 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
1767 emitStateSP
->continueNode
= t
;
1768 emitStateSP
->continueOp
= REOP_JUMP
;
1769 emitStateSP
->jumpToJumpFlag
= JS_FALSE
;
1771 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1772 t
= (RENode
*) t
->kid
;
1774 JS_ASSERT(op
< REOP_LIMIT
);
1779 * Coalesce FLATs if possible and if it would not increase bytecode
1780 * beyond preallocated limit. The latter happens only when bytecode
1781 * size for coalesced string with offset p and length 2 exceeds 6
1782 * bytes preallocated for 2 single char nodes, i.e. when
1783 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1784 * GetCompactIndexWidth(p) > 4.
1785 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1786 * nodes strictly decreases bytecode size, the check has to be
1787 * done only for the first coalescing.
1790 GetCompactIndexWidth((jschar
*)t
->kid
- state
->cpbegin
) <= 4)
1793 t
->next
->op
== REOP_FLAT
&&
1794 (jschar
*)t
->kid
+ t
->u
.flat
.length
==
1795 (jschar
*)t
->next
->kid
) {
1796 t
->u
.flat
.length
+= t
->next
->u
.flat
.length
;
1797 t
->next
= t
->next
->next
;
1800 if (t
->kid
&& t
->u
.flat
.length
> 1) {
1801 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLATi
: REOP_FLAT
;
1802 pc
= WriteCompactIndex(pc
, (jschar
*)t
->kid
- state
->cpbegin
);
1803 pc
= WriteCompactIndex(pc
, t
->u
.flat
.length
);
1804 } else if (t
->u
.flat
.chr
< 256) {
1805 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLAT1i
: REOP_FLAT1
;
1806 *pc
++ = (jsbytecode
) t
->u
.flat
.chr
;
1808 pc
[-1] = (state
->flags
& JSREG_FOLD
)
1811 SET_ARG(pc
, t
->u
.flat
.chr
);
1817 JS_ASSERT(emitStateSP
);
1818 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1819 emitStateSP
->continueNode
= t
;
1820 emitStateSP
->continueOp
= REOP_RPAREN
;
1822 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1823 t
= (RENode
*) t
->kid
;
1828 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1832 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1836 JS_ASSERT(emitStateSP
);
1837 emitStateSP
->nextTermFixup
= pc
;
1839 emitStateSP
->continueNode
= t
;
1840 emitStateSP
->continueOp
= REOP_ASSERTTEST
;
1842 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1843 t
= (RENode
*) t
->kid
;
1847 case REOP_ASSERTTEST
:
1848 case REOP_ASSERTNOTTEST
:
1849 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1853 case REOP_ASSERT_NOT
:
1854 JS_ASSERT(emitStateSP
);
1855 emitStateSP
->nextTermFixup
= pc
;
1857 emitStateSP
->continueNode
= t
;
1858 emitStateSP
->continueOp
= REOP_ASSERTNOTTEST
;
1860 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1861 t
= (RENode
*) t
->kid
;
1866 JS_ASSERT(emitStateSP
);
1867 if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== (uintN
)-1) {
1868 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_STAR
: REOP_MINIMALSTAR
;
1869 } else if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== 1) {
1870 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_OPT
: REOP_MINIMALOPT
;
1871 } else if (t
->u
.range
.min
== 1 && t
->u
.range
.max
== (uintN
) -1) {
1872 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_PLUS
: REOP_MINIMALPLUS
;
1874 if (!t
->u
.range
.greedy
)
1875 pc
[-1] = REOP_MINIMALQUANT
;
1876 pc
= WriteCompactIndex(pc
, t
->u
.range
.min
);
1878 * Write max + 1 to avoid using size_t(max) + 1 bytes
1879 * for (uintN)-1 sentinel.
1881 pc
= WriteCompactIndex(pc
, t
->u
.range
.max
+ 1);
1883 emitStateSP
->nextTermFixup
= pc
;
1885 emitStateSP
->continueNode
= t
;
1886 emitStateSP
->continueOp
= REOP_ENDCHILD
;
1888 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1889 t
= (RENode
*) t
->kid
;
1894 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1899 if (!t
->u
.ucclass
.sense
)
1900 pc
[-1] = REOP_NCLASS
;
1901 pc
= WriteCompactIndex(pc
, t
->u
.ucclass
.index
);
1902 charSet
= &re
->classList
[t
->u
.ucclass
.index
];
1903 charSet
->converted
= JS_FALSE
;
1904 charSet
->length
= t
->u
.ucclass
.bmsize
;
1905 charSet
->u
.src
.startIndex
= t
->u
.ucclass
.startIndex
;
1906 charSet
->u
.src
.length
= t
->u
.ucclass
.kidlen
;
1907 charSet
->sense
= t
->u
.ucclass
.sense
;
1918 if (emitStateSP
== emitStateStack
)
1921 t
= emitStateSP
->continueNode
;
1922 op
= (REOp
) emitStateSP
->continueOp
;
1928 JS_free(state
->context
, emitStateStack
);
1932 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1939 js_NewRegExp(JSContext
*cx
, JSTokenStream
*ts
,
1940 JSString
*str
, uintN flags
, JSBool flat
)
1944 CompilerState state
;
1951 mark
= JS_ARENA_MARK(&cx
->tempPool
);
1952 len
= JSSTRING_LENGTH(str
);
1955 state
.tokenStream
= ts
;
1956 state
.cp
= js_UndependString(cx
, str
);
1959 state
.cpbegin
= state
.cp
;
1960 state
.cpend
= state
.cp
+ len
;
1961 state
.flags
= flags
;
1962 state
.parenCount
= 0;
1963 state
.classCount
= 0;
1964 state
.progLength
= 0;
1965 state
.treeDepth
= 0;
1966 state
.classBitmapsMem
= 0;
1967 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
1968 state
.classCache
[i
].start
= NULL
;
1970 if (len
!= 0 && flat
) {
1971 state
.result
= NewRENode(&state
, REOP_FLAT
);
1974 state
.result
->u
.flat
.chr
= *state
.cpbegin
;
1975 state
.result
->u
.flat
.length
= len
;
1976 state
.result
->kid
= (void *) state
.cpbegin
;
1977 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1978 state
.progLength
+= 1 + GetCompactIndexWidth(0)
1979 + GetCompactIndexWidth(len
);
1981 if (!ParseRegExp(&state
))
1984 resize
= offsetof(JSRegExp
, program
) + state
.progLength
+ 1;
1985 re
= (JSRegExp
*) JS_malloc(cx
, resize
);
1990 JS_ASSERT(state
.classBitmapsMem
<= CLASS_BITMAPS_MEM_LIMIT
);
1991 re
->classCount
= state
.classCount
;
1992 if (re
->classCount
) {
1993 re
->classList
= (RECharSet
*)
1994 JS_malloc(cx
, re
->classCount
* sizeof(RECharSet
));
1995 if (!re
->classList
) {
1996 js_DestroyRegExp(cx
, re
);
2000 for (i
= 0; i
< re
->classCount
; i
++)
2001 re
->classList
[i
].converted
= JS_FALSE
;
2003 re
->classList
= NULL
;
2005 endPC
= EmitREBytecode(&state
, re
, state
.treeDepth
, re
->program
, state
.result
);
2007 js_DestroyRegExp(cx
, re
);
2011 *endPC
++ = REOP_END
;
2013 * Check whether size was overestimated and shrink using realloc.
2014 * This is safe since no pointers to newly parsed regexp or its parts
2015 * besides re exist here.
2017 if ((size_t)(endPC
- re
->program
) != state
.progLength
+ 1) {
2019 JS_ASSERT((size_t)(endPC
- re
->program
) < state
.progLength
+ 1);
2020 resize
= offsetof(JSRegExp
, program
) + (endPC
- re
->program
);
2021 tmp
= (JSRegExp
*) JS_realloc(cx
, re
, resize
);
2027 re
->parenCount
= state
.parenCount
;
2031 JS_ARENA_RELEASE(&cx
->tempPool
, mark
);
2036 js_NewRegExpOpt(JSContext
*cx
, JSString
*str
, JSString
*opt
, JSBool flat
)
2045 JSSTRING_CHARS_AND_LENGTH(opt
, s
, n
);
2046 for (i
= 0; i
< n
; i
++) {
2049 flags
|= JSREG_GLOB
;
2052 flags
|= JSREG_FOLD
;
2055 flags
|= JSREG_MULTILINE
;
2058 flags
|= JSREG_STICKY
;
2061 charBuf
[0] = (char)s
[i
];
2063 JS_ReportErrorFlagsAndNumber(cx
, JSREPORT_ERROR
,
2064 js_GetErrorMessage
, NULL
,
2065 JSMSG_BAD_FLAG
, charBuf
);
2070 return js_NewRegExp(cx
, NULL
, str
, flags
, flat
);
2074 * Save the current state of the match - the position in the input
2075 * text as well as the position in the bytecode. The state of any
2076 * parent expressions is also saved (preceding state).
2077 * Contents of parenCount parentheses from parenIndex are also saved.
2079 static REBackTrackData
*
2080 PushBackTrackState(REGlobalData
*gData
, REOp op
,
2081 jsbytecode
*target
, REMatchState
*x
, const jschar
*cp
,
2082 size_t parenIndex
, size_t parenCount
)
2085 REBackTrackData
*result
=
2086 (REBackTrackData
*) ((char *)gData
->backTrackSP
+ gData
->cursz
);
2088 size_t sz
= sizeof(REBackTrackData
) +
2089 gData
->stateStackTop
* sizeof(REProgState
) +
2090 parenCount
* sizeof(RECapture
);
2092 ptrdiff_t btsize
= gData
->backTrackStackSize
;
2093 ptrdiff_t btincr
= ((char *)result
+ sz
) -
2094 ((char *)gData
->backTrackStack
+ btsize
);
2096 re_debug("\tBT_Push: %lu,%lu",
2097 (unsigned long) parenIndex
, (unsigned long) parenCount
);
2099 JS_COUNT_OPERATION(gData
->cx
, JSOW_JUMP
* (1 + parenCount
));
2101 ptrdiff_t offset
= (char *)result
- (char *)gData
->backTrackStack
;
2103 JS_COUNT_OPERATION(gData
->cx
, JSOW_ALLOCATION
);
2104 btincr
= JS_ROUNDUP(btincr
, btsize
);
2105 JS_ARENA_GROW_CAST(gData
->backTrackStack
, REBackTrackData
*,
2106 &gData
->cx
->regexpPool
, btsize
, btincr
);
2107 if (!gData
->backTrackStack
) {
2108 js_ReportOutOfScriptQuota(gData
->cx
);
2109 gData
->ok
= JS_FALSE
;
2112 gData
->backTrackStackSize
= btsize
+ btincr
;
2113 result
= (REBackTrackData
*) ((char *)gData
->backTrackStack
+ offset
);
2115 gData
->backTrackSP
= result
;
2116 result
->sz
= gData
->cursz
;
2119 result
->backtrack_op
= op
;
2120 result
->backtrack_pc
= target
;
2122 result
->parenCount
= parenCount
;
2123 result
->parenIndex
= parenIndex
;
2125 result
->saveStateStackTop
= gData
->stateStackTop
;
2126 JS_ASSERT(gData
->stateStackTop
);
2127 memcpy(result
+ 1, gData
->stateStack
,
2128 sizeof(REProgState
) * result
->saveStateStackTop
);
2130 if (parenCount
!= 0) {
2131 memcpy((char *)(result
+ 1) +
2132 sizeof(REProgState
) * result
->saveStateStackTop
,
2133 &x
->parens
[parenIndex
],
2134 sizeof(RECapture
) * parenCount
);
2135 for (i
= 0; i
!= parenCount
; i
++)
2136 x
->parens
[parenIndex
+ i
].index
= -1;
2144 * Consecutive literal characters.
2147 static REMatchState
*
2148 FlatNMatcher(REGlobalData
*gData
, REMatchState
*x
, jschar
*matchChars
,
2152 if (length
> gData
->cpend
- x
->cp
)
2154 for (i
= 0; i
!= length
; i
++) {
2155 if (matchChars
[i
] != x
->cp
[i
])
2163 static JS_ALWAYS_INLINE REMatchState
*
2164 FlatNIMatcher(REGlobalData
*gData
, REMatchState
*x
, jschar
*matchChars
,
2168 JS_ASSERT(gData
->cpend
>= x
->cp
);
2169 if (length
> (size_t)(gData
->cpend
- x
->cp
))
2171 for (i
= 0; i
!= length
; i
++) {
2172 if (upcase(matchChars
[i
]) != upcase(x
->cp
[i
]))
2180 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2181 * 2. If E is not a character then go to step 6.
2182 * 3. Let ch be E's character.
2183 * 4. Let A be a one-element RECharSet containing the character ch.
2184 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2185 * 6. E must be an integer. Let n be that integer.
2186 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2187 * 8. Return an internal Matcher closure that takes two arguments, a State x
2188 * and a Continuation c, and performs the following:
2189 * 1. Let cap be x's captures internal array.
2190 * 2. Let s be cap[n].
2191 * 3. If s is undefined, then call c(x) and return its result.
2192 * 4. Let e be x's endIndex.
2193 * 5. Let len be s's length.
2194 * 6. Let f be e+len.
2195 * 7. If f>InputLength, return failure.
2196 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2197 * such that Canonicalize(s[i]) is not the same character as
2198 * Canonicalize(Input [e+i]), then return failure.
2199 * 9. Let y be the State (f, cap).
2200 * 10. Call c(y) and return its result.
2202 static REMatchState
*
2203 BackrefMatcher(REGlobalData
*gData
, REMatchState
*x
, size_t parenIndex
)
2206 const jschar
*parenContent
;
2207 RECapture
*cap
= &x
->parens
[parenIndex
];
2209 if (cap
->index
== -1)
2213 if (x
->cp
+ len
> gData
->cpend
)
2216 parenContent
= &gData
->cpbegin
[cap
->index
];
2217 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2218 for (i
= 0; i
< len
; i
++) {
2219 if (upcase(parenContent
[i
]) != upcase(x
->cp
[i
]))
2223 for (i
= 0; i
< len
; i
++) {
2224 if (parenContent
[i
] != x
->cp
[i
])
2233 /* Add a single character to the RECharSet */
2235 AddCharacterToCharSet(RECharSet
*cs
, jschar c
)
2237 uintN byteIndex
= (uintN
)(c
>> 3);
2238 JS_ASSERT(c
<= cs
->length
);
2239 cs
->u
.bits
[byteIndex
] |= 1 << (c
& 0x7);
2243 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2245 AddCharacterRangeToCharSet(RECharSet
*cs
, uintN c1
, uintN c2
)
2249 uintN byteIndex1
= c1
>> 3;
2250 uintN byteIndex2
= c2
>> 3;
2252 JS_ASSERT(c2
<= cs
->length
&& c1
<= c2
);
2257 if (byteIndex1
== byteIndex2
) {
2258 cs
->u
.bits
[byteIndex1
] |= ((uint8
)0xFF >> (7 - (c2
- c1
))) << c1
;
2260 cs
->u
.bits
[byteIndex1
] |= 0xFF << c1
;
2261 for (i
= byteIndex1
+ 1; i
< byteIndex2
; i
++)
2262 cs
->u
.bits
[i
] = 0xFF;
2263 cs
->u
.bits
[byteIndex2
] |= (uint8
)0xFF >> (7 - c2
);
2267 struct CharacterRange
{
2273 * The following characters are taken from the ECMA-262 standard, section 7.2
2274 * and 7.3, and the Unicode 3 standard, Table 6-1.
2276 static const CharacterRange WhiteSpaceRanges
[] = {
2277 /* TAB, LF, VT, FF, CR */
2281 /* NO-BREAK SPACE */
2284 * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
2285 * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
2286 * HAIR SPACE, ZERO WIDTH SPACE
2291 /* NARROW NO-BREAK SPACE */
2293 /* IDEOGRAPHIC SPACE */
2297 /* ECMA-262 standard, section 15.10.2.6. */
2298 static const CharacterRange WordRanges
[] = {
2299 { jschar('0'), jschar('9') },
2300 { jschar('A'), jschar('Z') },
2301 { jschar('_'), jschar('_') },
2302 { jschar('a'), jschar('z') }
2306 AddCharacterRanges(RECharSet
*charSet
,
2307 const CharacterRange
*range
,
2308 const CharacterRange
*end
)
2310 for (; range
< end
; ++range
)
2311 AddCharacterRangeToCharSet(charSet
, range
->start
, range
->end
);
2315 AddInvertedCharacterRanges(RECharSet
*charSet
,
2316 const CharacterRange
*range
,
2317 const CharacterRange
*end
)
2319 uint16 previous
= 0;
2320 for (; range
< end
; ++range
) {
2321 AddCharacterRangeToCharSet(charSet
, previous
, range
->start
- 1);
2322 previous
= range
->end
+ 1;
2324 AddCharacterRangeToCharSet(charSet
, previous
, charSet
->length
);
2327 /* Compile the source of the class into a RECharSet */
2329 ProcessCharSet(REGlobalData
*gData
, RECharSet
*charSet
)
2331 const jschar
*src
, *end
;
2332 JSBool inRange
= JS_FALSE
;
2333 jschar rangeStart
= 0;
2334 uintN byteLength
, n
;
2338 JS_ASSERT(!charSet
->converted
);
2340 * Assert that startIndex and length points to chars inside [] inside
2343 JS_ASSERT(1 <= charSet
->u
.src
.startIndex
);
2344 JS_ASSERT(charSet
->u
.src
.startIndex
2345 < JSSTRING_LENGTH(gData
->regexp
->source
));
2346 JS_ASSERT(charSet
->u
.src
.length
<= JSSTRING_LENGTH(gData
->regexp
->source
)
2347 - 1 - charSet
->u
.src
.startIndex
);
2349 charSet
->converted
= JS_TRUE
;
2350 src
= JSSTRING_CHARS(gData
->regexp
->source
) + charSet
->u
.src
.startIndex
;
2351 end
= src
+ charSet
->u
.src
.length
;
2352 JS_ASSERT(src
[-1] == '[');
2353 JS_ASSERT(end
[0] == ']');
2355 byteLength
= (charSet
->length
>> 3) + 1;
2356 charSet
->u
.bits
= (uint8
*)JS_malloc(gData
->cx
, byteLength
);
2357 if (!charSet
->u
.bits
) {
2358 JS_ReportOutOfMemory(gData
->cx
);
2359 gData
->ok
= JS_FALSE
;
2362 memset(charSet
->u
.bits
, 0, byteLength
);
2368 JS_ASSERT(charSet
->sense
== JS_FALSE
);
2371 JS_ASSERT(charSet
->sense
== JS_TRUE
);
2374 while (src
!= end
) {
2399 if (src
< end
&& JS_ISWORD(*src
)) {
2400 thisCh
= (jschar
)(*src
++ & 0x1F);
2413 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
2416 if (!isASCIIHexDigit(c
, &digit
)) {
2418 * Back off to accepting the original '\'
2425 n
= (n
<< 4) | digit
;
2438 * This is a non-ECMA extension - decimal escapes (in this
2439 * case, octal!) are supposed to be an error inside class
2440 * ranges, but supported here for backwards compatibility.
2444 if ('0' <= c
&& c
<= '7') {
2446 n
= 8 * n
+ JS7_UNDEC(c
);
2448 if ('0' <= c
&& c
<= '7') {
2450 i
= 8 * n
+ JS7_UNDEC(c
);
2461 AddCharacterRangeToCharSet(charSet
, '0', '9');
2462 continue; /* don't need range processing */
2464 AddCharacterRangeToCharSet(charSet
, 0, '0' - 1);
2465 AddCharacterRangeToCharSet(charSet
,
2467 (jschar
)charSet
->length
);
2470 AddCharacterRanges(charSet
, WhiteSpaceRanges
,
2471 WhiteSpaceRanges
+ JS_ARRAY_LENGTH(WhiteSpaceRanges
));
2474 AddInvertedCharacterRanges(charSet
, WhiteSpaceRanges
,
2475 WhiteSpaceRanges
+ JS_ARRAY_LENGTH(WhiteSpaceRanges
));
2478 AddCharacterRanges(charSet
, WordRanges
,
2479 WordRanges
+ JS_ARRAY_LENGTH(WordRanges
));
2482 AddInvertedCharacterRanges(charSet
, WordRanges
,
2483 WordRanges
+ JS_ARRAY_LENGTH(WordRanges
));
2498 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2501 JS_ASSERT(rangeStart
<= thisCh
);
2502 for (i
= rangeStart
; i
<= thisCh
; i
++) {
2505 AddCharacterToCharSet(charSet
, i
);
2509 AddCharacterToCharSet(charSet
, uch
);
2511 AddCharacterToCharSet(charSet
, dch
);
2514 AddCharacterRangeToCharSet(charSet
, rangeStart
, thisCh
);
2518 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2519 AddCharacterToCharSet(charSet
, upcase(thisCh
));
2520 AddCharacterToCharSet(charSet
, downcase(thisCh
));
2522 AddCharacterToCharSet(charSet
, thisCh
);
2524 if (src
< end
- 1) {
2528 rangeStart
= thisCh
;
2537 js_DestroyRegExp(JSContext
*cx
, JSRegExp
*re
)
2539 if (JS_ATOMIC_DECREMENT(&re
->nrefs
) == 0) {
2540 if (re
->classList
) {
2542 for (i
= 0; i
< re
->classCount
; i
++) {
2543 if (re
->classList
[i
].converted
)
2544 JS_free(cx
, re
->classList
[i
].u
.bits
);
2545 re
->classList
[i
].u
.bits
= NULL
;
2547 JS_free(cx
, re
->classList
);
2554 ReallocStateStack(REGlobalData
*gData
)
2556 size_t limit
= gData
->stateStackLimit
;
2557 size_t sz
= sizeof(REProgState
) * limit
;
2559 JS_ARENA_GROW_CAST(gData
->stateStack
, REProgState
*,
2560 &gData
->cx
->regexpPool
, sz
, sz
);
2561 if (!gData
->stateStack
) {
2562 js_ReportOutOfScriptQuota(gData
->cx
);
2563 gData
->ok
= JS_FALSE
;
2566 gData
->stateStackLimit
= limit
+ limit
;
2570 #define PUSH_STATE_STACK(data) \
2572 ++(data)->stateStackTop; \
2573 if ((data)->stateStackTop == (data)->stateStackLimit && \
2574 !ReallocStateStack((data))) { \
2580 * Apply the current op against the given input to see if it's going to match
2581 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2582 * true, then update the current state's cp. Always update startpc to the next
2585 static JS_ALWAYS_INLINE REMatchState
*
2586 SimpleMatch(REGlobalData
*gData
, REMatchState
*x
, REOp op
,
2587 jsbytecode
**startpc
, JSBool updatecp
)
2589 REMatchState
*result
= NULL
;
2592 size_t offset
, length
, index
;
2593 jsbytecode
*pc
= *startpc
; /* pc has already been incremented past op */
2595 const jschar
*startcp
= x
->cp
;
2600 const char *opname
= reop_names
[op
];
2601 re_debug("\n%06d: %*s%s", pc
- gData
->regexp
->program
,
2602 gData
->stateStackTop
* 2, "", opname
);
2609 if (x
->cp
!= gData
->cpbegin
) {
2610 if (!gData
->cx
->regExpStatics
.multiline
&&
2611 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2614 if (!RE_IS_LINE_TERM(x
->cp
[-1]))
2620 if (x
->cp
!= gData
->cpend
) {
2621 if (!gData
->cx
->regExpStatics
.multiline
&&
2622 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2625 if (!RE_IS_LINE_TERM(*x
->cp
))
2631 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2632 !(x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2637 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2638 (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2643 if (x
->cp
!= gData
->cpend
&& !RE_IS_LINE_TERM(*x
->cp
)) {
2649 if (x
->cp
!= gData
->cpend
&& JS7_ISDEC(*x
->cp
)) {
2655 if (x
->cp
!= gData
->cpend
&& !JS7_ISDEC(*x
->cp
)) {
2661 if (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
)) {
2667 if (x
->cp
!= gData
->cpend
&& !JS_ISWORD(*x
->cp
)) {
2673 if (x
->cp
!= gData
->cpend
&& JS_ISSPACE(*x
->cp
)) {
2679 if (x
->cp
!= gData
->cpend
&& !JS_ISSPACE(*x
->cp
)) {
2685 pc
= ReadCompactIndex(pc
, &parenIndex
);
2686 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
2687 result
= BackrefMatcher(gData
, x
, parenIndex
);
2690 pc
= ReadCompactIndex(pc
, &offset
);
2691 JS_ASSERT(offset
< JSSTRING_LENGTH(gData
->regexp
->source
));
2692 pc
= ReadCompactIndex(pc
, &length
);
2693 JS_ASSERT(1 <= length
);
2694 JS_ASSERT(length
<= JSSTRING_LENGTH(gData
->regexp
->source
) - offset
);
2695 if (length
<= (size_t)(gData
->cpend
- x
->cp
)) {
2696 source
= JSSTRING_CHARS(gData
->regexp
->source
) + offset
;
2697 re_debug_chars(source
, length
);
2698 for (index
= 0; index
!= length
; index
++) {
2699 if (source
[index
] != x
->cp
[index
])
2708 re_debug(" '%c' == '%c'", (char)matchCh
, (char)*x
->cp
);
2709 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2715 pc
= ReadCompactIndex(pc
, &offset
);
2716 JS_ASSERT(offset
< JSSTRING_LENGTH(gData
->regexp
->source
));
2717 pc
= ReadCompactIndex(pc
, &length
);
2718 JS_ASSERT(1 <= length
);
2719 JS_ASSERT(length
<= JSSTRING_LENGTH(gData
->regexp
->source
) - offset
);
2720 source
= JSSTRING_CHARS(gData
->regexp
->source
);
2721 result
= FlatNIMatcher(gData
, x
, source
+ offset
, length
);
2725 if (x
->cp
!= gData
->cpend
&& upcase(*x
->cp
) == upcase(matchCh
)) {
2731 matchCh
= GET_ARG(pc
);
2732 re_debug(" '%c' == '%c'", (char)matchCh
, (char)*x
->cp
);
2734 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2740 matchCh
= GET_ARG(pc
);
2742 if (x
->cp
!= gData
->cpend
&& upcase(*x
->cp
) == upcase(matchCh
)) {
2748 pc
= ReadCompactIndex(pc
, &index
);
2749 JS_ASSERT(index
< gData
->regexp
->classCount
);
2750 if (x
->cp
!= gData
->cpend
) {
2751 charSet
= &gData
->regexp
->classList
[index
];
2752 JS_ASSERT(charSet
->converted
);
2755 if (charSet
->length
!= 0 &&
2756 ch
<= charSet
->length
&&
2757 (charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2764 pc
= ReadCompactIndex(pc
, &index
);
2765 JS_ASSERT(index
< gData
->regexp
->classCount
);
2766 if (x
->cp
!= gData
->cpend
) {
2767 charSet
= &gData
->regexp
->classList
[index
];
2768 JS_ASSERT(charSet
->converted
);
2771 if (charSet
->length
== 0 ||
2772 ch
> charSet
->length
||
2773 !(charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2781 JS_ASSERT(JS_FALSE
);
2794 static JS_ALWAYS_INLINE REMatchState
*
2795 ExecuteREBytecode(REGlobalData
*gData
, REMatchState
*x
)
2797 REMatchState
*result
= NULL
;
2798 REBackTrackData
*backTrackData
;
2799 jsbytecode
*nextpc
, *testpc
;
2802 REProgState
*curState
;
2803 const jschar
*startcp
;
2804 size_t parenIndex
, k
;
2805 size_t parenSoFar
= 0;
2807 jschar matchCh1
, matchCh2
;
2811 jsbytecode
*pc
= gData
->regexp
->program
;
2812 REOp op
= (REOp
) *pc
++;
2815 * If the first node is a simple match, step the index into the string
2816 * until that match is made, or fail if it can't be found at all.
2818 if (REOP_IS_SIMPLE(op
) && !(gData
->regexp
->flags
& JSREG_STICKY
)) {
2820 while (x
->cp
<= gData
->cpend
) {
2821 nextpc
= pc
; /* reset back to start each time */
2822 result
= SimpleMatch(gData
, x
, op
, &nextpc
, JS_TRUE
);
2826 pc
= nextpc
; /* accept skip to next opcode */
2828 JS_ASSERT(op
< REOP_LIMIT
);
2840 const char *opname
= reop_names
[op
];
2841 re_debug("\n%06d: %*s%s", pc
- gData
->regexp
->program
,
2842 gData
->stateStackTop
* 2, "", opname
);
2844 if (REOP_IS_SIMPLE(op
)) {
2845 result
= SimpleMatch(gData
, x
, op
, &pc
, JS_TRUE
);
2847 curState
= &gData
->stateStack
[gData
->stateStackTop
];
2851 case REOP_ALTPREREQ2
:
2852 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2854 matchCh2
= GET_ARG(pc
);
2859 if (x
->cp
!= gData
->cpend
) {
2860 if (*x
->cp
== matchCh2
)
2863 charSet
= &gData
->regexp
->classList
[k
];
2864 if (!charSet
->converted
&& !ProcessCharSet(gData
, charSet
))
2868 if ((charSet
->length
== 0 ||
2869 matchCh1
> charSet
->length
||
2870 !(charSet
->u
.bits
[k
] & (1 << (matchCh1
& 0x7)))) ^
2878 case REOP_ALTPREREQ
:
2879 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2881 matchCh1
= GET_ARG(pc
);
2883 matchCh2
= GET_ARG(pc
);
2885 if (x
->cp
== gData
->cpend
||
2886 (*x
->cp
!= matchCh1
&& *x
->cp
!= matchCh2
)) {
2890 /* else false thru... */
2894 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next alternate */
2895 pc
+= ARG_LEN
; /* start of this alternate */
2896 curState
->parenSoFar
= parenSoFar
;
2897 PUSH_STATE_STACK(gData
);
2900 if (REOP_IS_SIMPLE(op
)) {
2901 if (!SimpleMatch(gData
, x
, op
, &pc
, JS_TRUE
)) {
2902 op
= (REOp
) *nextpc
++;
2909 nextop
= (REOp
) *nextpc
++;
2910 if (!PushBackTrackState(gData
, nextop
, nextpc
, x
, startcp
, 0, 0))
2915 * Occurs at (successful) end of REOP_ALT,
2919 * If we have not gotten a result here, it is because of an
2920 * empty match. Do the same thing REOP_EMPTY would do.
2925 --gData
->stateStackTop
;
2926 pc
+= GET_OFFSET(pc
);
2931 * Occurs at last (successful) end of REOP_ALT,
2935 * If we have not gotten a result here, it is because of an
2936 * empty match. Do the same thing REOP_EMPTY would do.
2941 --gData
->stateStackTop
;
2946 pc
= ReadCompactIndex(pc
, &parenIndex
);
2947 re_debug("[ %lu ]", (unsigned long) parenIndex
);
2948 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
2949 if (parenIndex
+ 1 > parenSoFar
)
2950 parenSoFar
= parenIndex
+ 1;
2951 x
->parens
[parenIndex
].index
= x
->cp
- gData
->cpbegin
;
2952 x
->parens
[parenIndex
].length
= 0;
2960 pc
= ReadCompactIndex(pc
, &parenIndex
);
2961 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
2962 cap
= &x
->parens
[parenIndex
];
2963 delta
= x
->cp
- (gData
->cpbegin
+ cap
->index
);
2964 cap
->length
= (delta
< 0) ? 0 : (size_t) delta
;
2972 nextpc
= pc
+ GET_OFFSET(pc
); /* start of term after ASSERT */
2973 pc
+= ARG_LEN
; /* start of ASSERT child */
2976 if (REOP_IS_SIMPLE(op
) &&
2977 !SimpleMatch(gData
, x
, op
, &testpc
, JS_FALSE
)) {
2981 curState
->u
.assertion
.top
=
2982 (char *)gData
->backTrackSP
- (char *)gData
->backTrackStack
;
2983 curState
->u
.assertion
.sz
= gData
->cursz
;
2984 curState
->index
= x
->cp
- gData
->cpbegin
;
2985 curState
->parenSoFar
= parenSoFar
;
2986 PUSH_STATE_STACK(gData
);
2987 if (!PushBackTrackState(gData
, REOP_ASSERTTEST
,
2988 nextpc
, x
, x
->cp
, 0, 0)) {
2993 case REOP_ASSERT_NOT
:
2994 nextpc
= pc
+ GET_OFFSET(pc
);
2998 if (REOP_IS_SIMPLE(op
) /* Note - fail to fail! */ &&
2999 SimpleMatch(gData
, x
, op
, &testpc
, JS_FALSE
) &&
3000 *testpc
== REOP_ASSERTNOTTEST
) {
3004 curState
->u
.assertion
.top
3005 = (char *)gData
->backTrackSP
-
3006 (char *)gData
->backTrackStack
;
3007 curState
->u
.assertion
.sz
= gData
->cursz
;
3008 curState
->index
= x
->cp
- gData
->cpbegin
;
3009 curState
->parenSoFar
= parenSoFar
;
3010 PUSH_STATE_STACK(gData
);
3011 if (!PushBackTrackState(gData
, REOP_ASSERTNOTTEST
,
3012 nextpc
, x
, x
->cp
, 0, 0)) {
3017 case REOP_ASSERTTEST
:
3018 --gData
->stateStackTop
;
3020 x
->cp
= gData
->cpbegin
+ curState
->index
;
3021 gData
->backTrackSP
=
3022 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
3023 curState
->u
.assertion
.top
);
3024 gData
->cursz
= curState
->u
.assertion
.sz
;
3029 case REOP_ASSERTNOTTEST
:
3030 --gData
->stateStackTop
;
3032 x
->cp
= gData
->cpbegin
+ curState
->index
;
3033 gData
->backTrackSP
=
3034 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
3035 curState
->u
.assertion
.top
);
3036 gData
->cursz
= curState
->u
.assertion
.sz
;
3037 result
= (!result
) ? x
: NULL
;
3040 curState
->u
.quantifier
.min
= 0;
3041 curState
->u
.quantifier
.max
= (uintN
)-1;
3044 curState
->u
.quantifier
.min
= 1;
3045 curState
->u
.quantifier
.max
= (uintN
)-1;
3048 curState
->u
.quantifier
.min
= 0;
3049 curState
->u
.quantifier
.max
= 1;
3052 pc
= ReadCompactIndex(pc
, &k
);
3053 curState
->u
.quantifier
.min
= k
;
3054 pc
= ReadCompactIndex(pc
, &k
);
3055 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3056 curState
->u
.quantifier
.max
= k
- 1;
3057 JS_ASSERT(curState
->u
.quantifier
.min
3058 <= curState
->u
.quantifier
.max
);
3060 if (curState
->u
.quantifier
.max
== 0) {
3061 pc
= pc
+ GET_OFFSET(pc
);
3066 /* Step over <next> */
3067 nextpc
= pc
+ ARG_LEN
;
3068 op
= (REOp
) *nextpc
++;
3070 if (REOP_IS_SIMPLE(op
)) {
3071 if (!SimpleMatch(gData
, x
, op
, &nextpc
, JS_TRUE
)) {
3072 if (curState
->u
.quantifier
.min
== 0)
3076 pc
= pc
+ GET_OFFSET(pc
);
3079 op
= (REOp
) *nextpc
++;
3082 curState
->index
= startcp
- gData
->cpbegin
;
3083 curState
->continue_op
= REOP_REPEAT
;
3084 curState
->continue_pc
= pc
;
3085 curState
->parenSoFar
= parenSoFar
;
3086 PUSH_STATE_STACK(gData
);
3087 if (curState
->u
.quantifier
.min
== 0 &&
3088 !PushBackTrackState(gData
, REOP_REPEAT
, pc
, x
, startcp
,
3095 case REOP_ENDCHILD
: /* marks the end of a quantifier child */
3096 pc
= curState
[-1].continue_pc
;
3097 op
= (REOp
) curState
[-1].continue_op
;
3106 --gData
->stateStackTop
;
3108 /* Failed, see if we have enough children. */
3109 if (curState
->u
.quantifier
.min
== 0)
3113 if (curState
->u
.quantifier
.min
== 0 &&
3114 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3115 /* matched an empty string, that'll get us nowhere */
3119 if (curState
->u
.quantifier
.min
!= 0)
3120 curState
->u
.quantifier
.min
--;
3121 if (curState
->u
.quantifier
.max
!= (uintN
) -1)
3122 curState
->u
.quantifier
.max
--;
3123 if (curState
->u
.quantifier
.max
== 0)
3125 nextpc
= pc
+ ARG_LEN
;
3126 nextop
= (REOp
) *nextpc
;
3128 if (REOP_IS_SIMPLE(nextop
)) {
3130 if (!SimpleMatch(gData
, x
, nextop
, &nextpc
, JS_TRUE
)) {
3131 if (curState
->u
.quantifier
.min
== 0)
3138 curState
->index
= startcp
- gData
->cpbegin
;
3139 PUSH_STATE_STACK(gData
);
3140 if (curState
->u
.quantifier
.min
== 0 &&
3141 !PushBackTrackState(gData
, REOP_REPEAT
,
3143 curState
->parenSoFar
,
3145 curState
->parenSoFar
)) {
3148 } while (*nextpc
== REOP_ENDCHILD
);
3151 parenSoFar
= curState
->parenSoFar
;
3156 pc
+= GET_OFFSET(pc
);
3159 case REOP_MINIMALSTAR
:
3160 curState
->u
.quantifier
.min
= 0;
3161 curState
->u
.quantifier
.max
= (uintN
)-1;
3162 goto minimalquantcommon
;
3163 case REOP_MINIMALPLUS
:
3164 curState
->u
.quantifier
.min
= 1;
3165 curState
->u
.quantifier
.max
= (uintN
)-1;
3166 goto minimalquantcommon
;
3167 case REOP_MINIMALOPT
:
3168 curState
->u
.quantifier
.min
= 0;
3169 curState
->u
.quantifier
.max
= 1;
3170 goto minimalquantcommon
;
3171 case REOP_MINIMALQUANT
:
3172 pc
= ReadCompactIndex(pc
, &k
);
3173 curState
->u
.quantifier
.min
= k
;
3174 pc
= ReadCompactIndex(pc
, &k
);
3175 /* See REOP_QUANT comments about k - 1. */
3176 curState
->u
.quantifier
.max
= k
- 1;
3177 JS_ASSERT(curState
->u
.quantifier
.min
3178 <= curState
->u
.quantifier
.max
);
3180 curState
->index
= x
->cp
- gData
->cpbegin
;
3181 curState
->parenSoFar
= parenSoFar
;
3182 PUSH_STATE_STACK(gData
);
3183 if (curState
->u
.quantifier
.min
!= 0) {
3184 curState
->continue_op
= REOP_MINIMALREPEAT
;
3185 curState
->continue_pc
= pc
;
3186 /* step over <next> */
3190 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3191 pc
, x
, x
->cp
, 0, 0)) {
3194 --gData
->stateStackTop
;
3195 pc
= pc
+ GET_OFFSET(pc
);
3200 case REOP_MINIMALREPEAT
:
3201 --gData
->stateStackTop
;
3204 re_debug("{%d,%d}", curState
->u
.quantifier
.min
,
3205 curState
->u
.quantifier
.max
);
3206 #define PREPARE_REPEAT() \
3208 curState->index = x->cp - gData->cpbegin; \
3209 curState->continue_op = REOP_MINIMALREPEAT; \
3210 curState->continue_pc = pc; \
3212 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3213 x->parens[k].index = -1; \
3214 PUSH_STATE_STACK(gData); \
3215 op = (REOp) *pc++; \
3216 JS_ASSERT(op < REOP_LIMIT); \
3222 * Non-greedy failure - try to consume another child.
3224 if (curState
->u
.quantifier
.max
== (uintN
) -1 ||
3225 curState
->u
.quantifier
.max
> 0) {
3229 /* Don't need to adjust pc since we're going to pop. */
3232 if (curState
->u
.quantifier
.min
== 0 &&
3233 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3234 /* Matched an empty string, that'll get us nowhere. */
3238 if (curState
->u
.quantifier
.min
!= 0)
3239 curState
->u
.quantifier
.min
--;
3240 if (curState
->u
.quantifier
.max
!= (uintN
) -1)
3241 curState
->u
.quantifier
.max
--;
3242 if (curState
->u
.quantifier
.min
!= 0) {
3246 curState
->index
= x
->cp
- gData
->cpbegin
;
3247 curState
->parenSoFar
= parenSoFar
;
3248 PUSH_STATE_STACK(gData
);
3249 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3251 curState
->parenSoFar
,
3252 parenSoFar
- curState
->parenSoFar
)) {
3255 --gData
->stateStackTop
;
3256 pc
= pc
+ GET_OFFSET(pc
);
3258 JS_ASSERT(op
< REOP_LIMIT
);
3261 JS_ASSERT(JS_FALSE
);
3268 * If the match failed and there's a backtrack option, take it.
3269 * Otherwise this is a complete and utter failure.
3272 if (gData
->cursz
== 0)
3274 if (!JS_CHECK_OPERATION_LIMIT(gData
->cx
, JSOW_JUMP
)) {
3275 gData
->ok
= JS_FALSE
;
3279 /* Potentially detect explosive regex here. */
3280 gData
->backTrackCount
++;
3281 if (gData
->backTrackLimit
&&
3282 gData
->backTrackCount
>= gData
->backTrackLimit
) {
3283 JS_ReportErrorNumber(gData
->cx
, js_GetErrorMessage
, NULL
,
3284 JSMSG_REGEXP_TOO_COMPLEX
);
3285 gData
->ok
= JS_FALSE
;
3289 backTrackData
= gData
->backTrackSP
;
3290 gData
->cursz
= backTrackData
->sz
;
3291 gData
->backTrackSP
=
3292 (REBackTrackData
*) ((char *)backTrackData
- backTrackData
->sz
);
3293 x
->cp
= backTrackData
->cp
;
3294 pc
= backTrackData
->backtrack_pc
;
3295 op
= (REOp
) backTrackData
->backtrack_op
;
3296 JS_ASSERT(op
< REOP_LIMIT
);
3297 gData
->stateStackTop
= backTrackData
->saveStateStackTop
;
3298 JS_ASSERT(gData
->stateStackTop
);
3300 memcpy(gData
->stateStack
, backTrackData
+ 1,
3301 sizeof(REProgState
) * backTrackData
->saveStateStackTop
);
3302 curState
= &gData
->stateStack
[gData
->stateStackTop
- 1];
3304 if (backTrackData
->parenCount
) {
3305 memcpy(&x
->parens
[backTrackData
->parenIndex
],
3306 (char *)(backTrackData
+ 1) +
3307 sizeof(REProgState
) * backTrackData
->saveStateStackTop
,
3308 sizeof(RECapture
) * backTrackData
->parenCount
);
3309 parenSoFar
= backTrackData
->parenIndex
+ backTrackData
->parenCount
;
3311 for (k
= curState
->parenSoFar
; k
< parenSoFar
; k
++)
3312 x
->parens
[k
].index
= -1;
3313 parenSoFar
= curState
->parenSoFar
;
3316 re_debug("\tBT_Pop: %ld,%ld",
3317 (unsigned long) backTrackData
->parenIndex
,
3318 (unsigned long) backTrackData
->parenCount
);
3324 * Continue with the expression.
3327 JS_ASSERT(op
< REOP_LIMIT
);
3339 static REMatchState
*
3340 MatchRegExp(REGlobalData
*gData
, REMatchState
*x
)
3342 REMatchState
*result
;
3343 const jschar
*cp
= x
->cp
;
3348 * Have to include the position beyond the last character
3349 * in order to detect end-of-input/line condition.
3351 for (cp2
= cp
; cp2
<= gData
->cpend
; cp2
++) {
3352 gData
->skipped
= cp2
- cp
;
3354 for (j
= 0; j
< gData
->regexp
->parenCount
; j
++)
3355 x
->parens
[j
].index
= -1;
3356 result
= ExecuteREBytecode(gData
, x
);
3357 if (!gData
->ok
|| result
|| (gData
->regexp
->flags
& JSREG_STICKY
))
3359 gData
->backTrackSP
= gData
->backTrackStack
;
3361 gData
->stateStackTop
= 0;
3362 cp2
= cp
+ gData
->skipped
;
3367 #define MIN_BACKTRACK_LIMIT 400000
3369 static REMatchState
*
3370 InitMatch(JSContext
*cx
, REGlobalData
*gData
, JSRegExp
*re
, size_t length
)
3372 REMatchState
*result
;
3375 gData
->backTrackStackSize
= INITIAL_BACKTRACK
;
3376 JS_ARENA_ALLOCATE_CAST(gData
->backTrackStack
, REBackTrackData
*,
3379 if (!gData
->backTrackStack
)
3382 gData
->backTrackSP
= gData
->backTrackStack
;
3384 gData
->backTrackCount
= 0;
3385 gData
->backTrackLimit
= 0;
3386 if (JS_GetOptions(cx
) & JSOPTION_RELIMIT
) {
3387 gData
->backTrackLimit
= length
* length
* length
; /* O(n^3) */
3388 if (gData
->backTrackLimit
< MIN_BACKTRACK_LIMIT
)
3389 gData
->backTrackLimit
= MIN_BACKTRACK_LIMIT
;
3392 gData
->stateStackLimit
= INITIAL_STATESTACK
;
3393 JS_ARENA_ALLOCATE_CAST(gData
->stateStack
, REProgState
*,
3395 sizeof(REProgState
) * INITIAL_STATESTACK
);
3396 if (!gData
->stateStack
)
3399 gData
->stateStackTop
= 0;
3402 gData
->ok
= JS_TRUE
;
3404 JS_ARENA_ALLOCATE_CAST(result
, REMatchState
*,
3406 offsetof(REMatchState
, parens
)
3407 + re
->parenCount
* sizeof(RECapture
));
3411 for (i
= 0; i
< re
->classCount
; i
++) {
3412 if (!re
->classList
[i
].converted
&&
3413 !ProcessCharSet(gData
, &re
->classList
[i
])) {
3421 js_ReportOutOfScriptQuota(cx
);
3422 gData
->ok
= JS_FALSE
;
3427 js_ExecuteRegExp(JSContext
*cx
, JSRegExp
*re
, JSString
*str
, size_t *indexp
,
3428 JSBool test
, jsval
*rval
)
3431 REMatchState
*x
, *result
;
3433 const jschar
*cp
, *ep
;
3434 size_t i
, length
, start
;
3435 JSSubString
*morepar
;
3437 JSRegExpStatics
*res
;
3440 JSString
*parstr
, *matchstr
;
3443 RECapture
*parsub
= NULL
;
3448 * It's safe to load from cp because JSStrings have a zero at the end,
3449 * and we never let cp get beyond cpend.
3452 JSSTRING_CHARS_AND_LENGTH(str
, cp
, length
);
3456 gData
.cpend
= cp
+ length
;
3458 gData
.start
= start
;
3461 if (!cx
->regexpPool
.first
.next
) {
3463 * The first arena in the regexpPool must have a timestamp at its base.
3465 JS_ARENA_ALLOCATE_CAST(timestamp
, int64
*,
3466 &cx
->regexpPool
, sizeof *timestamp
);
3469 *timestamp
= JS_Now();
3471 mark
= JS_ARENA_MARK(&cx
->regexpPool
);
3473 x
= InitMatch(cx
, &gData
, re
, length
);
3482 * Call the recursive matcher to do the real work. Return null on mismatch
3483 * whether testing or not. On match, return an extended Array object.
3485 result
= MatchRegExp(&gData
, x
);
3494 i
= cp
- gData
.cpbegin
;
3496 matchlen
= i
- (start
+ gData
.skipped
);
3502 * Testing for a match and updating cx->regExpStatics: don't allocate
3503 * an array object, do return true.
3507 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3511 * The array returned on match has element 0 bound to the matched
3512 * string, elements 1 through state.parenCount bound to the paren
3513 * matches, an index property telling the length of the left context,
3514 * and an input property referring to the input string.
3516 obj
= js_NewSlowArrayObject(cx
);
3521 *rval
= OBJECT_TO_JSVAL(obj
);
3523 #define DEFVAL(val, id) { \
3524 ok = js_DefineProperty(cx, obj, id, val, \
3525 JS_PropertyStub, JS_PropertyStub, \
3526 JSPROP_ENUMERATE, NULL); \
3528 cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
3529 cx->weakRoots.newborn[GCX_STRING] = NULL; \
3534 matchstr
= js_NewStringCopyN(cx
, cp
, matchlen
);
3536 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
3540 DEFVAL(STRING_TO_JSVAL(matchstr
), INT_TO_JSID(0));
3543 res
= &cx
->regExpStatics
;
3545 res
->parenCount
= re
->parenCount
;
3546 if (re
->parenCount
== 0) {
3547 res
->lastParen
= js_EmptySubString
;
3549 for (num
= 0; num
< re
->parenCount
; num
++) {
3550 parsub
= &result
->parens
[num
];
3552 if (parsub
->index
== -1) {
3553 res
->parens
[num
].chars
= NULL
;
3554 res
->parens
[num
].length
= 0;
3556 res
->parens
[num
].chars
= gData
.cpbegin
+ parsub
->index
;
3557 res
->parens
[num
].length
= parsub
->length
;
3561 morepar
= res
->moreParens
;
3563 res
->moreLength
= 10;
3564 morepar
= (JSSubString
*)
3565 JS_malloc(cx
, 10 * sizeof(JSSubString
));
3566 } else if (morenum
>= res
->moreLength
) {
3567 res
->moreLength
+= 10;
3568 morepar
= (JSSubString
*)
3569 JS_realloc(cx
, morepar
,
3570 res
->moreLength
* sizeof(JSSubString
));
3573 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
3574 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
3578 res
->moreParens
= morepar
;
3579 if (parsub
->index
== -1) {
3580 morepar
[morenum
].chars
= NULL
;
3581 morepar
[morenum
].length
= 0;
3583 morepar
[morenum
].chars
= gData
.cpbegin
+ parsub
->index
;
3584 morepar
[morenum
].length
= parsub
->length
;
3589 if (parsub
->index
== -1) {
3590 ok
= js_DefineProperty(cx
, obj
, INT_TO_JSID(num
+ 1),
3591 JSVAL_VOID
, NULL
, NULL
,
3592 JSPROP_ENUMERATE
, NULL
);
3594 parstr
= js_NewStringCopyN(cx
, gData
.cpbegin
+ parsub
->index
,
3597 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
3598 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
3602 ok
= js_DefineProperty(cx
, obj
, INT_TO_JSID(num
+ 1),
3603 STRING_TO_JSVAL(parstr
), NULL
, NULL
,
3604 JSPROP_ENUMERATE
, NULL
);
3607 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
3608 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
3612 if (parsub
->index
== -1) {
3613 res
->lastParen
= js_EmptySubString
;
3615 res
->lastParen
.chars
= gData
.cpbegin
+ parsub
->index
;
3616 res
->lastParen
.length
= parsub
->length
;
3622 * Define the index and input properties last for better for/in loop
3623 * order (so they come after the elements).
3625 DEFVAL(INT_TO_JSVAL(start
+ gData
.skipped
),
3626 ATOM_TO_JSID(cx
->runtime
->atomState
.indexAtom
));
3627 DEFVAL(STRING_TO_JSVAL(str
),
3628 ATOM_TO_JSID(cx
->runtime
->atomState
.inputAtom
));
3633 res
->lastMatch
.chars
= cp
;
3634 res
->lastMatch
.length
= matchlen
;
3637 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3639 * js1.3 "hi", "hi there" "hihitherehi therebye"
3641 res
->leftContext
.chars
= JSSTRING_CHARS(str
);
3642 res
->leftContext
.length
= start
+ gData
.skipped
;
3643 res
->rightContext
.chars
= ep
;
3644 res
->rightContext
.length
= gData
.cpend
- ep
;
3647 JS_ARENA_RELEASE(&cx
->regexpPool
, mark
);
3651 /************************************************************************/
3653 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED)
3654 #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY)
3656 static JSPropertySpec regexp_props
[] = {
3657 {"source", REGEXP_SOURCE
, RO_REGEXP_PROP_ATTRS
,0,0},
3658 {"global", REGEXP_GLOBAL
, RO_REGEXP_PROP_ATTRS
,0,0},
3659 {"ignoreCase", REGEXP_IGNORE_CASE
, RO_REGEXP_PROP_ATTRS
,0,0},
3660 {"lastIndex", REGEXP_LAST_INDEX
, REGEXP_PROP_ATTRS
,0,0},
3661 {"multiline", REGEXP_MULTILINE
, RO_REGEXP_PROP_ATTRS
,0,0},
3662 {"sticky", REGEXP_STICKY
, RO_REGEXP_PROP_ATTRS
,0,0},
3667 regexp_getProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
3672 if (!JSVAL_IS_INT(id
))
3674 while (OBJ_GET_CLASS(cx
, obj
) != &js_RegExpClass
) {
3675 obj
= OBJ_GET_PROTO(cx
, obj
);
3679 slot
= JSVAL_TO_INT(id
);
3680 if (slot
== REGEXP_LAST_INDEX
)
3681 return JS_GetReservedSlot(cx
, obj
, 0, vp
);
3683 JS_LOCK_OBJ(cx
, obj
);
3684 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
3688 *vp
= STRING_TO_JSVAL(re
->source
);
3691 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_GLOB
) != 0);
3693 case REGEXP_IGNORE_CASE
:
3694 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_FOLD
) != 0);
3696 case REGEXP_MULTILINE
:
3697 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_MULTILINE
) != 0);
3700 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_STICKY
) != 0);
3704 JS_UNLOCK_OBJ(cx
, obj
);
3709 regexp_setProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
3716 if (!JSVAL_IS_INT(id
))
3718 while (OBJ_GET_CLASS(cx
, obj
) != &js_RegExpClass
) {
3719 obj
= OBJ_GET_PROTO(cx
, obj
);
3723 slot
= JSVAL_TO_INT(id
);
3724 if (slot
== REGEXP_LAST_INDEX
) {
3725 if (!JS_ValueToNumber(cx
, *vp
, &lastIndex
))
3727 lastIndex
= js_DoubleToInteger(lastIndex
);
3728 ok
= JS_NewNumberValue(cx
, lastIndex
, vp
) &&
3729 JS_SetReservedSlot(cx
, obj
, 0, *vp
);
3735 * RegExp class static properties and their Perl counterparts:
3738 * RegExp.multiline $*
3739 * RegExp.lastMatch $&
3740 * RegExp.lastParen $+
3741 * RegExp.leftContext $`
3742 * RegExp.rightContext $'
3744 enum regexp_static_tinyid
{
3745 REGEXP_STATIC_INPUT
= -1,
3746 REGEXP_STATIC_MULTILINE
= -2,
3747 REGEXP_STATIC_LAST_MATCH
= -3,
3748 REGEXP_STATIC_LAST_PAREN
= -4,
3749 REGEXP_STATIC_LEFT_CONTEXT
= -5,
3750 REGEXP_STATIC_RIGHT_CONTEXT
= -6
3754 js_InitRegExpStatics(JSContext
*cx
, JSRegExpStatics
*res
)
3756 JS_ClearRegExpStatics(cx
);
3757 return js_AddRoot(cx
, &res
->input
, "res->input");
3761 js_FreeRegExpStatics(JSContext
*cx
, JSRegExpStatics
*res
)
3763 if (res
->moreParens
) {
3764 JS_free(cx
, res
->moreParens
);
3765 res
->moreParens
= NULL
;
3767 js_RemoveRoot(cx
->runtime
, &res
->input
);
3771 regexp_static_getProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
3774 JSRegExpStatics
*res
;
3778 res
= &cx
->regExpStatics
;
3779 if (!JSVAL_IS_INT(id
))
3781 slot
= JSVAL_TO_INT(id
);
3783 case REGEXP_STATIC_INPUT
:
3784 *vp
= res
->input
? STRING_TO_JSVAL(res
->input
)
3785 : JS_GetEmptyStringValue(cx
);
3787 case REGEXP_STATIC_MULTILINE
:
3788 *vp
= BOOLEAN_TO_JSVAL(res
->multiline
);
3790 case REGEXP_STATIC_LAST_MATCH
:
3791 sub
= &res
->lastMatch
;
3793 case REGEXP_STATIC_LAST_PAREN
:
3794 sub
= &res
->lastParen
;
3796 case REGEXP_STATIC_LEFT_CONTEXT
:
3797 sub
= &res
->leftContext
;
3799 case REGEXP_STATIC_RIGHT_CONTEXT
:
3800 sub
= &res
->rightContext
;
3803 sub
= REGEXP_PAREN_SUBSTRING(res
, slot
);
3806 str
= js_NewStringCopyN(cx
, sub
->chars
, sub
->length
);
3809 *vp
= STRING_TO_JSVAL(str
);
3814 regexp_static_setProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
3816 JSRegExpStatics
*res
;
3818 if (!JSVAL_IS_INT(id
))
3820 res
= &cx
->regExpStatics
;
3821 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3822 if (JSVAL_TO_INT(id
) == REGEXP_STATIC_INPUT
) {
3823 if (!JSVAL_IS_STRING(*vp
) &&
3824 !JS_ConvertValue(cx
, *vp
, JSTYPE_STRING
, vp
)) {
3827 res
->input
= JSVAL_TO_STRING(*vp
);
3828 } else if (JSVAL_TO_INT(id
) == REGEXP_STATIC_MULTILINE
) {
3829 if (!JSVAL_IS_BOOLEAN(*vp
) &&
3830 !JS_ConvertValue(cx
, *vp
, JSTYPE_BOOLEAN
, vp
)) {
3833 res
->multiline
= JSVAL_TO_BOOLEAN(*vp
);
3837 #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
3838 #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
3840 static JSPropertySpec regexp_static_props
[] = {
3842 REGEXP_STATIC_INPUT
,
3843 REGEXP_STATIC_PROP_ATTRS
,
3844 regexp_static_getProperty
, regexp_static_setProperty
},
3846 REGEXP_STATIC_MULTILINE
,
3847 REGEXP_STATIC_PROP_ATTRS
,
3848 regexp_static_getProperty
, regexp_static_setProperty
},
3850 REGEXP_STATIC_LAST_MATCH
,
3851 RO_REGEXP_STATIC_PROP_ATTRS
,
3852 regexp_static_getProperty
, regexp_static_getProperty
},
3854 REGEXP_STATIC_LAST_PAREN
,
3855 RO_REGEXP_STATIC_PROP_ATTRS
,
3856 regexp_static_getProperty
, regexp_static_getProperty
},
3858 REGEXP_STATIC_LEFT_CONTEXT
,
3859 RO_REGEXP_STATIC_PROP_ATTRS
,
3860 regexp_static_getProperty
, regexp_static_getProperty
},
3862 REGEXP_STATIC_RIGHT_CONTEXT
,
3863 RO_REGEXP_STATIC_PROP_ATTRS
,
3864 regexp_static_getProperty
, regexp_static_getProperty
},
3866 /* XXX should have block scope and local $1, etc. */
3867 {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS
,
3868 regexp_static_getProperty
, regexp_static_getProperty
},
3869 {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS
,
3870 regexp_static_getProperty
, regexp_static_getProperty
},
3871 {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS
,
3872 regexp_static_getProperty
, regexp_static_getProperty
},
3873 {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS
,
3874 regexp_static_getProperty
, regexp_static_getProperty
},
3875 {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS
,
3876 regexp_static_getProperty
, regexp_static_getProperty
},
3877 {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS
,
3878 regexp_static_getProperty
, regexp_static_getProperty
},
3879 {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS
,
3880 regexp_static_getProperty
, regexp_static_getProperty
},
3881 {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS
,
3882 regexp_static_getProperty
, regexp_static_getProperty
},
3883 {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS
,
3884 regexp_static_getProperty
, regexp_static_getProperty
},
3890 regexp_finalize(JSContext
*cx
, JSObject
*obj
)
3894 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
3897 js_DestroyRegExp(cx
, re
);
3900 /* Forward static prototype. */
3902 regexp_exec_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
3903 JSBool test
, jsval
*rval
);
3906 regexp_call(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
, jsval
*rval
)
3908 return regexp_exec_sub(cx
, JSVAL_TO_OBJECT(argv
[-2]), argc
, argv
,
3914 #include "jsxdrapi.h"
3917 regexp_xdrObject(JSXDRState
*xdr
, JSObject
**objp
)
3924 if (xdr
->mode
== JSXDR_ENCODE
) {
3925 re
= (JSRegExp
*) JS_GetPrivate(xdr
->cx
, *objp
);
3928 source
= re
->source
;
3929 flagsword
= (uint32
)re
->flags
;
3931 if (!JS_XDRString(xdr
, &source
) ||
3932 !JS_XDRUint32(xdr
, &flagsword
)) {
3935 if (xdr
->mode
== JSXDR_DECODE
) {
3936 obj
= js_NewObject(xdr
->cx
, &js_RegExpClass
, NULL
, NULL
, 0);
3939 STOBJ_CLEAR_PARENT(obj
);
3940 STOBJ_CLEAR_PROTO(obj
);
3941 re
= js_NewRegExp(xdr
->cx
, NULL
, source
, (uint8
)flagsword
, JS_FALSE
);
3944 if (!JS_SetPrivate(xdr
->cx
, obj
, re
) ||
3945 !js_SetLastIndex(xdr
->cx
, obj
, 0)) {
3946 js_DestroyRegExp(xdr
->cx
, re
);
3954 #else /* !JS_HAS_XDR */
3956 #define regexp_xdrObject NULL
3958 #endif /* !JS_HAS_XDR */
3961 regexp_trace(JSTracer
*trc
, JSObject
*obj
)
3965 re
= (JSRegExp
*) JS_GetPrivate(trc
->context
, obj
);
3966 if (re
&& re
->source
)
3967 JS_CALL_STRING_TRACER(trc
, re
->source
, "source");
3970 JSClass js_RegExpClass
= {
3972 JSCLASS_HAS_PRIVATE
| JSCLASS_HAS_RESERVED_SLOTS(1) |
3973 JSCLASS_MARK_IS_TRACE
| JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp
),
3974 JS_PropertyStub
, JS_PropertyStub
,
3975 regexp_getProperty
, regexp_setProperty
,
3976 JS_EnumerateStub
, JS_ResolveStub
,
3977 JS_ConvertStub
, regexp_finalize
,
3980 regexp_xdrObject
, NULL
,
3981 JS_CLASS_TRACE(regexp_trace
), 0
3984 static const jschar empty_regexp_ucstr
[] = {'(', '?', ':', ')', 0};
3987 js_regexp_toString(JSContext
*cx
, JSObject
*obj
, jsval
*vp
)
3990 const jschar
*source
;
3992 size_t length
, nflags
;
3996 if (!JS_InstanceOf(cx
, obj
, &js_RegExpClass
, vp
+ 2))
3998 JS_LOCK_OBJ(cx
, obj
);
3999 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4001 JS_UNLOCK_OBJ(cx
, obj
);
4002 *vp
= STRING_TO_JSVAL(cx
->runtime
->emptyString
);
4006 JSSTRING_CHARS_AND_LENGTH(re
->source
, source
, length
);
4008 source
= empty_regexp_ucstr
;
4009 length
= JS_ARRAY_LENGTH(empty_regexp_ucstr
) - 1;
4013 for (flags
= re
->flags
; flags
!= 0; flags
&= flags
- 1)
4015 chars
= (jschar
*) JS_malloc(cx
, (length
+ nflags
+ 1) * sizeof(jschar
));
4017 JS_UNLOCK_OBJ(cx
, obj
);
4022 js_strncpy(&chars
[1], source
, length
- 2);
4023 chars
[length
-1] = '/';
4025 if (re
->flags
& JSREG_GLOB
)
4026 chars
[length
++] = 'g';
4027 if (re
->flags
& JSREG_FOLD
)
4028 chars
[length
++] = 'i';
4029 if (re
->flags
& JSREG_MULTILINE
)
4030 chars
[length
++] = 'm';
4031 if (re
->flags
& JSREG_STICKY
)
4032 chars
[length
++] = 'y';
4034 JS_UNLOCK_OBJ(cx
, obj
);
4037 str
= js_NewString(cx
, chars
, length
);
4042 *vp
= STRING_TO_JSVAL(str
);
4047 regexp_toString(JSContext
*cx
, uintN argc
, jsval
*vp
)
4051 obj
= JS_THIS_OBJECT(cx
, vp
);
4052 return obj
&& js_regexp_toString(cx
, obj
, vp
);
4056 regexp_compile_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
4059 JSString
*opt
, *str
;
4060 JSRegExp
*oldre
, *re
;
4063 size_t length
, nbytes
;
4064 const jschar
*cp
, *start
, *end
;
4065 jschar
*nstart
, *ncp
, *tmp
;
4067 if (!JS_InstanceOf(cx
, obj
, &js_RegExpClass
, argv
))
4071 str
= cx
->runtime
->emptyString
;
4073 if (JSVAL_IS_OBJECT(argv
[0])) {
4075 * If we get passed in a RegExp object we construct a new
4076 * RegExp that is a duplicate of it by re-compiling the
4077 * original source code. ECMA requires that it be an error
4078 * here if the flags are specified. (We must use the flags
4079 * from the original RegExp also).
4081 obj2
= JSVAL_TO_OBJECT(argv
[0]);
4082 if (obj2
&& OBJ_GET_CLASS(cx
, obj2
) == &js_RegExpClass
) {
4083 if (argc
>= 2 && !JSVAL_IS_VOID(argv
[1])) { /* 'flags' passed */
4084 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, NULL
,
4085 JSMSG_NEWREGEXP_FLAGGED
);
4088 JS_LOCK_OBJ(cx
, obj2
);
4089 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj2
);
4091 JS_UNLOCK_OBJ(cx
, obj2
);
4094 re
= js_NewRegExp(cx
, NULL
, re
->source
, re
->flags
, JS_FALSE
);
4095 JS_UNLOCK_OBJ(cx
, obj2
);
4099 str
= js_ValueToString(cx
, argv
[0]);
4102 argv
[0] = STRING_TO_JSVAL(str
);
4104 if (JSVAL_IS_VOID(argv
[1])) {
4107 opt
= js_ValueToString(cx
, argv
[1]);
4110 argv
[1] = STRING_TO_JSVAL(opt
);
4114 /* Escape any naked slashes in the regexp source. */
4115 JSSTRING_CHARS_AND_LENGTH(str
, start
, length
);
4116 end
= start
+ length
;
4117 nstart
= ncp
= NULL
;
4118 for (cp
= start
; cp
< end
; cp
++) {
4119 if (*cp
== '/' && (cp
== start
|| cp
[-1] != '\\')) {
4120 nbytes
= (++length
+ 1) * sizeof(jschar
);
4122 nstart
= (jschar
*) JS_malloc(cx
, nbytes
);
4125 ncp
= nstart
+ (cp
- start
);
4126 js_strncpy(nstart
, start
, cp
- start
);
4128 tmp
= (jschar
*) JS_realloc(cx
, nstart
, nbytes
);
4130 JS_free(cx
, nstart
);
4133 ncp
= tmp
+ (ncp
- nstart
);
4143 /* Don't forget to store the backstop after the new string. */
4144 JS_ASSERT((size_t)(ncp
- nstart
) == length
);
4146 str
= js_NewString(cx
, nstart
, length
);
4148 JS_free(cx
, nstart
);
4151 argv
[0] = STRING_TO_JSVAL(str
);
4155 re
= js_NewRegExpOpt(cx
, str
, opt
, JS_FALSE
);
4159 JS_LOCK_OBJ(cx
, obj
);
4160 oldre
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4161 ok
= JS_SetPrivate(cx
, obj
, re
);
4162 ok2
= js_SetLastIndex(cx
, obj
, 0);
4163 JS_UNLOCK_OBJ(cx
, obj
);
4165 js_DestroyRegExp(cx
, re
);
4169 js_DestroyRegExp(cx
, oldre
);
4170 *rval
= OBJECT_TO_JSVAL(obj
);
4175 regexp_compile(JSContext
*cx
, uintN argc
, jsval
*vp
)
4179 obj
= JS_THIS_OBJECT(cx
, vp
);
4180 return obj
&& regexp_compile_sub(cx
, obj
, argc
, vp
+ 2, vp
);
4184 regexp_exec_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
4185 JSBool test
, jsval
*rval
)
4193 ok
= JS_InstanceOf(cx
, obj
, &js_RegExpClass
, argv
);
4196 JS_LOCK_OBJ(cx
, obj
);
4197 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4199 JS_UNLOCK_OBJ(cx
, obj
);
4203 /* NB: we must reach out: after this paragraph, in order to drop re. */
4204 HOLD_REGEXP(cx
, re
);
4205 sticky
= (re
->flags
& JSREG_STICKY
) != 0;
4206 if (re
->flags
& (JSREG_GLOB
| JSREG_STICKY
)) {
4207 ok
= js_GetLastIndex(cx
, obj
, &lastIndex
);
4211 JS_UNLOCK_OBJ(cx
, obj
);
4215 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4217 str
= cx
->regExpStatics
.input
;
4219 const char *bytes
= js_GetStringBytes(cx
, re
->source
);
4222 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, NULL
,
4225 (re
->flags
& JSREG_GLOB
) ? "g" : "",
4226 (re
->flags
& JSREG_FOLD
) ? "i" : "",
4227 (re
->flags
& JSREG_MULTILINE
) ? "m" : "",
4228 (re
->flags
& JSREG_STICKY
) ? "y" : "");
4234 str
= js_ValueToString(cx
, argv
[0]);
4239 argv
[0] = STRING_TO_JSVAL(str
);
4242 if (lastIndex
< 0 || JSSTRING_LENGTH(str
) < lastIndex
) {
4243 ok
= js_SetLastIndex(cx
, obj
, 0);
4246 i
= (size_t) lastIndex
;
4247 ok
= js_ExecuteRegExp(cx
, re
, str
, &i
, test
, rval
);
4249 ((re
->flags
& JSREG_GLOB
) || (*rval
!= JSVAL_NULL
&& sticky
))) {
4250 ok
= js_SetLastIndex(cx
, obj
, (*rval
== JSVAL_NULL
) ? 0 : i
);
4255 DROP_REGEXP(cx
, re
);
4260 regexp_exec(JSContext
*cx
, uintN argc
, jsval
*vp
)
4262 return regexp_exec_sub(cx
, JS_THIS_OBJECT(cx
, vp
), argc
, vp
+ 2, JS_FALSE
,
4267 regexp_test(JSContext
*cx
, uintN argc
, jsval
*vp
)
4269 if (!regexp_exec_sub(cx
, JS_THIS_OBJECT(cx
, vp
), argc
, vp
+ 2, JS_TRUE
, vp
))
4271 if (*vp
!= JSVAL_TRUE
)
4276 static JSFunctionSpec regexp_methods
[] = {
4278 JS_FN(js_toSource_str
, regexp_toString
, 0,0),
4280 JS_FN(js_toString_str
, regexp_toString
, 0,0),
4281 JS_FN("compile", regexp_compile
, 2,0),
4282 JS_FN("exec", regexp_exec
, 1,0),
4283 JS_FN("test", regexp_test
, 1,0),
4288 RegExp(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
, jsval
*rval
)
4290 if (!(cx
->fp
->flags
& JSFRAME_CONSTRUCTING
)) {
4292 * If first arg is regexp and no flags are given, just return the arg.
4293 * (regexp_compile_sub detects the regexp + flags case and throws a
4294 * TypeError.) See 10.15.3.1.
4296 if ((argc
< 2 || JSVAL_IS_VOID(argv
[1])) &&
4297 !JSVAL_IS_PRIMITIVE(argv
[0]) &&
4298 OBJ_GET_CLASS(cx
, JSVAL_TO_OBJECT(argv
[0])) == &js_RegExpClass
) {
4303 /* Otherwise, replace obj with a new RegExp object. */
4304 obj
= js_NewObject(cx
, &js_RegExpClass
, NULL
, NULL
, 0);
4309 * regexp_compile_sub does not use rval to root its temporaries so we
4310 * can use it to root obj.
4312 *rval
= OBJECT_TO_JSVAL(obj
);
4314 return regexp_compile_sub(cx
, obj
, argc
, argv
, rval
);
4318 js_InitRegExpClass(JSContext
*cx
, JSObject
*obj
)
4320 JSObject
*proto
, *ctor
;
4323 proto
= JS_InitClass(cx
, obj
, NULL
, &js_RegExpClass
, RegExp
, 1,
4324 regexp_props
, regexp_methods
,
4325 regexp_static_props
, NULL
);
4327 if (!proto
|| !(ctor
= JS_GetConstructor(cx
, proto
)))
4329 if (!JS_AliasProperty(cx
, ctor
, "input", "$_") ||
4330 !JS_AliasProperty(cx
, ctor
, "multiline", "$*") ||
4331 !JS_AliasProperty(cx
, ctor
, "lastMatch", "$&") ||
4332 !JS_AliasProperty(cx
, ctor
, "lastParen", "$+") ||
4333 !JS_AliasProperty(cx
, ctor
, "leftContext", "$`") ||
4334 !JS_AliasProperty(cx
, ctor
, "rightContext", "$'")) {
4338 /* Give RegExp.prototype private data so it matches the empty string. */
4339 if (!regexp_compile_sub(cx
, proto
, 0, NULL
, &rval
))
4344 JS_DeleteProperty(cx
, obj
, js_RegExpClass
.name
);
4349 js_NewRegExpObject(JSContext
*cx
, JSTokenStream
*ts
,
4350 jschar
*chars
, size_t length
, uintN flags
)
4355 JSTempValueRooter tvr
;
4357 str
= js_NewStringCopyN(cx
, chars
, length
);
4360 re
= js_NewRegExp(cx
, ts
, str
, flags
, JS_FALSE
);
4363 JS_PUSH_TEMP_ROOT_STRING(cx
, str
, &tvr
);
4364 obj
= js_NewObject(cx
, &js_RegExpClass
, NULL
, NULL
, 0);
4365 if (!obj
|| !JS_SetPrivate(cx
, obj
, re
)) {
4366 js_DestroyRegExp(cx
, re
);
4369 if (obj
&& !js_SetLastIndex(cx
, obj
, 0))
4371 JS_POP_TEMP_ROOT(cx
, &tvr
);
4376 js_CloneRegExpObject(JSContext
*cx
, JSObject
*obj
, JSObject
*parent
)
4381 JS_ASSERT(OBJ_GET_CLASS(cx
, obj
) == &js_RegExpClass
);
4382 clone
= js_NewObject(cx
, &js_RegExpClass
, NULL
, parent
, 0);
4385 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4386 if (!JS_SetPrivate(cx
, clone
, re
) || !js_SetLastIndex(cx
, clone
, 0)) {
4387 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4390 HOLD_REGEXP(cx
, re
);
4395 js_GetLastIndex(JSContext
*cx
, JSObject
*obj
, jsdouble
*lastIndex
)
4399 return JS_GetReservedSlot(cx
, obj
, 0, &v
) &&
4400 JS_ValueToNumber(cx
, v
, lastIndex
);
4404 js_SetLastIndex(JSContext
*cx
, JSObject
*obj
, jsdouble lastIndex
)
4408 return JS_NewNumberValue(cx
, lastIndex
, &v
) &&
4409 JS_SetReservedSlot(cx
, obj
, 0, v
);