Bug 449522 - Context menu for HTML5 <video> elements. r=gavin, ui-r=boriss
[wine-gecko.git] / js / src / jsregexp.cpp
blobf12e3ce81e14f6c0ee5fb9c59856f4f57bfc0354
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS regular expressions, after Perl.
44 #include "jsstddef.h"
45 #include <stdlib.h>
46 #include <string.h>
47 #include <stdarg.h>
48 #include "jstypes.h"
49 #include "jsarena.h" /* Added by JSIFY */
50 #include "jsutil.h" /* Added by JSIFY */
51 #include "jsapi.h"
52 #include "jsarray.h"
53 #include "jsatom.h"
54 #include "jscntxt.h"
55 #include "jsversion.h"
56 #include "jsfun.h"
57 #include "jsgc.h"
58 #include "jsinterp.h"
59 #include "jslock.h"
60 #include "jsnum.h"
61 #include "jsobj.h"
62 #include "jsopcode.h"
63 #include "jsregexp.h"
64 #include "jsscan.h"
65 #include "jsscope.h"
66 #include "jsstr.h"
68 typedef enum REOp {
69 #define REOP_DEF(opcode, name) opcode,
70 #include "jsreops.tbl"
71 #undef REOP_DEF
72 REOP_LIMIT /* META: no operator >= to this */
73 } REOp;
75 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
77 #ifdef REGEXP_DEBUG
78 const char *reop_names[] = {
79 #define REOP_DEF(opcode, name) name,
80 #include "jsreops.tbl"
81 #undef REOP_DEF
82 NULL
84 #endif
86 #ifdef __GNUC__
87 static int
88 re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
89 #endif
91 #ifdef REGEXP_DEBUG
92 static int
93 re_debug(const char *fmt, ...)
95 va_list ap;
96 int retval;
98 va_start(ap, fmt);
99 retval = vprintf(fmt, ap);
100 va_end(ap);
101 return retval;
104 static void
105 re_debug_chars(const jschar *chrs, size_t length)
107 int i = 0;
109 printf(" \"");
110 while (*chrs && i++ < length) {
111 putchar((char)*chrs++);
113 printf("\"");
115 #else /* !REGEXP_DEBUG */
116 /* This should be optimized to a no-op by our tier-1 compilers. */
117 static int
118 re_debug(const char *fmt, ...)
120 return 0;
123 static void
124 re_debug_chars(const jschar *chrs, size_t length)
127 #endif /* !REGEXP_DEBUG */
129 struct RENode {
130 REOp op; /* r.e. op bytecode */
131 RENode *next; /* next in concatenation order */
132 void *kid; /* first operand */
133 union {
134 void *kid2; /* second operand */
135 jsint num; /* could be a number */
136 size_t parenIndex; /* or a parenthesis index */
137 struct { /* or a quantifier range */
138 uintN min;
139 uintN max;
140 JSPackedBool greedy;
141 } range;
142 struct { /* or a character class */
143 size_t startIndex;
144 size_t kidlen; /* length of string at kid, in jschars */
145 size_t index; /* index into class list */
146 uint16 bmsize; /* bitmap size, based on max char code */
147 JSPackedBool sense;
148 } ucclass;
149 struct { /* or a literal sequence */
150 jschar chr; /* of one character */
151 size_t length; /* or many (via the kid) */
152 } flat;
153 struct {
154 RENode *kid2; /* second operand from ALT */
155 jschar ch1; /* match char for ALTPREREQ */
156 jschar ch2; /* ditto, or class index for ALTPREREQ2 */
157 } altprereq;
158 } u;
161 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
162 ((c >= 'a') && (c <= 'z')) )
163 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
164 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
166 #define CLASS_CACHE_SIZE 4
168 typedef struct CompilerState {
169 JSContext *context;
170 JSTokenStream *tokenStream; /* For reporting errors */
171 const jschar *cpbegin;
172 const jschar *cpend;
173 const jschar *cp;
174 size_t parenCount;
175 size_t classCount; /* number of [] encountered */
176 size_t treeDepth; /* maximum depth of parse tree */
177 size_t progLength; /* estimated bytecode length */
178 RENode *result;
179 size_t classBitmapsMem; /* memory to hold all class bitmaps */
180 struct {
181 const jschar *start; /* small cache of class strings */
182 size_t length; /* since they're often the same */
183 size_t index;
184 } classCache[CLASS_CACHE_SIZE];
185 uint16 flags;
186 } CompilerState;
188 typedef struct EmitStateStackEntry {
189 jsbytecode *altHead; /* start of REOP_ALT* opcode */
190 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
191 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
192 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
193 RENode *continueNode; /* original REOP_ALT* node being stacked */
194 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
195 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
196 avoid 16-bit unsigned offset overflow */
197 } EmitStateStackEntry;
200 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
201 * the getters and setters take the pc of the offset, not of the opcode before
202 * the offset.
204 #define ARG_LEN 2
205 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
206 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
207 (pc)[1] = (jsbytecode) (arg))
209 #define OFFSET_LEN ARG_LEN
210 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
211 #define GET_OFFSET(pc) GET_ARG(pc)
214 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
215 * For sanity, we limit it to 2^24 bytes.
217 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
220 * The maximum memory that can be allocated for class bitmaps.
221 * For sanity, we limit it to 2^24 bytes.
223 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
226 * Functions to get size and write/read bytecode that represent small indexes
227 * compactly.
228 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
229 * indicates that the following byte brings more bits to the index. Otherwise
230 * this is the last byte in the index bytecode representing highest index bits.
232 static size_t
233 GetCompactIndexWidth(size_t index)
235 size_t width;
237 for (width = 1; (index >>= 7) != 0; ++width) { }
238 return width;
241 static JS_ALWAYS_INLINE jsbytecode *
242 WriteCompactIndex(jsbytecode *pc, size_t index)
244 size_t next;
246 while ((next = index >> 7) != 0) {
247 *pc++ = (jsbytecode)(index | 0x80);
248 index = next;
250 *pc++ = (jsbytecode)index;
251 return pc;
254 static JS_ALWAYS_INLINE jsbytecode *
255 ReadCompactIndex(jsbytecode *pc, size_t *result)
257 size_t nextByte;
259 nextByte = *pc++;
260 if ((nextByte & 0x80) == 0) {
262 * Short-circuit the most common case when compact index <= 127.
264 *result = nextByte;
265 } else {
266 size_t shift = 7;
267 *result = 0x7F & nextByte;
268 do {
269 nextByte = *pc++;
270 *result |= (nextByte & 0x7F) << shift;
271 shift += 7;
272 } while ((nextByte & 0x80) != 0);
274 return pc;
277 typedef struct RECapture {
278 ptrdiff_t index; /* start of contents, -1 for empty */
279 size_t length; /* length of capture */
280 } RECapture;
282 typedef struct REMatchState {
283 const jschar *cp;
284 RECapture parens[1]; /* first of 're->parenCount' captures,
285 allocated at end of this struct */
286 } REMatchState;
288 struct REBackTrackData;
290 typedef struct REProgState {
291 jsbytecode *continue_pc; /* current continuation data */
292 jsbytecode continue_op;
293 ptrdiff_t index; /* progress in text */
294 size_t parenSoFar; /* highest indexed paren started */
295 union {
296 struct {
297 uintN min; /* current quantifier limits */
298 uintN max;
299 } quantifier;
300 struct {
301 size_t top; /* backtrack stack state */
302 size_t sz;
303 } assertion;
304 } u;
305 } REProgState;
307 typedef struct REBackTrackData {
308 size_t sz; /* size of previous stack entry */
309 jsbytecode *backtrack_pc; /* where to backtrack to */
310 jsbytecode backtrack_op;
311 const jschar *cp; /* index in text of match at backtrack */
312 size_t parenIndex; /* start index of saved paren contents */
313 size_t parenCount; /* # of saved paren contents */
314 size_t saveStateStackTop; /* number of parent states */
315 /* saved parent states follow */
316 /* saved paren contents follow */
317 } REBackTrackData;
319 #define INITIAL_STATESTACK 100
320 #define INITIAL_BACKTRACK 8000
322 typedef struct REGlobalData {
323 JSContext *cx;
324 JSRegExp *regexp; /* the RE in execution */
325 JSBool ok; /* runtime error (out_of_memory only?) */
326 size_t start; /* offset to start at */
327 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
328 const jschar *cpbegin; /* text base address */
329 const jschar *cpend; /* text limit address */
331 REProgState *stateStack; /* stack of state of current parents */
332 size_t stateStackTop;
333 size_t stateStackLimit;
335 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
336 REBackTrackData *backTrackSP;
337 size_t backTrackStackSize;
338 size_t cursz; /* size of current stack entry */
339 size_t backTrackCount; /* how many times we've backtracked */
340 size_t backTrackLimit; /* upper limit on backtrack states */
341 } REGlobalData;
344 * 1. If IgnoreCase is false, return ch.
345 * 2. Let u be ch converted to upper case as if by calling
346 * String.prototype.toUpperCase on the one-character string ch.
347 * 3. If u does not consist of a single character, return ch.
348 * 4. Let cu be u's character.
349 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
350 * code point value is less than decimal 128, then return ch.
351 * 6. Return cu.
353 static JS_ALWAYS_INLINE uintN
354 upcase(uintN ch)
356 uintN cu;
358 JS_ASSERT((uintN) (jschar) ch == ch);
359 if (ch < 128) {
360 if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
361 ch -= (uintN) ('a' - 'A');
362 return ch;
365 cu = JS_TOUPPER(ch);
366 return (cu < 128) ? ch : cu;
369 static JS_ALWAYS_INLINE uintN
370 downcase(uintN ch)
372 JS_ASSERT((uintN) (jschar) ch == ch);
373 if (ch < 128) {
374 if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
375 ch += (uintN) ('a' - 'A');
376 return ch;
379 return JS_TOLOWER(ch);
382 /* Construct and initialize an RENode, returning NULL for out-of-memory */
383 static RENode *
384 NewRENode(CompilerState *state, REOp op)
386 JSContext *cx;
387 RENode *ren;
389 cx = state->context;
390 JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
391 if (!ren) {
392 js_ReportOutOfScriptQuota(cx);
393 return NULL;
395 ren->op = op;
396 ren->next = NULL;
397 ren->kid = NULL;
398 return ren;
402 * Validates and converts hex ascii value.
404 static JSBool
405 isASCIIHexDigit(jschar c, uintN *digit)
407 uintN cv = c;
409 if (cv < '0')
410 return JS_FALSE;
411 if (cv <= '9') {
412 *digit = cv - '0';
413 return JS_TRUE;
415 cv |= 0x20;
416 if (cv >= 'a' && cv <= 'f') {
417 *digit = cv - 'a' + 10;
418 return JS_TRUE;
420 return JS_FALSE;
424 typedef struct {
425 REOp op;
426 const jschar *errPos;
427 size_t parenIndex;
428 } REOpData;
430 static JSBool
431 ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
432 const jschar *arg)
434 if (state->tokenStream) {
435 return js_ReportCompileErrorNumber(state->context, state->tokenStream,
436 NULL, JSREPORT_UC | flags,
437 errorNumber, arg);
439 return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
440 js_GetErrorMessage, NULL,
441 errorNumber, arg);
444 static JSBool
445 ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
447 return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
451 * Process the op against the two top operands, reducing them to a single
452 * operand in the penultimate slot. Update progLength and treeDepth.
454 static JSBool
455 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
456 intN operandSP)
458 RENode *result;
460 switch (opData->op) {
461 case REOP_ALT:
462 result = NewRENode(state, REOP_ALT);
463 if (!result)
464 return JS_FALSE;
465 result->kid = operandStack[operandSP - 2];
466 result->u.kid2 = operandStack[operandSP - 1];
467 operandStack[operandSP - 2] = result;
469 if (state->treeDepth == TREE_DEPTH_MAX) {
470 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
471 return JS_FALSE;
473 ++state->treeDepth;
476 * Look at both alternates to see if there's a FLAT or a CLASS at
477 * the start of each. If so, use a prerequisite match.
479 if (((RENode *) result->kid)->op == REOP_FLAT &&
480 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
481 (state->flags & JSREG_FOLD) == 0) {
482 result->op = REOP_ALTPREREQ;
483 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
484 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
485 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
486 JUMP, <end> ... ENDALT */
487 state->progLength += 13;
489 else
490 if (((RENode *) result->kid)->op == REOP_CLASS &&
491 ((RENode *) result->kid)->u.ucclass.index < 256 &&
492 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
493 (state->flags & JSREG_FOLD) == 0) {
494 result->op = REOP_ALTPREREQ2;
495 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
496 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
497 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
498 JUMP, <end> ... ENDALT */
499 state->progLength += 13;
501 else
502 if (((RENode *) result->kid)->op == REOP_FLAT &&
503 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
504 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
505 (state->flags & JSREG_FOLD) == 0) {
506 result->op = REOP_ALTPREREQ2;
507 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
508 result->u.altprereq.ch2 =
509 ((RENode *) result->u.kid2)->u.ucclass.index;
510 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
511 JUMP, <end> ... ENDALT */
512 state->progLength += 13;
514 else {
515 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
516 state->progLength += 7;
518 break;
520 case REOP_CONCAT:
521 result = operandStack[operandSP - 2];
522 while (result->next)
523 result = result->next;
524 result->next = operandStack[operandSP - 1];
525 break;
527 case REOP_ASSERT:
528 case REOP_ASSERT_NOT:
529 case REOP_LPARENNON:
530 case REOP_LPAREN:
531 /* These should have been processed by a close paren. */
532 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
533 opData->errPos);
534 return JS_FALSE;
536 default:;
538 return JS_TRUE;
542 * Parser forward declarations.
544 static JSBool ParseTerm(CompilerState *state);
545 static JSBool ParseQuantifier(CompilerState *state);
546 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
549 * Top-down regular expression grammar, based closely on Perl4.
551 * regexp: altern A regular expression is one or more
552 * altern '|' regexp alternatives separated by vertical bar.
554 #define INITIAL_STACK_SIZE 128
556 static JSBool
557 ParseRegExp(CompilerState *state)
559 size_t parenIndex;
560 RENode *operand;
561 REOpData *operatorStack;
562 RENode **operandStack;
563 REOp op;
564 intN i;
565 JSBool result = JS_FALSE;
567 intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
568 intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
570 /* Watch out for empty regexp */
571 if (state->cp == state->cpend) {
572 state->result = NewRENode(state, REOP_EMPTY);
573 return (state->result != NULL);
576 operatorStack = (REOpData *)
577 JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
578 if (!operatorStack)
579 return JS_FALSE;
581 operandStack = (RENode **)
582 JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
583 if (!operandStack)
584 goto out;
586 for (;;) {
587 parenIndex = state->parenCount;
588 if (state->cp == state->cpend) {
590 * If we are at the end of the regexp and we're short one or more
591 * operands, the regexp must have the form /x|/ or some such, with
592 * left parentheses making us short more than one operand.
594 if (operatorSP >= operandSP) {
595 operand = NewRENode(state, REOP_EMPTY);
596 if (!operand)
597 goto out;
598 goto pushOperand;
600 } else {
601 switch (*state->cp) {
602 case '(':
603 ++state->cp;
604 if (state->cp + 1 < state->cpend &&
605 *state->cp == '?' &&
606 (state->cp[1] == '=' ||
607 state->cp[1] == '!' ||
608 state->cp[1] == ':')) {
609 switch (state->cp[1]) {
610 case '=':
611 op = REOP_ASSERT;
612 /* ASSERT, <next>, ... ASSERTTEST */
613 state->progLength += 4;
614 break;
615 case '!':
616 op = REOP_ASSERT_NOT;
617 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
618 state->progLength += 4;
619 break;
620 default:
621 op = REOP_LPARENNON;
622 break;
624 state->cp += 2;
625 } else {
626 op = REOP_LPAREN;
627 /* LPAREN, <index>, ... RPAREN, <index> */
628 state->progLength
629 += 2 * (1 + GetCompactIndexWidth(parenIndex));
630 state->parenCount++;
631 if (state->parenCount == 65535) {
632 ReportRegExpError(state, JSREPORT_ERROR,
633 JSMSG_TOO_MANY_PARENS);
634 goto out;
637 goto pushOperator;
639 case ')':
641 * If there's no stacked open parenthesis, throw syntax error.
643 for (i = operatorSP - 1; ; i--) {
644 if (i < 0) {
645 ReportRegExpError(state, JSREPORT_ERROR,
646 JSMSG_UNMATCHED_RIGHT_PAREN);
647 goto out;
649 if (operatorStack[i].op == REOP_ASSERT ||
650 operatorStack[i].op == REOP_ASSERT_NOT ||
651 operatorStack[i].op == REOP_LPARENNON ||
652 operatorStack[i].op == REOP_LPAREN) {
653 break;
656 /* FALL THROUGH */
658 case '|':
659 /* Expected an operand before these, so make an empty one */
660 operand = NewRENode(state, REOP_EMPTY);
661 if (!operand)
662 goto out;
663 goto pushOperand;
665 default:
666 if (!ParseTerm(state))
667 goto out;
668 operand = state->result;
669 pushOperand:
670 if (operandSP == operandStackSize) {
671 RENode **tmp;
672 operandStackSize += operandStackSize;
673 tmp = (RENode **)
674 JS_realloc(state->context, operandStack,
675 sizeof(RENode *) * operandStackSize);
676 if (!tmp)
677 goto out;
678 operandStack = tmp;
680 operandStack[operandSP++] = operand;
681 break;
685 /* At the end; process remaining operators. */
686 restartOperator:
687 if (state->cp == state->cpend) {
688 while (operatorSP) {
689 --operatorSP;
690 if (!ProcessOp(state, &operatorStack[operatorSP],
691 operandStack, operandSP))
692 goto out;
693 --operandSP;
695 JS_ASSERT(operandSP == 1);
696 state->result = operandStack[0];
697 result = JS_TRUE;
698 goto out;
701 switch (*state->cp) {
702 case '|':
703 /* Process any stacked 'concat' operators */
704 ++state->cp;
705 while (operatorSP &&
706 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
707 --operatorSP;
708 if (!ProcessOp(state, &operatorStack[operatorSP],
709 operandStack, operandSP)) {
710 goto out;
712 --operandSP;
714 op = REOP_ALT;
715 goto pushOperator;
717 case ')':
719 * If there's no stacked open parenthesis, throw syntax error.
721 for (i = operatorSP - 1; ; i--) {
722 if (i < 0) {
723 ReportRegExpError(state, JSREPORT_ERROR,
724 JSMSG_UNMATCHED_RIGHT_PAREN);
725 goto out;
727 if (operatorStack[i].op == REOP_ASSERT ||
728 operatorStack[i].op == REOP_ASSERT_NOT ||
729 operatorStack[i].op == REOP_LPARENNON ||
730 operatorStack[i].op == REOP_LPAREN) {
731 break;
734 ++state->cp;
736 /* Process everything on the stack until the open parenthesis. */
737 for (;;) {
738 JS_ASSERT(operatorSP);
739 --operatorSP;
740 switch (operatorStack[operatorSP].op) {
741 case REOP_ASSERT:
742 case REOP_ASSERT_NOT:
743 case REOP_LPAREN:
744 operand = NewRENode(state, operatorStack[operatorSP].op);
745 if (!operand)
746 goto out;
747 operand->u.parenIndex =
748 operatorStack[operatorSP].parenIndex;
749 JS_ASSERT(operandSP);
750 operand->kid = operandStack[operandSP - 1];
751 operandStack[operandSP - 1] = operand;
752 if (state->treeDepth == TREE_DEPTH_MAX) {
753 ReportRegExpError(state, JSREPORT_ERROR,
754 JSMSG_REGEXP_TOO_COMPLEX);
755 goto out;
757 ++state->treeDepth;
758 /* FALL THROUGH */
760 case REOP_LPARENNON:
761 state->result = operandStack[operandSP - 1];
762 if (!ParseQuantifier(state))
763 goto out;
764 operandStack[operandSP - 1] = state->result;
765 goto restartOperator;
766 default:
767 if (!ProcessOp(state, &operatorStack[operatorSP],
768 operandStack, operandSP))
769 goto out;
770 --operandSP;
771 break;
774 break;
776 case '{':
778 const jschar *errp = state->cp;
780 if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
782 * This didn't even scan correctly as a quantifier, so we should
783 * treat it as flat.
785 op = REOP_CONCAT;
786 goto pushOperator;
789 state->cp = errp;
790 /* FALL THROUGH */
793 case '+':
794 case '*':
795 case '?':
796 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
797 state->cp);
798 result = JS_FALSE;
799 goto out;
801 default:
802 /* Anything else is the start of the next term. */
803 op = REOP_CONCAT;
804 pushOperator:
805 if (operatorSP == operatorStackSize) {
806 REOpData *tmp;
807 operatorStackSize += operatorStackSize;
808 tmp = (REOpData *)
809 JS_realloc(state->context, operatorStack,
810 sizeof(REOpData) * operatorStackSize);
811 if (!tmp)
812 goto out;
813 operatorStack = tmp;
815 operatorStack[operatorSP].op = op;
816 operatorStack[operatorSP].errPos = state->cp;
817 operatorStack[operatorSP++].parenIndex = parenIndex;
818 break;
821 out:
822 if (operatorStack)
823 JS_free(state->context, operatorStack);
824 if (operandStack)
825 JS_free(state->context, operandStack);
826 return result;
830 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
831 * its being on the stack, and to propagate errors to its callers.
833 #define JSREG_FIND_PAREN_COUNT 0x8000
834 #define JSREG_FIND_PAREN_ERROR 0x4000
837 * Magic return value from FindParenCount and GetDecimalValue, to indicate
838 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
839 * its findMax parameter is non-null.
841 #define OVERFLOW_VALUE ((uintN)-1)
843 static uintN
844 FindParenCount(CompilerState *state)
846 CompilerState temp;
847 int i;
849 if (state->flags & JSREG_FIND_PAREN_COUNT)
850 return OVERFLOW_VALUE;
853 * Copy state into temp, flag it so we never report an invalid backref,
854 * and reset its members to parse the entire regexp. This is obviously
855 * suboptimal, but GetDecimalValue calls us only if a backref appears to
856 * refer to a forward parenthetical, which is rare.
858 temp = *state;
859 temp.flags |= JSREG_FIND_PAREN_COUNT;
860 temp.cp = temp.cpbegin;
861 temp.parenCount = 0;
862 temp.classCount = 0;
863 temp.progLength = 0;
864 temp.treeDepth = 0;
865 temp.classBitmapsMem = 0;
866 for (i = 0; i < CLASS_CACHE_SIZE; i++)
867 temp.classCache[i].start = NULL;
869 if (!ParseRegExp(&temp)) {
870 state->flags |= JSREG_FIND_PAREN_ERROR;
871 return OVERFLOW_VALUE;
873 return temp.parenCount;
877 * Extract and return a decimal value at state->cp. The initial character c
878 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
879 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
880 * state->flags to discover whether an error occurred under findMax.
882 static uintN
883 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
884 CompilerState *state)
886 uintN value = JS7_UNDEC(c);
887 JSBool overflow = (value > max && (!findMax || value > findMax(state)));
889 /* The following restriction allows simpler overflow checks. */
890 JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
891 while (state->cp < state->cpend) {
892 c = *state->cp;
893 if (!JS7_ISDEC(c))
894 break;
895 value = 10 * value + JS7_UNDEC(c);
896 if (!overflow && value > max && (!findMax || value > findMax(state)))
897 overflow = JS_TRUE;
898 ++state->cp;
900 return overflow ? OVERFLOW_VALUE : value;
904 * Calculate the total size of the bitmap required for a class expression.
906 static JSBool
907 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
908 const jschar *end)
910 uintN max = 0;
911 JSBool inRange = JS_FALSE;
912 jschar c, rangeStart = 0;
913 uintN n, digit, nDigits, i;
915 target->u.ucclass.bmsize = 0;
916 target->u.ucclass.sense = JS_TRUE;
918 if (src == end)
919 return JS_TRUE;
921 if (*src == '^') {
922 ++src;
923 target->u.ucclass.sense = JS_FALSE;
926 while (src != end) {
927 JSBool canStartRange = JS_TRUE;
928 uintN localMax = 0;
930 switch (*src) {
931 case '\\':
932 ++src;
933 c = *src++;
934 switch (c) {
935 case 'b':
936 localMax = 0x8;
937 break;
938 case 'f':
939 localMax = 0xC;
940 break;
941 case 'n':
942 localMax = 0xA;
943 break;
944 case 'r':
945 localMax = 0xD;
946 break;
947 case 't':
948 localMax = 0x9;
949 break;
950 case 'v':
951 localMax = 0xB;
952 break;
953 case 'c':
954 if (src < end && RE_IS_LETTER(*src)) {
955 localMax = (uintN) (*src++) & 0x1F;
956 } else {
957 --src;
958 localMax = '\\';
960 break;
961 case 'x':
962 nDigits = 2;
963 goto lexHex;
964 case 'u':
965 nDigits = 4;
966 lexHex:
967 n = 0;
968 for (i = 0; (i < nDigits) && (src < end); i++) {
969 c = *src++;
970 if (!isASCIIHexDigit(c, &digit)) {
972 * Back off to accepting the original
973 *'\' as a literal.
975 src -= i + 1;
976 n = '\\';
977 break;
979 n = (n << 4) | digit;
981 localMax = n;
982 break;
983 case 'd':
984 canStartRange = JS_FALSE;
985 if (inRange) {
986 JS_ReportErrorNumber(state->context,
987 js_GetErrorMessage, NULL,
988 JSMSG_BAD_CLASS_RANGE);
989 return JS_FALSE;
991 localMax = '9';
992 break;
993 case 'D':
994 case 's':
995 case 'S':
996 case 'w':
997 case 'W':
998 canStartRange = JS_FALSE;
999 if (inRange) {
1000 JS_ReportErrorNumber(state->context,
1001 js_GetErrorMessage, NULL,
1002 JSMSG_BAD_CLASS_RANGE);
1003 return JS_FALSE;
1005 max = 65535;
1008 * If this is the start of a range, ensure that it's less than
1009 * the end.
1011 localMax = 0;
1012 break;
1013 case '0':
1014 case '1':
1015 case '2':
1016 case '3':
1017 case '4':
1018 case '5':
1019 case '6':
1020 case '7':
1022 * This is a non-ECMA extension - decimal escapes (in this
1023 * case, octal!) are supposed to be an error inside class
1024 * ranges, but supported here for backwards compatibility.
1027 n = JS7_UNDEC(c);
1028 c = *src;
1029 if ('0' <= c && c <= '7') {
1030 src++;
1031 n = 8 * n + JS7_UNDEC(c);
1032 c = *src;
1033 if ('0' <= c && c <= '7') {
1034 src++;
1035 i = 8 * n + JS7_UNDEC(c);
1036 if (i <= 0377)
1037 n = i;
1038 else
1039 src--;
1042 localMax = n;
1043 break;
1045 default:
1046 localMax = c;
1047 break;
1049 break;
1050 default:
1051 localMax = *src++;
1052 break;
1055 if (inRange) {
1056 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1057 if (rangeStart > localMax) {
1058 JS_ReportErrorNumber(state->context,
1059 js_GetErrorMessage, NULL,
1060 JSMSG_BAD_CLASS_RANGE);
1061 return JS_FALSE;
1063 inRange = JS_FALSE;
1064 } else {
1065 if (canStartRange && src < end - 1) {
1066 if (*src == '-') {
1067 ++src;
1068 inRange = JS_TRUE;
1069 rangeStart = (jschar)localMax;
1070 continue;
1073 if (state->flags & JSREG_FOLD)
1074 rangeStart = localMax; /* one run of the uc/dc loop below */
1077 if (state->flags & JSREG_FOLD) {
1078 jschar maxch = localMax;
1080 for (i = rangeStart; i <= localMax; i++) {
1081 jschar uch, dch;
1083 uch = upcase(i);
1084 dch = downcase(i);
1085 maxch = JS_MAX(maxch, uch);
1086 maxch = JS_MAX(maxch, dch);
1088 localMax = maxch;
1091 if (localMax > max)
1092 max = localMax;
1094 target->u.ucclass.bmsize = max;
1095 return JS_TRUE;
1099 * item: assertion An item is either an assertion or
1100 * quantatom a quantified atom.
1102 * assertion: '^' Assertions match beginning of string
1103 * (or line if the class static property
1104 * RegExp.multiline is true).
1105 * '$' End of string (or line if the class
1106 * static property RegExp.multiline is
1107 * true).
1108 * '\b' Word boundary (between \w and \W).
1109 * '\B' Word non-boundary.
1111 * quantatom: atom An unquantified atom.
1112 * quantatom '{' n ',' m '}'
1113 * Atom must occur between n and m times.
1114 * quantatom '{' n ',' '}' Atom must occur at least n times.
1115 * quantatom '{' n '}' Atom must occur exactly n times.
1116 * quantatom '*' Zero or more times (same as {0,}).
1117 * quantatom '+' One or more times (same as {1,}).
1118 * quantatom '?' Zero or one time (same as {0,1}).
1120 * any of which can be optionally followed by '?' for ungreedy
1122 * atom: '(' regexp ')' A parenthesized regexp (what matched
1123 * can be addressed using a backreference,
1124 * see '\' n below).
1125 * '.' Matches any char except '\n'.
1126 * '[' classlist ']' A character class.
1127 * '[' '^' classlist ']' A negated character class.
1128 * '\f' Form Feed.
1129 * '\n' Newline (Line Feed).
1130 * '\r' Carriage Return.
1131 * '\t' Horizontal Tab.
1132 * '\v' Vertical Tab.
1133 * '\d' A digit (same as [0-9]).
1134 * '\D' A non-digit.
1135 * '\w' A word character, [0-9a-z_A-Z].
1136 * '\W' A non-word character.
1137 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1138 * '\S' A non-whitespace character.
1139 * '\' n A backreference to the nth (n decimal
1140 * and positive) parenthesized expression.
1141 * '\' octal An octal escape sequence (octal must be
1142 * two or three digits long, unless it is
1143 * 0 for the null character).
1144 * '\x' hex A hex escape (hex must be two digits).
1145 * '\u' unicode A unicode escape (must be four digits).
1146 * '\c' ctrl A control character, ctrl is a letter.
1147 * '\' literalatomchar Any character except one of the above
1148 * that follow '\' in an atom.
1149 * otheratomchar Any character not first among the other
1150 * atom right-hand sides.
1152 static JSBool
1153 ParseTerm(CompilerState *state)
1155 jschar c = *state->cp++;
1156 uintN nDigits;
1157 uintN num, tmp, n, i;
1158 const jschar *termStart;
1160 switch (c) {
1161 /* assertions and atoms */
1162 case '^':
1163 state->result = NewRENode(state, REOP_BOL);
1164 if (!state->result)
1165 return JS_FALSE;
1166 state->progLength++;
1167 return JS_TRUE;
1168 case '$':
1169 state->result = NewRENode(state, REOP_EOL);
1170 if (!state->result)
1171 return JS_FALSE;
1172 state->progLength++;
1173 return JS_TRUE;
1174 case '\\':
1175 if (state->cp >= state->cpend) {
1176 /* a trailing '\' is an error */
1177 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1178 return JS_FALSE;
1180 c = *state->cp++;
1181 switch (c) {
1182 /* assertion escapes */
1183 case 'b' :
1184 state->result = NewRENode(state, REOP_WBDRY);
1185 if (!state->result)
1186 return JS_FALSE;
1187 state->progLength++;
1188 return JS_TRUE;
1189 case 'B':
1190 state->result = NewRENode(state, REOP_WNONBDRY);
1191 if (!state->result)
1192 return JS_FALSE;
1193 state->progLength++;
1194 return JS_TRUE;
1195 /* Decimal escape */
1196 case '0':
1197 /* Give a strict warning. See also the note below. */
1198 if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
1199 JSMSG_INVALID_BACKREF)) {
1200 return JS_FALSE;
1202 doOctal:
1203 num = 0;
1204 while (state->cp < state->cpend) {
1205 c = *state->cp;
1206 if (c < '0' || '7' < c)
1207 break;
1208 state->cp++;
1209 tmp = 8 * num + (uintN)JS7_UNDEC(c);
1210 if (tmp > 0377)
1211 break;
1212 num = tmp;
1214 c = (jschar)num;
1215 doFlat:
1216 state->result = NewRENode(state, REOP_FLAT);
1217 if (!state->result)
1218 return JS_FALSE;
1219 state->result->u.flat.chr = c;
1220 state->result->u.flat.length = 1;
1221 state->progLength += 3;
1222 break;
1223 case '1':
1224 case '2':
1225 case '3':
1226 case '4':
1227 case '5':
1228 case '6':
1229 case '7':
1230 case '8':
1231 case '9':
1232 termStart = state->cp - 1;
1233 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1234 if (state->flags & JSREG_FIND_PAREN_ERROR)
1235 return JS_FALSE;
1236 if (num == OVERFLOW_VALUE) {
1237 /* Give a strict mode warning. */
1238 if (!ReportRegExpError(state,
1239 JSREPORT_WARNING | JSREPORT_STRICT,
1240 (c >= '8')
1241 ? JSMSG_INVALID_BACKREF
1242 : JSMSG_BAD_BACKREF)) {
1243 return JS_FALSE;
1247 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1248 * error here. However, for compatibility with IE, we treat the
1249 * whole backref as flat if the first character in it is not a
1250 * valid octal character, and as an octal escape otherwise.
1252 state->cp = termStart;
1253 if (c >= '8') {
1254 /* Treat this as flat. termStart - 1 is the \. */
1255 c = '\\';
1256 goto asFlat;
1259 /* Treat this as an octal escape. */
1260 goto doOctal;
1262 JS_ASSERT(1 <= num && num <= 0x10000);
1263 state->result = NewRENode(state, REOP_BACKREF);
1264 if (!state->result)
1265 return JS_FALSE;
1266 state->result->u.parenIndex = num - 1;
1267 state->progLength
1268 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1269 break;
1270 /* Control escape */
1271 case 'f':
1272 c = 0xC;
1273 goto doFlat;
1274 case 'n':
1275 c = 0xA;
1276 goto doFlat;
1277 case 'r':
1278 c = 0xD;
1279 goto doFlat;
1280 case 't':
1281 c = 0x9;
1282 goto doFlat;
1283 case 'v':
1284 c = 0xB;
1285 goto doFlat;
1286 /* Control letter */
1287 case 'c':
1288 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1289 c = (jschar) (*state->cp++ & 0x1F);
1290 } else {
1291 /* back off to accepting the original '\' as a literal */
1292 --state->cp;
1293 c = '\\';
1295 goto doFlat;
1296 /* HexEscapeSequence */
1297 case 'x':
1298 nDigits = 2;
1299 goto lexHex;
1300 /* UnicodeEscapeSequence */
1301 case 'u':
1302 nDigits = 4;
1303 lexHex:
1304 n = 0;
1305 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1306 uintN digit;
1307 c = *state->cp++;
1308 if (!isASCIIHexDigit(c, &digit)) {
1310 * Back off to accepting the original 'u' or 'x' as a
1311 * literal.
1313 state->cp -= i + 2;
1314 n = *state->cp++;
1315 break;
1317 n = (n << 4) | digit;
1319 c = (jschar) n;
1320 goto doFlat;
1321 /* Character class escapes */
1322 case 'd':
1323 state->result = NewRENode(state, REOP_DIGIT);
1324 doSimple:
1325 if (!state->result)
1326 return JS_FALSE;
1327 state->progLength++;
1328 break;
1329 case 'D':
1330 state->result = NewRENode(state, REOP_NONDIGIT);
1331 goto doSimple;
1332 case 's':
1333 state->result = NewRENode(state, REOP_SPACE);
1334 goto doSimple;
1335 case 'S':
1336 state->result = NewRENode(state, REOP_NONSPACE);
1337 goto doSimple;
1338 case 'w':
1339 state->result = NewRENode(state, REOP_ALNUM);
1340 goto doSimple;
1341 case 'W':
1342 state->result = NewRENode(state, REOP_NONALNUM);
1343 goto doSimple;
1344 /* IdentityEscape */
1345 default:
1346 state->result = NewRENode(state, REOP_FLAT);
1347 if (!state->result)
1348 return JS_FALSE;
1349 state->result->u.flat.chr = c;
1350 state->result->u.flat.length = 1;
1351 state->result->kid = (void *) (state->cp - 1);
1352 state->progLength += 3;
1353 break;
1355 break;
1356 case '[':
1357 state->result = NewRENode(state, REOP_CLASS);
1358 if (!state->result)
1359 return JS_FALSE;
1360 termStart = state->cp;
1361 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1362 for (;;) {
1363 if (state->cp == state->cpend) {
1364 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1365 JSMSG_UNTERM_CLASS, termStart);
1367 return JS_FALSE;
1369 if (*state->cp == '\\') {
1370 state->cp++;
1371 if (state->cp != state->cpend)
1372 state->cp++;
1373 continue;
1375 if (*state->cp == ']') {
1376 state->result->u.ucclass.kidlen = state->cp - termStart;
1377 break;
1379 state->cp++;
1381 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1382 if (!state->classCache[i].start) {
1383 state->classCache[i].start = termStart;
1384 state->classCache[i].length = state->result->u.ucclass.kidlen;
1385 state->classCache[i].index = state->classCount;
1386 break;
1388 if (state->classCache[i].length ==
1389 state->result->u.ucclass.kidlen) {
1390 for (n = 0; ; n++) {
1391 if (n == state->classCache[i].length) {
1392 state->result->u.ucclass.index
1393 = state->classCache[i].index;
1394 goto claim;
1396 if (state->classCache[i].start[n] != termStart[n])
1397 break;
1401 state->result->u.ucclass.index = state->classCount++;
1403 claim:
1405 * Call CalculateBitmapSize now as we want any errors it finds
1406 * to be reported during the parse phase, not at execution.
1408 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1409 return JS_FALSE;
1411 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1412 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1413 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1415 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1416 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1417 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1418 return JS_FALSE;
1420 state->classBitmapsMem += n;
1421 /* CLASS, <index> */
1422 state->progLength
1423 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1424 break;
1426 case '.':
1427 state->result = NewRENode(state, REOP_DOT);
1428 goto doSimple;
1430 case '{':
1432 const jschar *errp = state->cp--;
1433 intN err;
1435 err = ParseMinMaxQuantifier(state, JS_TRUE);
1436 state->cp = errp;
1438 if (err < 0)
1439 goto asFlat;
1441 /* FALL THROUGH */
1443 case '*':
1444 case '+':
1445 case '?':
1446 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1447 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1448 return JS_FALSE;
1449 default:
1450 asFlat:
1451 state->result = NewRENode(state, REOP_FLAT);
1452 if (!state->result)
1453 return JS_FALSE;
1454 state->result->u.flat.chr = c;
1455 state->result->u.flat.length = 1;
1456 state->result->kid = (void *) (state->cp - 1);
1457 state->progLength += 3;
1458 break;
1460 return ParseQuantifier(state);
1463 static JSBool
1464 ParseQuantifier(CompilerState *state)
1466 RENode *term;
1467 term = state->result;
1468 if (state->cp < state->cpend) {
1469 switch (*state->cp) {
1470 case '+':
1471 state->result = NewRENode(state, REOP_QUANT);
1472 if (!state->result)
1473 return JS_FALSE;
1474 state->result->u.range.min = 1;
1475 state->result->u.range.max = (uintN)-1;
1476 /* <PLUS>, <next> ... <ENDCHILD> */
1477 state->progLength += 4;
1478 goto quantifier;
1479 case '*':
1480 state->result = NewRENode(state, REOP_QUANT);
1481 if (!state->result)
1482 return JS_FALSE;
1483 state->result->u.range.min = 0;
1484 state->result->u.range.max = (uintN)-1;
1485 /* <STAR>, <next> ... <ENDCHILD> */
1486 state->progLength += 4;
1487 goto quantifier;
1488 case '?':
1489 state->result = NewRENode(state, REOP_QUANT);
1490 if (!state->result)
1491 return JS_FALSE;
1492 state->result->u.range.min = 0;
1493 state->result->u.range.max = 1;
1494 /* <OPT>, <next> ... <ENDCHILD> */
1495 state->progLength += 4;
1496 goto quantifier;
1497 case '{': /* balance '}' */
1499 intN err;
1500 const jschar *errp = state->cp;
1502 err = ParseMinMaxQuantifier(state, JS_FALSE);
1503 if (err == 0)
1504 goto quantifier;
1505 if (err == -1)
1506 return JS_TRUE;
1508 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1509 return JS_FALSE;
1511 default:;
1514 return JS_TRUE;
1516 quantifier:
1517 if (state->treeDepth == TREE_DEPTH_MAX) {
1518 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1519 return JS_FALSE;
1522 ++state->treeDepth;
1523 ++state->cp;
1524 state->result->kid = term;
1525 if (state->cp < state->cpend && *state->cp == '?') {
1526 ++state->cp;
1527 state->result->u.range.greedy = JS_FALSE;
1528 } else {
1529 state->result->u.range.greedy = JS_TRUE;
1531 return JS_TRUE;
1534 static intN
1535 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1537 uintN min, max;
1538 jschar c;
1539 const jschar *errp = state->cp++;
1541 c = *state->cp;
1542 if (JS7_ISDEC(c)) {
1543 ++state->cp;
1544 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1545 c = *state->cp;
1547 if (!ignoreValues && min == OVERFLOW_VALUE)
1548 return JSMSG_MIN_TOO_BIG;
1550 if (c == ',') {
1551 c = *++state->cp;
1552 if (JS7_ISDEC(c)) {
1553 ++state->cp;
1554 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1555 c = *state->cp;
1556 if (!ignoreValues && max == OVERFLOW_VALUE)
1557 return JSMSG_MAX_TOO_BIG;
1558 if (!ignoreValues && min > max)
1559 return JSMSG_OUT_OF_ORDER;
1560 } else {
1561 max = (uintN)-1;
1563 } else {
1564 max = min;
1566 if (c == '}') {
1567 state->result = NewRENode(state, REOP_QUANT);
1568 if (!state->result)
1569 return JSMSG_OUT_OF_MEMORY;
1570 state->result->u.range.min = min;
1571 state->result->u.range.max = max;
1573 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1574 * where <max> is written as compact(max+1) to make
1575 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1577 state->progLength += (1 + GetCompactIndexWidth(min)
1578 + GetCompactIndexWidth(max + 1)
1579 +3);
1580 return 0;
1584 state->cp = errp;
1585 return -1;
1588 static JSBool
1589 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1591 ptrdiff_t offset = target - jump;
1593 /* Check that target really points forward. */
1594 JS_ASSERT(offset >= 2);
1595 if ((size_t)offset > OFFSET_MAX)
1596 return JS_FALSE;
1598 jump[0] = JUMP_OFFSET_HI(offset);
1599 jump[1] = JUMP_OFFSET_LO(offset);
1600 return JS_TRUE;
1604 * Generate bytecode for the tree rooted at t using an explicit stack instead
1605 * of recursion.
1607 static jsbytecode *
1608 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1609 jsbytecode *pc, RENode *t)
1611 EmitStateStackEntry *emitStateSP, *emitStateStack;
1612 RECharSet *charSet;
1613 REOp op;
1615 if (treeDepth == 0) {
1616 emitStateStack = NULL;
1617 } else {
1618 emitStateStack =
1619 (EmitStateStackEntry *)JS_malloc(state->context,
1620 sizeof(EmitStateStackEntry) *
1621 treeDepth);
1622 if (!emitStateStack)
1623 return NULL;
1625 emitStateSP = emitStateStack;
1626 op = t->op;
1627 JS_ASSERT(op < REOP_LIMIT);
1629 for (;;) {
1630 *pc++ = op;
1631 switch (op) {
1632 case REOP_EMPTY:
1633 --pc;
1634 break;
1636 case REOP_ALTPREREQ2:
1637 case REOP_ALTPREREQ:
1638 JS_ASSERT(emitStateSP);
1639 emitStateSP->altHead = pc - 1;
1640 emitStateSP->endTermFixup = pc;
1641 pc += OFFSET_LEN;
1642 SET_ARG(pc, t->u.altprereq.ch1);
1643 pc += ARG_LEN;
1644 SET_ARG(pc, t->u.altprereq.ch2);
1645 pc += ARG_LEN;
1647 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1648 pc += OFFSET_LEN;
1650 emitStateSP->continueNode = t;
1651 emitStateSP->continueOp = REOP_JUMP;
1652 emitStateSP->jumpToJumpFlag = JS_FALSE;
1653 ++emitStateSP;
1654 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1655 t = (RENode *) t->kid;
1656 op = t->op;
1657 JS_ASSERT(op < REOP_LIMIT);
1658 continue;
1660 case REOP_JUMP:
1661 emitStateSP->nextTermFixup = pc; /* offset to following term */
1662 pc += OFFSET_LEN;
1663 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1664 goto jump_too_big;
1665 emitStateSP->continueOp = REOP_ENDALT;
1666 ++emitStateSP;
1667 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1668 t = (RENode *) t->u.kid2;
1669 op = t->op;
1670 JS_ASSERT(op < REOP_LIMIT);
1671 continue;
1673 case REOP_ENDALT:
1675 * If we already patched emitStateSP->nextTermFixup to jump to
1676 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1677 * are done here.
1679 if (emitStateSP->jumpToJumpFlag)
1680 break;
1683 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1684 * REOP_ENDALT is executed only on successful match of the last
1685 * alternate in a group.
1687 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1688 goto jump_too_big;
1689 if (t->op != REOP_ALT) {
1690 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1691 goto jump_too_big;
1695 * If the program is bigger than the REOP_JUMP offset range, then
1696 * we must check for alternates before this one that are part of
1697 * the same group, and fix up their jump offsets to target jumps
1698 * close enough to fit in a 16-bit unsigned offset immediate.
1700 if ((size_t)(pc - re->program) > OFFSET_MAX &&
1701 emitStateSP > emitStateStack) {
1702 EmitStateStackEntry *esp, *esp2;
1703 jsbytecode *alt, *jump;
1704 ptrdiff_t span, header;
1706 esp2 = emitStateSP;
1707 alt = esp2->altHead;
1708 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1709 if (esp->continueOp == REOP_ENDALT &&
1710 !esp->jumpToJumpFlag &&
1711 esp->nextTermFixup + OFFSET_LEN == alt &&
1712 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1713 ? esp->endTermFixup
1714 : esp->nextTermFixup)) > OFFSET_MAX) {
1715 alt = esp->altHead;
1716 jump = esp->nextTermFixup;
1719 * The span must be 1 less than the distance from
1720 * jump offset to jump offset, so we actually jump
1721 * to a REOP_JUMP bytecode, not to its offset!
1723 for (;;) {
1724 JS_ASSERT(jump < esp2->nextTermFixup);
1725 span = esp2->nextTermFixup - jump - 1;
1726 if ((size_t)span <= OFFSET_MAX)
1727 break;
1728 do {
1729 if (--esp2 == esp)
1730 goto jump_too_big;
1731 } while (esp2->continueOp != REOP_ENDALT);
1734 jump[0] = JUMP_OFFSET_HI(span);
1735 jump[1] = JUMP_OFFSET_LO(span);
1737 if (esp->continueNode->op != REOP_ALT) {
1739 * We must patch the offset at esp->endTermFixup
1740 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1741 * If we're unlucky and endTermFixup is more than
1742 * OFFSET_MAX bytes from its target, we cheat by
1743 * jumping 6 bytes to the jump whose offset is at
1744 * esp->nextTermFixup, which has the same target.
1746 jump = esp->endTermFixup;
1747 header = esp->nextTermFixup - jump;
1748 span += header;
1749 if ((size_t)span > OFFSET_MAX)
1750 span = header;
1752 jump[0] = JUMP_OFFSET_HI(span);
1753 jump[1] = JUMP_OFFSET_LO(span);
1756 esp->jumpToJumpFlag = JS_TRUE;
1760 break;
1762 case REOP_ALT:
1763 JS_ASSERT(emitStateSP);
1764 emitStateSP->altHead = pc - 1;
1765 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1766 pc += OFFSET_LEN;
1767 emitStateSP->continueNode = t;
1768 emitStateSP->continueOp = REOP_JUMP;
1769 emitStateSP->jumpToJumpFlag = JS_FALSE;
1770 ++emitStateSP;
1771 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1772 t = (RENode *) t->kid;
1773 op = t->op;
1774 JS_ASSERT(op < REOP_LIMIT);
1775 continue;
1777 case REOP_FLAT:
1779 * Coalesce FLATs if possible and if it would not increase bytecode
1780 * beyond preallocated limit. The latter happens only when bytecode
1781 * size for coalesced string with offset p and length 2 exceeds 6
1782 * bytes preallocated for 2 single char nodes, i.e. when
1783 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1784 * GetCompactIndexWidth(p) > 4.
1785 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1786 * nodes strictly decreases bytecode size, the check has to be
1787 * done only for the first coalescing.
1789 if (t->kid &&
1790 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1792 while (t->next &&
1793 t->next->op == REOP_FLAT &&
1794 (jschar*)t->kid + t->u.flat.length ==
1795 (jschar*)t->next->kid) {
1796 t->u.flat.length += t->next->u.flat.length;
1797 t->next = t->next->next;
1800 if (t->kid && t->u.flat.length > 1) {
1801 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1802 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1803 pc = WriteCompactIndex(pc, t->u.flat.length);
1804 } else if (t->u.flat.chr < 256) {
1805 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1806 *pc++ = (jsbytecode) t->u.flat.chr;
1807 } else {
1808 pc[-1] = (state->flags & JSREG_FOLD)
1809 ? REOP_UCFLAT1i
1810 : REOP_UCFLAT1;
1811 SET_ARG(pc, t->u.flat.chr);
1812 pc += ARG_LEN;
1814 break;
1816 case REOP_LPAREN:
1817 JS_ASSERT(emitStateSP);
1818 pc = WriteCompactIndex(pc, t->u.parenIndex);
1819 emitStateSP->continueNode = t;
1820 emitStateSP->continueOp = REOP_RPAREN;
1821 ++emitStateSP;
1822 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1823 t = (RENode *) t->kid;
1824 op = t->op;
1825 continue;
1827 case REOP_RPAREN:
1828 pc = WriteCompactIndex(pc, t->u.parenIndex);
1829 break;
1831 case REOP_BACKREF:
1832 pc = WriteCompactIndex(pc, t->u.parenIndex);
1833 break;
1835 case REOP_ASSERT:
1836 JS_ASSERT(emitStateSP);
1837 emitStateSP->nextTermFixup = pc;
1838 pc += OFFSET_LEN;
1839 emitStateSP->continueNode = t;
1840 emitStateSP->continueOp = REOP_ASSERTTEST;
1841 ++emitStateSP;
1842 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1843 t = (RENode *) t->kid;
1844 op = t->op;
1845 continue;
1847 case REOP_ASSERTTEST:
1848 case REOP_ASSERTNOTTEST:
1849 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1850 goto jump_too_big;
1851 break;
1853 case REOP_ASSERT_NOT:
1854 JS_ASSERT(emitStateSP);
1855 emitStateSP->nextTermFixup = pc;
1856 pc += OFFSET_LEN;
1857 emitStateSP->continueNode = t;
1858 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1859 ++emitStateSP;
1860 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1861 t = (RENode *) t->kid;
1862 op = t->op;
1863 continue;
1865 case REOP_QUANT:
1866 JS_ASSERT(emitStateSP);
1867 if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1868 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1869 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1870 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1871 } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1872 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1873 } else {
1874 if (!t->u.range.greedy)
1875 pc[-1] = REOP_MINIMALQUANT;
1876 pc = WriteCompactIndex(pc, t->u.range.min);
1878 * Write max + 1 to avoid using size_t(max) + 1 bytes
1879 * for (uintN)-1 sentinel.
1881 pc = WriteCompactIndex(pc, t->u.range.max + 1);
1883 emitStateSP->nextTermFixup = pc;
1884 pc += OFFSET_LEN;
1885 emitStateSP->continueNode = t;
1886 emitStateSP->continueOp = REOP_ENDCHILD;
1887 ++emitStateSP;
1888 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1889 t = (RENode *) t->kid;
1890 op = t->op;
1891 continue;
1893 case REOP_ENDCHILD:
1894 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1895 goto jump_too_big;
1896 break;
1898 case REOP_CLASS:
1899 if (!t->u.ucclass.sense)
1900 pc[-1] = REOP_NCLASS;
1901 pc = WriteCompactIndex(pc, t->u.ucclass.index);
1902 charSet = &re->classList[t->u.ucclass.index];
1903 charSet->converted = JS_FALSE;
1904 charSet->length = t->u.ucclass.bmsize;
1905 charSet->u.src.startIndex = t->u.ucclass.startIndex;
1906 charSet->u.src.length = t->u.ucclass.kidlen;
1907 charSet->sense = t->u.ucclass.sense;
1908 break;
1910 default:
1911 break;
1914 t = t->next;
1915 if (t) {
1916 op = t->op;
1917 } else {
1918 if (emitStateSP == emitStateStack)
1919 break;
1920 --emitStateSP;
1921 t = emitStateSP->continueNode;
1922 op = (REOp) emitStateSP->continueOp;
1926 cleanup:
1927 if (emitStateStack)
1928 JS_free(state->context, emitStateStack);
1929 return pc;
1931 jump_too_big:
1932 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1933 pc = NULL;
1934 goto cleanup;
1938 JSRegExp *
1939 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1940 JSString *str, uintN flags, JSBool flat)
1942 JSRegExp *re;
1943 void *mark;
1944 CompilerState state;
1945 size_t resize;
1946 jsbytecode *endPC;
1947 uintN i;
1948 size_t len;
1950 re = NULL;
1951 mark = JS_ARENA_MARK(&cx->tempPool);
1952 len = JSSTRING_LENGTH(str);
1954 state.context = cx;
1955 state.tokenStream = ts;
1956 state.cp = js_UndependString(cx, str);
1957 if (!state.cp)
1958 goto out;
1959 state.cpbegin = state.cp;
1960 state.cpend = state.cp + len;
1961 state.flags = flags;
1962 state.parenCount = 0;
1963 state.classCount = 0;
1964 state.progLength = 0;
1965 state.treeDepth = 0;
1966 state.classBitmapsMem = 0;
1967 for (i = 0; i < CLASS_CACHE_SIZE; i++)
1968 state.classCache[i].start = NULL;
1970 if (len != 0 && flat) {
1971 state.result = NewRENode(&state, REOP_FLAT);
1972 if (!state.result)
1973 goto out;
1974 state.result->u.flat.chr = *state.cpbegin;
1975 state.result->u.flat.length = len;
1976 state.result->kid = (void *) state.cpbegin;
1977 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1978 state.progLength += 1 + GetCompactIndexWidth(0)
1979 + GetCompactIndexWidth(len);
1980 } else {
1981 if (!ParseRegExp(&state))
1982 goto out;
1984 resize = offsetof(JSRegExp, program) + state.progLength + 1;
1985 re = (JSRegExp *) JS_malloc(cx, resize);
1986 if (!re)
1987 goto out;
1989 re->nrefs = 1;
1990 JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
1991 re->classCount = state.classCount;
1992 if (re->classCount) {
1993 re->classList = (RECharSet *)
1994 JS_malloc(cx, re->classCount * sizeof(RECharSet));
1995 if (!re->classList) {
1996 js_DestroyRegExp(cx, re);
1997 re = NULL;
1998 goto out;
2000 for (i = 0; i < re->classCount; i++)
2001 re->classList[i].converted = JS_FALSE;
2002 } else {
2003 re->classList = NULL;
2005 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
2006 if (!endPC) {
2007 js_DestroyRegExp(cx, re);
2008 re = NULL;
2009 goto out;
2011 *endPC++ = REOP_END;
2013 * Check whether size was overestimated and shrink using realloc.
2014 * This is safe since no pointers to newly parsed regexp or its parts
2015 * besides re exist here.
2017 if ((size_t)(endPC - re->program) != state.progLength + 1) {
2018 JSRegExp *tmp;
2019 JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
2020 resize = offsetof(JSRegExp, program) + (endPC - re->program);
2021 tmp = (JSRegExp *) JS_realloc(cx, re, resize);
2022 if (tmp)
2023 re = tmp;
2026 re->flags = flags;
2027 re->parenCount = state.parenCount;
2028 re->source = str;
2030 out:
2031 JS_ARENA_RELEASE(&cx->tempPool, mark);
2032 return re;
2035 JSRegExp *
2036 js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
2038 uintN flags;
2039 jschar *s;
2040 size_t i, n;
2041 char charBuf[2];
2043 flags = 0;
2044 if (opt) {
2045 JSSTRING_CHARS_AND_LENGTH(opt, s, n);
2046 for (i = 0; i < n; i++) {
2047 switch (s[i]) {
2048 case 'g':
2049 flags |= JSREG_GLOB;
2050 break;
2051 case 'i':
2052 flags |= JSREG_FOLD;
2053 break;
2054 case 'm':
2055 flags |= JSREG_MULTILINE;
2056 break;
2057 case 'y':
2058 flags |= JSREG_STICKY;
2059 break;
2060 default:
2061 charBuf[0] = (char)s[i];
2062 charBuf[1] = '\0';
2063 JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
2064 js_GetErrorMessage, NULL,
2065 JSMSG_BAD_FLAG, charBuf);
2066 return NULL;
2070 return js_NewRegExp(cx, NULL, str, flags, flat);
2074 * Save the current state of the match - the position in the input
2075 * text as well as the position in the bytecode. The state of any
2076 * parent expressions is also saved (preceding state).
2077 * Contents of parenCount parentheses from parenIndex are also saved.
2079 static REBackTrackData *
2080 PushBackTrackState(REGlobalData *gData, REOp op,
2081 jsbytecode *target, REMatchState *x, const jschar *cp,
2082 size_t parenIndex, size_t parenCount)
2084 size_t i;
2085 REBackTrackData *result =
2086 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2088 size_t sz = sizeof(REBackTrackData) +
2089 gData->stateStackTop * sizeof(REProgState) +
2090 parenCount * sizeof(RECapture);
2092 ptrdiff_t btsize = gData->backTrackStackSize;
2093 ptrdiff_t btincr = ((char *)result + sz) -
2094 ((char *)gData->backTrackStack + btsize);
2096 re_debug("\tBT_Push: %lu,%lu",
2097 (unsigned long) parenIndex, (unsigned long) parenCount);
2099 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
2100 if (btincr > 0) {
2101 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2103 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
2104 btincr = JS_ROUNDUP(btincr, btsize);
2105 JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2106 &gData->cx->regexpPool, btsize, btincr);
2107 if (!gData->backTrackStack) {
2108 js_ReportOutOfScriptQuota(gData->cx);
2109 gData->ok = JS_FALSE;
2110 return NULL;
2112 gData->backTrackStackSize = btsize + btincr;
2113 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2115 gData->backTrackSP = result;
2116 result->sz = gData->cursz;
2117 gData->cursz = sz;
2119 result->backtrack_op = op;
2120 result->backtrack_pc = target;
2121 result->cp = cp;
2122 result->parenCount = parenCount;
2123 result->parenIndex = parenIndex;
2125 result->saveStateStackTop = gData->stateStackTop;
2126 JS_ASSERT(gData->stateStackTop);
2127 memcpy(result + 1, gData->stateStack,
2128 sizeof(REProgState) * result->saveStateStackTop);
2130 if (parenCount != 0) {
2131 memcpy((char *)(result + 1) +
2132 sizeof(REProgState) * result->saveStateStackTop,
2133 &x->parens[parenIndex],
2134 sizeof(RECapture) * parenCount);
2135 for (i = 0; i != parenCount; i++)
2136 x->parens[parenIndex + i].index = -1;
2139 return result;
2144 * Consecutive literal characters.
2146 #if 0
2147 static REMatchState *
2148 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2149 size_t length)
2151 size_t i;
2152 if (length > gData->cpend - x->cp)
2153 return NULL;
2154 for (i = 0; i != length; i++) {
2155 if (matchChars[i] != x->cp[i])
2156 return NULL;
2158 x->cp += length;
2159 return x;
2161 #endif
2163 static JS_ALWAYS_INLINE REMatchState *
2164 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2165 size_t length)
2167 size_t i;
2168 JS_ASSERT(gData->cpend >= x->cp);
2169 if (length > (size_t)(gData->cpend - x->cp))
2170 return NULL;
2171 for (i = 0; i != length; i++) {
2172 if (upcase(matchChars[i]) != upcase(x->cp[i]))
2173 return NULL;
2175 x->cp += length;
2176 return x;
2180 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2181 * 2. If E is not a character then go to step 6.
2182 * 3. Let ch be E's character.
2183 * 4. Let A be a one-element RECharSet containing the character ch.
2184 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2185 * 6. E must be an integer. Let n be that integer.
2186 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2187 * 8. Return an internal Matcher closure that takes two arguments, a State x
2188 * and a Continuation c, and performs the following:
2189 * 1. Let cap be x's captures internal array.
2190 * 2. Let s be cap[n].
2191 * 3. If s is undefined, then call c(x) and return its result.
2192 * 4. Let e be x's endIndex.
2193 * 5. Let len be s's length.
2194 * 6. Let f be e+len.
2195 * 7. If f>InputLength, return failure.
2196 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2197 * such that Canonicalize(s[i]) is not the same character as
2198 * Canonicalize(Input [e+i]), then return failure.
2199 * 9. Let y be the State (f, cap).
2200 * 10. Call c(y) and return its result.
2202 static REMatchState *
2203 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2205 size_t len, i;
2206 const jschar *parenContent;
2207 RECapture *cap = &x->parens[parenIndex];
2209 if (cap->index == -1)
2210 return x;
2212 len = cap->length;
2213 if (x->cp + len > gData->cpend)
2214 return NULL;
2216 parenContent = &gData->cpbegin[cap->index];
2217 if (gData->regexp->flags & JSREG_FOLD) {
2218 for (i = 0; i < len; i++) {
2219 if (upcase(parenContent[i]) != upcase(x->cp[i]))
2220 return NULL;
2222 } else {
2223 for (i = 0; i < len; i++) {
2224 if (parenContent[i] != x->cp[i])
2225 return NULL;
2228 x->cp += len;
2229 return x;
2233 /* Add a single character to the RECharSet */
2234 static void
2235 AddCharacterToCharSet(RECharSet *cs, jschar c)
2237 uintN byteIndex = (uintN)(c >> 3);
2238 JS_ASSERT(c <= cs->length);
2239 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2243 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2244 static void
2245 AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
2247 uintN i;
2249 uintN byteIndex1 = c1 >> 3;
2250 uintN byteIndex2 = c2 >> 3;
2252 JS_ASSERT(c2 <= cs->length && c1 <= c2);
2254 c1 &= 0x7;
2255 c2 &= 0x7;
2257 if (byteIndex1 == byteIndex2) {
2258 cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2259 } else {
2260 cs->u.bits[byteIndex1] |= 0xFF << c1;
2261 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2262 cs->u.bits[i] = 0xFF;
2263 cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2267 struct CharacterRange {
2268 jschar start;
2269 jschar end;
2273 * The following characters are taken from the ECMA-262 standard, section 7.2
2274 * and 7.3, and the Unicode 3 standard, Table 6-1.
2276 static const CharacterRange WhiteSpaceRanges[] = {
2277 /* TAB, LF, VT, FF, CR */
2278 { 0x0009, 0x000D },
2279 /* SPACE */
2280 { 0x0020, 0x0020 },
2281 /* NO-BREAK SPACE */
2282 { 0x00A0, 0x00A0 },
2284 * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
2285 * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
2286 * HAIR SPACE, ZERO WIDTH SPACE
2288 { 0x2000, 0x200B },
2289 /* LS, PS */
2290 { 0x2028, 0x2029 },
2291 /* NARROW NO-BREAK SPACE */
2292 { 0x202F, 0x202F },
2293 /* IDEOGRAPHIC SPACE */
2294 { 0x3000, 0x3000 }
2297 /* ECMA-262 standard, section 15.10.2.6. */
2298 static const CharacterRange WordRanges[] = {
2299 { jschar('0'), jschar('9') },
2300 { jschar('A'), jschar('Z') },
2301 { jschar('_'), jschar('_') },
2302 { jschar('a'), jschar('z') }
2305 static void
2306 AddCharacterRanges(RECharSet *charSet,
2307 const CharacterRange *range,
2308 const CharacterRange *end)
2310 for (; range < end; ++range)
2311 AddCharacterRangeToCharSet(charSet, range->start, range->end);
2314 static void
2315 AddInvertedCharacterRanges(RECharSet *charSet,
2316 const CharacterRange *range,
2317 const CharacterRange *end)
2319 uint16 previous = 0;
2320 for (; range < end; ++range) {
2321 AddCharacterRangeToCharSet(charSet, previous, range->start - 1);
2322 previous = range->end + 1;
2324 AddCharacterRangeToCharSet(charSet, previous, charSet->length);
2327 /* Compile the source of the class into a RECharSet */
2328 static JSBool
2329 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2331 const jschar *src, *end;
2332 JSBool inRange = JS_FALSE;
2333 jschar rangeStart = 0;
2334 uintN byteLength, n;
2335 jschar c, thisCh;
2336 intN nDigits, i;
2338 JS_ASSERT(!charSet->converted);
2340 * Assert that startIndex and length points to chars inside [] inside
2341 * source string.
2343 JS_ASSERT(1 <= charSet->u.src.startIndex);
2344 JS_ASSERT(charSet->u.src.startIndex
2345 < JSSTRING_LENGTH(gData->regexp->source));
2346 JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
2347 - 1 - charSet->u.src.startIndex);
2349 charSet->converted = JS_TRUE;
2350 src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
2351 end = src + charSet->u.src.length;
2352 JS_ASSERT(src[-1] == '[');
2353 JS_ASSERT(end[0] == ']');
2355 byteLength = (charSet->length >> 3) + 1;
2356 charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
2357 if (!charSet->u.bits) {
2358 JS_ReportOutOfMemory(gData->cx);
2359 gData->ok = JS_FALSE;
2360 return JS_FALSE;
2362 memset(charSet->u.bits, 0, byteLength);
2364 if (src == end)
2365 return JS_TRUE;
2367 if (*src == '^') {
2368 JS_ASSERT(charSet->sense == JS_FALSE);
2369 ++src;
2370 } else {
2371 JS_ASSERT(charSet->sense == JS_TRUE);
2374 while (src != end) {
2375 switch (*src) {
2376 case '\\':
2377 ++src;
2378 c = *src++;
2379 switch (c) {
2380 case 'b':
2381 thisCh = 0x8;
2382 break;
2383 case 'f':
2384 thisCh = 0xC;
2385 break;
2386 case 'n':
2387 thisCh = 0xA;
2388 break;
2389 case 'r':
2390 thisCh = 0xD;
2391 break;
2392 case 't':
2393 thisCh = 0x9;
2394 break;
2395 case 'v':
2396 thisCh = 0xB;
2397 break;
2398 case 'c':
2399 if (src < end && JS_ISWORD(*src)) {
2400 thisCh = (jschar)(*src++ & 0x1F);
2401 } else {
2402 --src;
2403 thisCh = '\\';
2405 break;
2406 case 'x':
2407 nDigits = 2;
2408 goto lexHex;
2409 case 'u':
2410 nDigits = 4;
2411 lexHex:
2412 n = 0;
2413 for (i = 0; (i < nDigits) && (src < end); i++) {
2414 uintN digit;
2415 c = *src++;
2416 if (!isASCIIHexDigit(c, &digit)) {
2418 * Back off to accepting the original '\'
2419 * as a literal
2421 src -= i + 1;
2422 n = '\\';
2423 break;
2425 n = (n << 4) | digit;
2427 thisCh = (jschar)n;
2428 break;
2429 case '0':
2430 case '1':
2431 case '2':
2432 case '3':
2433 case '4':
2434 case '5':
2435 case '6':
2436 case '7':
2438 * This is a non-ECMA extension - decimal escapes (in this
2439 * case, octal!) are supposed to be an error inside class
2440 * ranges, but supported here for backwards compatibility.
2442 n = JS7_UNDEC(c);
2443 c = *src;
2444 if ('0' <= c && c <= '7') {
2445 src++;
2446 n = 8 * n + JS7_UNDEC(c);
2447 c = *src;
2448 if ('0' <= c && c <= '7') {
2449 src++;
2450 i = 8 * n + JS7_UNDEC(c);
2451 if (i <= 0377)
2452 n = i;
2453 else
2454 src--;
2457 thisCh = (jschar)n;
2458 break;
2460 case 'd':
2461 AddCharacterRangeToCharSet(charSet, '0', '9');
2462 continue; /* don't need range processing */
2463 case 'D':
2464 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2465 AddCharacterRangeToCharSet(charSet,
2466 (jschar)('9' + 1),
2467 (jschar)charSet->length);
2468 continue;
2469 case 's':
2470 AddCharacterRanges(charSet, WhiteSpaceRanges,
2471 WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
2472 continue;
2473 case 'S':
2474 AddInvertedCharacterRanges(charSet, WhiteSpaceRanges,
2475 WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
2476 continue;
2477 case 'w':
2478 AddCharacterRanges(charSet, WordRanges,
2479 WordRanges + JS_ARRAY_LENGTH(WordRanges));
2480 continue;
2481 case 'W':
2482 AddInvertedCharacterRanges(charSet, WordRanges,
2483 WordRanges + JS_ARRAY_LENGTH(WordRanges));
2484 continue;
2485 default:
2486 thisCh = c;
2487 break;
2490 break;
2492 default:
2493 thisCh = *src++;
2494 break;
2497 if (inRange) {
2498 if (gData->regexp->flags & JSREG_FOLD) {
2499 int i;
2501 JS_ASSERT(rangeStart <= thisCh);
2502 for (i = rangeStart; i <= thisCh; i++) {
2503 jschar uch, dch;
2505 AddCharacterToCharSet(charSet, i);
2506 uch = upcase(i);
2507 dch = downcase(i);
2508 if (i != uch)
2509 AddCharacterToCharSet(charSet, uch);
2510 if (i != dch)
2511 AddCharacterToCharSet(charSet, dch);
2513 } else {
2514 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2516 inRange = JS_FALSE;
2517 } else {
2518 if (gData->regexp->flags & JSREG_FOLD) {
2519 AddCharacterToCharSet(charSet, upcase(thisCh));
2520 AddCharacterToCharSet(charSet, downcase(thisCh));
2521 } else {
2522 AddCharacterToCharSet(charSet, thisCh);
2524 if (src < end - 1) {
2525 if (*src == '-') {
2526 ++src;
2527 inRange = JS_TRUE;
2528 rangeStart = thisCh;
2533 return JS_TRUE;
2536 void
2537 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2539 if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2540 if (re->classList) {
2541 uintN i;
2542 for (i = 0; i < re->classCount; i++) {
2543 if (re->classList[i].converted)
2544 JS_free(cx, re->classList[i].u.bits);
2545 re->classList[i].u.bits = NULL;
2547 JS_free(cx, re->classList);
2549 JS_free(cx, re);
2553 static JSBool
2554 ReallocStateStack(REGlobalData *gData)
2556 size_t limit = gData->stateStackLimit;
2557 size_t sz = sizeof(REProgState) * limit;
2559 JS_ARENA_GROW_CAST(gData->stateStack, REProgState *,
2560 &gData->cx->regexpPool, sz, sz);
2561 if (!gData->stateStack) {
2562 js_ReportOutOfScriptQuota(gData->cx);
2563 gData->ok = JS_FALSE;
2564 return JS_FALSE;
2566 gData->stateStackLimit = limit + limit;
2567 return JS_TRUE;
2570 #define PUSH_STATE_STACK(data) \
2571 JS_BEGIN_MACRO \
2572 ++(data)->stateStackTop; \
2573 if ((data)->stateStackTop == (data)->stateStackLimit && \
2574 !ReallocStateStack((data))) { \
2575 return NULL; \
2577 JS_END_MACRO
2580 * Apply the current op against the given input to see if it's going to match
2581 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2582 * true, then update the current state's cp. Always update startpc to the next
2583 * op.
2585 static JS_ALWAYS_INLINE REMatchState *
2586 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2587 jsbytecode **startpc, JSBool updatecp)
2589 REMatchState *result = NULL;
2590 jschar matchCh;
2591 size_t parenIndex;
2592 size_t offset, length, index;
2593 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2594 jschar *source;
2595 const jschar *startcp = x->cp;
2596 jschar ch;
2597 RECharSet *charSet;
2599 #ifdef REGEXP_DEBUG
2600 const char *opname = reop_names[op];
2601 re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2602 gData->stateStackTop * 2, "", opname);
2603 #endif
2604 switch (op) {
2605 case REOP_EMPTY:
2606 result = x;
2607 break;
2608 case REOP_BOL:
2609 if (x->cp != gData->cpbegin) {
2610 if (!gData->cx->regExpStatics.multiline &&
2611 !(gData->regexp->flags & JSREG_MULTILINE)) {
2612 break;
2614 if (!RE_IS_LINE_TERM(x->cp[-1]))
2615 break;
2617 result = x;
2618 break;
2619 case REOP_EOL:
2620 if (x->cp != gData->cpend) {
2621 if (!gData->cx->regExpStatics.multiline &&
2622 !(gData->regexp->flags & JSREG_MULTILINE)) {
2623 break;
2625 if (!RE_IS_LINE_TERM(*x->cp))
2626 break;
2628 result = x;
2629 break;
2630 case REOP_WBDRY:
2631 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2632 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2633 result = x;
2635 break;
2636 case REOP_WNONBDRY:
2637 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2638 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2639 result = x;
2641 break;
2642 case REOP_DOT:
2643 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2644 result = x;
2645 result->cp++;
2647 break;
2648 case REOP_DIGIT:
2649 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2650 result = x;
2651 result->cp++;
2653 break;
2654 case REOP_NONDIGIT:
2655 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2656 result = x;
2657 result->cp++;
2659 break;
2660 case REOP_ALNUM:
2661 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2662 result = x;
2663 result->cp++;
2665 break;
2666 case REOP_NONALNUM:
2667 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2668 result = x;
2669 result->cp++;
2671 break;
2672 case REOP_SPACE:
2673 if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2674 result = x;
2675 result->cp++;
2677 break;
2678 case REOP_NONSPACE:
2679 if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2680 result = x;
2681 result->cp++;
2683 break;
2684 case REOP_BACKREF:
2685 pc = ReadCompactIndex(pc, &parenIndex);
2686 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2687 result = BackrefMatcher(gData, x, parenIndex);
2688 break;
2689 case REOP_FLAT:
2690 pc = ReadCompactIndex(pc, &offset);
2691 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2692 pc = ReadCompactIndex(pc, &length);
2693 JS_ASSERT(1 <= length);
2694 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2695 if (length <= (size_t)(gData->cpend - x->cp)) {
2696 source = JSSTRING_CHARS(gData->regexp->source) + offset;
2697 re_debug_chars(source, length);
2698 for (index = 0; index != length; index++) {
2699 if (source[index] != x->cp[index])
2700 return NULL;
2702 x->cp += length;
2703 result = x;
2705 break;
2706 case REOP_FLAT1:
2707 matchCh = *pc++;
2708 re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2709 if (x->cp != gData->cpend && *x->cp == matchCh) {
2710 result = x;
2711 result->cp++;
2713 break;
2714 case REOP_FLATi:
2715 pc = ReadCompactIndex(pc, &offset);
2716 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2717 pc = ReadCompactIndex(pc, &length);
2718 JS_ASSERT(1 <= length);
2719 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2720 source = JSSTRING_CHARS(gData->regexp->source);
2721 result = FlatNIMatcher(gData, x, source + offset, length);
2722 break;
2723 case REOP_FLAT1i:
2724 matchCh = *pc++;
2725 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2726 result = x;
2727 result->cp++;
2729 break;
2730 case REOP_UCFLAT1:
2731 matchCh = GET_ARG(pc);
2732 re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2733 pc += ARG_LEN;
2734 if (x->cp != gData->cpend && *x->cp == matchCh) {
2735 result = x;
2736 result->cp++;
2738 break;
2739 case REOP_UCFLAT1i:
2740 matchCh = GET_ARG(pc);
2741 pc += ARG_LEN;
2742 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2743 result = x;
2744 result->cp++;
2746 break;
2747 case REOP_CLASS:
2748 pc = ReadCompactIndex(pc, &index);
2749 JS_ASSERT(index < gData->regexp->classCount);
2750 if (x->cp != gData->cpend) {
2751 charSet = &gData->regexp->classList[index];
2752 JS_ASSERT(charSet->converted);
2753 ch = *x->cp;
2754 index = ch >> 3;
2755 if (charSet->length != 0 &&
2756 ch <= charSet->length &&
2757 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2758 result = x;
2759 result->cp++;
2762 break;
2763 case REOP_NCLASS:
2764 pc = ReadCompactIndex(pc, &index);
2765 JS_ASSERT(index < gData->regexp->classCount);
2766 if (x->cp != gData->cpend) {
2767 charSet = &gData->regexp->classList[index];
2768 JS_ASSERT(charSet->converted);
2769 ch = *x->cp;
2770 index = ch >> 3;
2771 if (charSet->length == 0 ||
2772 ch > charSet->length ||
2773 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2774 result = x;
2775 result->cp++;
2778 break;
2780 default:
2781 JS_ASSERT(JS_FALSE);
2783 if (result) {
2784 if (!updatecp)
2785 x->cp = startcp;
2786 *startpc = pc;
2787 re_debug(" * ");
2788 return result;
2790 x->cp = startcp;
2791 return NULL;
2794 static JS_ALWAYS_INLINE REMatchState *
2795 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2797 REMatchState *result = NULL;
2798 REBackTrackData *backTrackData;
2799 jsbytecode *nextpc, *testpc;
2800 REOp nextop;
2801 RECapture *cap;
2802 REProgState *curState;
2803 const jschar *startcp;
2804 size_t parenIndex, k;
2805 size_t parenSoFar = 0;
2807 jschar matchCh1, matchCh2;
2808 RECharSet *charSet;
2810 JSBool anchor;
2811 jsbytecode *pc = gData->regexp->program;
2812 REOp op = (REOp) *pc++;
2815 * If the first node is a simple match, step the index into the string
2816 * until that match is made, or fail if it can't be found at all.
2818 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2819 anchor = JS_FALSE;
2820 while (x->cp <= gData->cpend) {
2821 nextpc = pc; /* reset back to start each time */
2822 result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2823 if (result) {
2824 anchor = JS_TRUE;
2825 x = result;
2826 pc = nextpc; /* accept skip to next opcode */
2827 op = (REOp) *pc++;
2828 JS_ASSERT(op < REOP_LIMIT);
2829 break;
2831 gData->skipped++;
2832 x->cp++;
2834 if (!anchor)
2835 goto bad;
2838 for (;;) {
2839 #ifdef REGEXP_DEBUG
2840 const char *opname = reop_names[op];
2841 re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2842 gData->stateStackTop * 2, "", opname);
2843 #endif
2844 if (REOP_IS_SIMPLE(op)) {
2845 result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2846 } else {
2847 curState = &gData->stateStack[gData->stateStackTop];
2848 switch (op) {
2849 case REOP_END:
2850 goto good;
2851 case REOP_ALTPREREQ2:
2852 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2853 pc += ARG_LEN;
2854 matchCh2 = GET_ARG(pc);
2855 pc += ARG_LEN;
2856 k = GET_ARG(pc);
2857 pc += ARG_LEN;
2859 if (x->cp != gData->cpend) {
2860 if (*x->cp == matchCh2)
2861 goto doAlt;
2863 charSet = &gData->regexp->classList[k];
2864 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2865 goto bad;
2866 matchCh1 = *x->cp;
2867 k = matchCh1 >> 3;
2868 if ((charSet->length == 0 ||
2869 matchCh1 > charSet->length ||
2870 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2871 charSet->sense) {
2872 goto doAlt;
2875 result = NULL;
2876 break;
2878 case REOP_ALTPREREQ:
2879 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2880 pc += ARG_LEN;
2881 matchCh1 = GET_ARG(pc);
2882 pc += ARG_LEN;
2883 matchCh2 = GET_ARG(pc);
2884 pc += ARG_LEN;
2885 if (x->cp == gData->cpend ||
2886 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2887 result = NULL;
2888 break;
2890 /* else false thru... */
2892 case REOP_ALT:
2893 doAlt:
2894 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2895 pc += ARG_LEN; /* start of this alternate */
2896 curState->parenSoFar = parenSoFar;
2897 PUSH_STATE_STACK(gData);
2898 op = (REOp) *pc++;
2899 startcp = x->cp;
2900 if (REOP_IS_SIMPLE(op)) {
2901 if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2902 op = (REOp) *nextpc++;
2903 pc = nextpc;
2904 continue;
2906 result = x;
2907 op = (REOp) *pc++;
2909 nextop = (REOp) *nextpc++;
2910 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2911 goto bad;
2912 continue;
2915 * Occurs at (successful) end of REOP_ALT,
2917 case REOP_JUMP:
2919 * If we have not gotten a result here, it is because of an
2920 * empty match. Do the same thing REOP_EMPTY would do.
2922 if (!result)
2923 result = x;
2925 --gData->stateStackTop;
2926 pc += GET_OFFSET(pc);
2927 op = (REOp) *pc++;
2928 continue;
2931 * Occurs at last (successful) end of REOP_ALT,
2933 case REOP_ENDALT:
2935 * If we have not gotten a result here, it is because of an
2936 * empty match. Do the same thing REOP_EMPTY would do.
2938 if (!result)
2939 result = x;
2941 --gData->stateStackTop;
2942 op = (REOp) *pc++;
2943 continue;
2945 case REOP_LPAREN:
2946 pc = ReadCompactIndex(pc, &parenIndex);
2947 re_debug("[ %lu ]", (unsigned long) parenIndex);
2948 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2949 if (parenIndex + 1 > parenSoFar)
2950 parenSoFar = parenIndex + 1;
2951 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2952 x->parens[parenIndex].length = 0;
2953 op = (REOp) *pc++;
2954 continue;
2956 case REOP_RPAREN:
2958 ptrdiff_t delta;
2960 pc = ReadCompactIndex(pc, &parenIndex);
2961 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2962 cap = &x->parens[parenIndex];
2963 delta = x->cp - (gData->cpbegin + cap->index);
2964 cap->length = (delta < 0) ? 0 : (size_t) delta;
2965 op = (REOp) *pc++;
2967 if (!result)
2968 result = x;
2969 continue;
2971 case REOP_ASSERT:
2972 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2973 pc += ARG_LEN; /* start of ASSERT child */
2974 op = (REOp) *pc++;
2975 testpc = pc;
2976 if (REOP_IS_SIMPLE(op) &&
2977 !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
2978 result = NULL;
2979 break;
2981 curState->u.assertion.top =
2982 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2983 curState->u.assertion.sz = gData->cursz;
2984 curState->index = x->cp - gData->cpbegin;
2985 curState->parenSoFar = parenSoFar;
2986 PUSH_STATE_STACK(gData);
2987 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2988 nextpc, x, x->cp, 0, 0)) {
2989 goto bad;
2991 continue;
2993 case REOP_ASSERT_NOT:
2994 nextpc = pc + GET_OFFSET(pc);
2995 pc += ARG_LEN;
2996 op = (REOp) *pc++;
2997 testpc = pc;
2998 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2999 SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
3000 *testpc == REOP_ASSERTNOTTEST) {
3001 result = NULL;
3002 break;
3004 curState->u.assertion.top
3005 = (char *)gData->backTrackSP -
3006 (char *)gData->backTrackStack;
3007 curState->u.assertion.sz = gData->cursz;
3008 curState->index = x->cp - gData->cpbegin;
3009 curState->parenSoFar = parenSoFar;
3010 PUSH_STATE_STACK(gData);
3011 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
3012 nextpc, x, x->cp, 0, 0)) {
3013 goto bad;
3015 continue;
3017 case REOP_ASSERTTEST:
3018 --gData->stateStackTop;
3019 --curState;
3020 x->cp = gData->cpbegin + curState->index;
3021 gData->backTrackSP =
3022 (REBackTrackData *) ((char *)gData->backTrackStack +
3023 curState->u.assertion.top);
3024 gData->cursz = curState->u.assertion.sz;
3025 if (result)
3026 result = x;
3027 break;
3029 case REOP_ASSERTNOTTEST:
3030 --gData->stateStackTop;
3031 --curState;
3032 x->cp = gData->cpbegin + curState->index;
3033 gData->backTrackSP =
3034 (REBackTrackData *) ((char *)gData->backTrackStack +
3035 curState->u.assertion.top);
3036 gData->cursz = curState->u.assertion.sz;
3037 result = (!result) ? x : NULL;
3038 break;
3039 case REOP_STAR:
3040 curState->u.quantifier.min = 0;
3041 curState->u.quantifier.max = (uintN)-1;
3042 goto quantcommon;
3043 case REOP_PLUS:
3044 curState->u.quantifier.min = 1;
3045 curState->u.quantifier.max = (uintN)-1;
3046 goto quantcommon;
3047 case REOP_OPT:
3048 curState->u.quantifier.min = 0;
3049 curState->u.quantifier.max = 1;
3050 goto quantcommon;
3051 case REOP_QUANT:
3052 pc = ReadCompactIndex(pc, &k);
3053 curState->u.quantifier.min = k;
3054 pc = ReadCompactIndex(pc, &k);
3055 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3056 curState->u.quantifier.max = k - 1;
3057 JS_ASSERT(curState->u.quantifier.min
3058 <= curState->u.quantifier.max);
3059 quantcommon:
3060 if (curState->u.quantifier.max == 0) {
3061 pc = pc + GET_OFFSET(pc);
3062 op = (REOp) *pc++;
3063 result = x;
3064 continue;
3066 /* Step over <next> */
3067 nextpc = pc + ARG_LEN;
3068 op = (REOp) *nextpc++;
3069 startcp = x->cp;
3070 if (REOP_IS_SIMPLE(op)) {
3071 if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
3072 if (curState->u.quantifier.min == 0)
3073 result = x;
3074 else
3075 result = NULL;
3076 pc = pc + GET_OFFSET(pc);
3077 break;
3079 op = (REOp) *nextpc++;
3080 result = x;
3082 curState->index = startcp - gData->cpbegin;
3083 curState->continue_op = REOP_REPEAT;
3084 curState->continue_pc = pc;
3085 curState->parenSoFar = parenSoFar;
3086 PUSH_STATE_STACK(gData);
3087 if (curState->u.quantifier.min == 0 &&
3088 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
3089 0, 0)) {
3090 goto bad;
3092 pc = nextpc;
3093 continue;
3095 case REOP_ENDCHILD: /* marks the end of a quantifier child */
3096 pc = curState[-1].continue_pc;
3097 op = (REOp) curState[-1].continue_op;
3099 if (!result)
3100 result = x;
3101 continue;
3103 case REOP_REPEAT:
3104 --curState;
3105 do {
3106 --gData->stateStackTop;
3107 if (!result) {
3108 /* Failed, see if we have enough children. */
3109 if (curState->u.quantifier.min == 0)
3110 goto repeatDone;
3111 goto break_switch;
3113 if (curState->u.quantifier.min == 0 &&
3114 x->cp == gData->cpbegin + curState->index) {
3115 /* matched an empty string, that'll get us nowhere */
3116 result = NULL;
3117 goto break_switch;
3119 if (curState->u.quantifier.min != 0)
3120 curState->u.quantifier.min--;
3121 if (curState->u.quantifier.max != (uintN) -1)
3122 curState->u.quantifier.max--;
3123 if (curState->u.quantifier.max == 0)
3124 goto repeatDone;
3125 nextpc = pc + ARG_LEN;
3126 nextop = (REOp) *nextpc;
3127 startcp = x->cp;
3128 if (REOP_IS_SIMPLE(nextop)) {
3129 nextpc++;
3130 if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
3131 if (curState->u.quantifier.min == 0)
3132 goto repeatDone;
3133 result = NULL;
3134 goto break_switch;
3136 result = x;
3138 curState->index = startcp - gData->cpbegin;
3139 PUSH_STATE_STACK(gData);
3140 if (curState->u.quantifier.min == 0 &&
3141 !PushBackTrackState(gData, REOP_REPEAT,
3142 pc, x, startcp,
3143 curState->parenSoFar,
3144 parenSoFar -
3145 curState->parenSoFar)) {
3146 goto bad;
3148 } while (*nextpc == REOP_ENDCHILD);
3149 pc = nextpc;
3150 op = (REOp) *pc++;
3151 parenSoFar = curState->parenSoFar;
3152 continue;
3154 repeatDone:
3155 result = x;
3156 pc += GET_OFFSET(pc);
3157 goto break_switch;
3159 case REOP_MINIMALSTAR:
3160 curState->u.quantifier.min = 0;
3161 curState->u.quantifier.max = (uintN)-1;
3162 goto minimalquantcommon;
3163 case REOP_MINIMALPLUS:
3164 curState->u.quantifier.min = 1;
3165 curState->u.quantifier.max = (uintN)-1;
3166 goto minimalquantcommon;
3167 case REOP_MINIMALOPT:
3168 curState->u.quantifier.min = 0;
3169 curState->u.quantifier.max = 1;
3170 goto minimalquantcommon;
3171 case REOP_MINIMALQUANT:
3172 pc = ReadCompactIndex(pc, &k);
3173 curState->u.quantifier.min = k;
3174 pc = ReadCompactIndex(pc, &k);
3175 /* See REOP_QUANT comments about k - 1. */
3176 curState->u.quantifier.max = k - 1;
3177 JS_ASSERT(curState->u.quantifier.min
3178 <= curState->u.quantifier.max);
3179 minimalquantcommon:
3180 curState->index = x->cp - gData->cpbegin;
3181 curState->parenSoFar = parenSoFar;
3182 PUSH_STATE_STACK(gData);
3183 if (curState->u.quantifier.min != 0) {
3184 curState->continue_op = REOP_MINIMALREPEAT;
3185 curState->continue_pc = pc;
3186 /* step over <next> */
3187 pc += OFFSET_LEN;
3188 op = (REOp) *pc++;
3189 } else {
3190 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3191 pc, x, x->cp, 0, 0)) {
3192 goto bad;
3194 --gData->stateStackTop;
3195 pc = pc + GET_OFFSET(pc);
3196 op = (REOp) *pc++;
3198 continue;
3200 case REOP_MINIMALREPEAT:
3201 --gData->stateStackTop;
3202 --curState;
3204 re_debug("{%d,%d}", curState->u.quantifier.min,
3205 curState->u.quantifier.max);
3206 #define PREPARE_REPEAT() \
3207 JS_BEGIN_MACRO \
3208 curState->index = x->cp - gData->cpbegin; \
3209 curState->continue_op = REOP_MINIMALREPEAT; \
3210 curState->continue_pc = pc; \
3211 pc += ARG_LEN; \
3212 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3213 x->parens[k].index = -1; \
3214 PUSH_STATE_STACK(gData); \
3215 op = (REOp) *pc++; \
3216 JS_ASSERT(op < REOP_LIMIT); \
3217 JS_END_MACRO
3219 if (!result) {
3220 re_debug(" - ");
3222 * Non-greedy failure - try to consume another child.
3224 if (curState->u.quantifier.max == (uintN) -1 ||
3225 curState->u.quantifier.max > 0) {
3226 PREPARE_REPEAT();
3227 continue;
3229 /* Don't need to adjust pc since we're going to pop. */
3230 break;
3232 if (curState->u.quantifier.min == 0 &&
3233 x->cp == gData->cpbegin + curState->index) {
3234 /* Matched an empty string, that'll get us nowhere. */
3235 result = NULL;
3236 break;
3238 if (curState->u.quantifier.min != 0)
3239 curState->u.quantifier.min--;
3240 if (curState->u.quantifier.max != (uintN) -1)
3241 curState->u.quantifier.max--;
3242 if (curState->u.quantifier.min != 0) {
3243 PREPARE_REPEAT();
3244 continue;
3246 curState->index = x->cp - gData->cpbegin;
3247 curState->parenSoFar = parenSoFar;
3248 PUSH_STATE_STACK(gData);
3249 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3250 pc, x, x->cp,
3251 curState->parenSoFar,
3252 parenSoFar - curState->parenSoFar)) {
3253 goto bad;
3255 --gData->stateStackTop;
3256 pc = pc + GET_OFFSET(pc);
3257 op = (REOp) *pc++;
3258 JS_ASSERT(op < REOP_LIMIT);
3259 continue;
3260 default:
3261 JS_ASSERT(JS_FALSE);
3262 result = NULL;
3264 break_switch:;
3268 * If the match failed and there's a backtrack option, take it.
3269 * Otherwise this is a complete and utter failure.
3271 if (!result) {
3272 if (gData->cursz == 0)
3273 return NULL;
3274 if (!JS_CHECK_OPERATION_LIMIT(gData->cx, JSOW_JUMP)) {
3275 gData->ok = JS_FALSE;
3276 return NULL;
3279 /* Potentially detect explosive regex here. */
3280 gData->backTrackCount++;
3281 if (gData->backTrackLimit &&
3282 gData->backTrackCount >= gData->backTrackLimit) {
3283 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3284 JSMSG_REGEXP_TOO_COMPLEX);
3285 gData->ok = JS_FALSE;
3286 return NULL;
3289 backTrackData = gData->backTrackSP;
3290 gData->cursz = backTrackData->sz;
3291 gData->backTrackSP =
3292 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3293 x->cp = backTrackData->cp;
3294 pc = backTrackData->backtrack_pc;
3295 op = (REOp) backTrackData->backtrack_op;
3296 JS_ASSERT(op < REOP_LIMIT);
3297 gData->stateStackTop = backTrackData->saveStateStackTop;
3298 JS_ASSERT(gData->stateStackTop);
3300 memcpy(gData->stateStack, backTrackData + 1,
3301 sizeof(REProgState) * backTrackData->saveStateStackTop);
3302 curState = &gData->stateStack[gData->stateStackTop - 1];
3304 if (backTrackData->parenCount) {
3305 memcpy(&x->parens[backTrackData->parenIndex],
3306 (char *)(backTrackData + 1) +
3307 sizeof(REProgState) * backTrackData->saveStateStackTop,
3308 sizeof(RECapture) * backTrackData->parenCount);
3309 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3310 } else {
3311 for (k = curState->parenSoFar; k < parenSoFar; k++)
3312 x->parens[k].index = -1;
3313 parenSoFar = curState->parenSoFar;
3316 re_debug("\tBT_Pop: %ld,%ld",
3317 (unsigned long) backTrackData->parenIndex,
3318 (unsigned long) backTrackData->parenCount);
3319 continue;
3321 x = result;
3324 * Continue with the expression.
3326 op = (REOp)*pc++;
3327 JS_ASSERT(op < REOP_LIMIT);
3330 bad:
3331 re_debug("\n");
3332 return NULL;
3334 good:
3335 re_debug("\n");
3336 return x;
3339 static REMatchState *
3340 MatchRegExp(REGlobalData *gData, REMatchState *x)
3342 REMatchState *result;
3343 const jschar *cp = x->cp;
3344 const jschar *cp2;
3345 uintN j;
3348 * Have to include the position beyond the last character
3349 * in order to detect end-of-input/line condition.
3351 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3352 gData->skipped = cp2 - cp;
3353 x->cp = cp2;
3354 for (j = 0; j < gData->regexp->parenCount; j++)
3355 x->parens[j].index = -1;
3356 result = ExecuteREBytecode(gData, x);
3357 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3358 return result;
3359 gData->backTrackSP = gData->backTrackStack;
3360 gData->cursz = 0;
3361 gData->stateStackTop = 0;
3362 cp2 = cp + gData->skipped;
3364 return NULL;
3367 #define MIN_BACKTRACK_LIMIT 400000
3369 static REMatchState *
3370 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3372 REMatchState *result;
3373 uintN i;
3375 gData->backTrackStackSize = INITIAL_BACKTRACK;
3376 JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3377 &cx->regexpPool,
3378 INITIAL_BACKTRACK);
3379 if (!gData->backTrackStack)
3380 goto bad;
3382 gData->backTrackSP = gData->backTrackStack;
3383 gData->cursz = 0;
3384 gData->backTrackCount = 0;
3385 gData->backTrackLimit = 0;
3386 if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
3387 gData->backTrackLimit = length * length * length; /* O(n^3) */
3388 if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT)
3389 gData->backTrackLimit = MIN_BACKTRACK_LIMIT;
3392 gData->stateStackLimit = INITIAL_STATESTACK;
3393 JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3394 &cx->regexpPool,
3395 sizeof(REProgState) * INITIAL_STATESTACK);
3396 if (!gData->stateStack)
3397 goto bad;
3399 gData->stateStackTop = 0;
3400 gData->cx = cx;
3401 gData->regexp = re;
3402 gData->ok = JS_TRUE;
3404 JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3405 &cx->regexpPool,
3406 offsetof(REMatchState, parens)
3407 + re->parenCount * sizeof(RECapture));
3408 if (!result)
3409 goto bad;
3411 for (i = 0; i < re->classCount; i++) {
3412 if (!re->classList[i].converted &&
3413 !ProcessCharSet(gData, &re->classList[i])) {
3414 return NULL;
3418 return result;
3420 bad:
3421 js_ReportOutOfScriptQuota(cx);
3422 gData->ok = JS_FALSE;
3423 return NULL;
3426 JSBool
3427 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3428 JSBool test, jsval *rval)
3430 REGlobalData gData;
3431 REMatchState *x, *result;
3433 const jschar *cp, *ep;
3434 size_t i, length, start;
3435 JSSubString *morepar;
3436 JSBool ok;
3437 JSRegExpStatics *res;
3438 ptrdiff_t matchlen;
3439 uintN num, morenum;
3440 JSString *parstr, *matchstr;
3441 JSObject *obj;
3443 RECapture *parsub = NULL;
3444 void *mark;
3445 int64 *timestamp;
3448 * It's safe to load from cp because JSStrings have a zero at the end,
3449 * and we never let cp get beyond cpend.
3451 start = *indexp;
3452 JSSTRING_CHARS_AND_LENGTH(str, cp, length);
3453 if (start > length)
3454 start = length;
3455 gData.cpbegin = cp;
3456 gData.cpend = cp + length;
3457 cp += start;
3458 gData.start = start;
3459 gData.skipped = 0;
3461 if (!cx->regexpPool.first.next) {
3463 * The first arena in the regexpPool must have a timestamp at its base.
3465 JS_ARENA_ALLOCATE_CAST(timestamp, int64 *,
3466 &cx->regexpPool, sizeof *timestamp);
3467 if (!timestamp)
3468 return JS_FALSE;
3469 *timestamp = JS_Now();
3471 mark = JS_ARENA_MARK(&cx->regexpPool);
3473 x = InitMatch(cx, &gData, re, length);
3475 if (!x) {
3476 ok = JS_FALSE;
3477 goto out;
3479 x->cp = cp;
3482 * Call the recursive matcher to do the real work. Return null on mismatch
3483 * whether testing or not. On match, return an extended Array object.
3485 result = MatchRegExp(&gData, x);
3486 ok = gData.ok;
3487 if (!ok)
3488 goto out;
3489 if (!result) {
3490 *rval = JSVAL_NULL;
3491 goto out;
3493 cp = result->cp;
3494 i = cp - gData.cpbegin;
3495 *indexp = i;
3496 matchlen = i - (start + gData.skipped);
3497 ep = cp;
3498 cp -= matchlen;
3500 if (test) {
3502 * Testing for a match and updating cx->regExpStatics: don't allocate
3503 * an array object, do return true.
3505 *rval = JSVAL_TRUE;
3507 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3508 obj = NULL;
3509 } else {
3511 * The array returned on match has element 0 bound to the matched
3512 * string, elements 1 through state.parenCount bound to the paren
3513 * matches, an index property telling the length of the left context,
3514 * and an input property referring to the input string.
3516 obj = js_NewSlowArrayObject(cx);
3517 if (!obj) {
3518 ok = JS_FALSE;
3519 goto out;
3521 *rval = OBJECT_TO_JSVAL(obj);
3523 #define DEFVAL(val, id) { \
3524 ok = js_DefineProperty(cx, obj, id, val, \
3525 JS_PropertyStub, JS_PropertyStub, \
3526 JSPROP_ENUMERATE, NULL); \
3527 if (!ok) { \
3528 cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
3529 cx->weakRoots.newborn[GCX_STRING] = NULL; \
3530 goto out; \
3534 matchstr = js_NewStringCopyN(cx, cp, matchlen);
3535 if (!matchstr) {
3536 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3537 ok = JS_FALSE;
3538 goto out;
3540 DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3543 res = &cx->regExpStatics;
3544 res->input = str;
3545 res->parenCount = re->parenCount;
3546 if (re->parenCount == 0) {
3547 res->lastParen = js_EmptySubString;
3548 } else {
3549 for (num = 0; num < re->parenCount; num++) {
3550 parsub = &result->parens[num];
3551 if (num < 9) {
3552 if (parsub->index == -1) {
3553 res->parens[num].chars = NULL;
3554 res->parens[num].length = 0;
3555 } else {
3556 res->parens[num].chars = gData.cpbegin + parsub->index;
3557 res->parens[num].length = parsub->length;
3559 } else {
3560 morenum = num - 9;
3561 morepar = res->moreParens;
3562 if (!morepar) {
3563 res->moreLength = 10;
3564 morepar = (JSSubString*)
3565 JS_malloc(cx, 10 * sizeof(JSSubString));
3566 } else if (morenum >= res->moreLength) {
3567 res->moreLength += 10;
3568 morepar = (JSSubString*)
3569 JS_realloc(cx, morepar,
3570 res->moreLength * sizeof(JSSubString));
3572 if (!morepar) {
3573 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3574 cx->weakRoots.newborn[GCX_STRING] = NULL;
3575 ok = JS_FALSE;
3576 goto out;
3578 res->moreParens = morepar;
3579 if (parsub->index == -1) {
3580 morepar[morenum].chars = NULL;
3581 morepar[morenum].length = 0;
3582 } else {
3583 morepar[morenum].chars = gData.cpbegin + parsub->index;
3584 morepar[morenum].length = parsub->length;
3587 if (test)
3588 continue;
3589 if (parsub->index == -1) {
3590 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3591 JSVAL_VOID, NULL, NULL,
3592 JSPROP_ENUMERATE, NULL);
3593 } else {
3594 parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3595 parsub->length);
3596 if (!parstr) {
3597 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3598 cx->weakRoots.newborn[GCX_STRING] = NULL;
3599 ok = JS_FALSE;
3600 goto out;
3602 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3603 STRING_TO_JSVAL(parstr), NULL, NULL,
3604 JSPROP_ENUMERATE, NULL);
3606 if (!ok) {
3607 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3608 cx->weakRoots.newborn[GCX_STRING] = NULL;
3609 goto out;
3612 if (parsub->index == -1) {
3613 res->lastParen = js_EmptySubString;
3614 } else {
3615 res->lastParen.chars = gData.cpbegin + parsub->index;
3616 res->lastParen.length = parsub->length;
3620 if (!test) {
3622 * Define the index and input properties last for better for/in loop
3623 * order (so they come after the elements).
3625 DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3626 ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3627 DEFVAL(STRING_TO_JSVAL(str),
3628 ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3631 #undef DEFVAL
3633 res->lastMatch.chars = cp;
3634 res->lastMatch.length = matchlen;
3637 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3639 * js1.3 "hi", "hi there" "hihitherehi therebye"
3641 res->leftContext.chars = JSSTRING_CHARS(str);
3642 res->leftContext.length = start + gData.skipped;
3643 res->rightContext.chars = ep;
3644 res->rightContext.length = gData.cpend - ep;
3646 out:
3647 JS_ARENA_RELEASE(&cx->regexpPool, mark);
3648 return ok;
3651 /************************************************************************/
3653 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED)
3654 #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY)
3656 static JSPropertySpec regexp_props[] = {
3657 {"source", REGEXP_SOURCE, RO_REGEXP_PROP_ATTRS,0,0},
3658 {"global", REGEXP_GLOBAL, RO_REGEXP_PROP_ATTRS,0,0},
3659 {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,0,0},
3660 {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
3661 {"multiline", REGEXP_MULTILINE, RO_REGEXP_PROP_ATTRS,0,0},
3662 {"sticky", REGEXP_STICKY, RO_REGEXP_PROP_ATTRS,0,0},
3663 {0,0,0,0,0}
3666 static JSBool
3667 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3669 jsint slot;
3670 JSRegExp *re;
3672 if (!JSVAL_IS_INT(id))
3673 return JS_TRUE;
3674 while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3675 obj = OBJ_GET_PROTO(cx, obj);
3676 if (!obj)
3677 return JS_TRUE;
3679 slot = JSVAL_TO_INT(id);
3680 if (slot == REGEXP_LAST_INDEX)
3681 return JS_GetReservedSlot(cx, obj, 0, vp);
3683 JS_LOCK_OBJ(cx, obj);
3684 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3685 if (re) {
3686 switch (slot) {
3687 case REGEXP_SOURCE:
3688 *vp = STRING_TO_JSVAL(re->source);
3689 break;
3690 case REGEXP_GLOBAL:
3691 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3692 break;
3693 case REGEXP_IGNORE_CASE:
3694 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3695 break;
3696 case REGEXP_MULTILINE:
3697 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3698 break;
3699 case REGEXP_STICKY:
3700 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0);
3701 break;
3704 JS_UNLOCK_OBJ(cx, obj);
3705 return JS_TRUE;
3708 static JSBool
3709 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3711 JSBool ok;
3712 jsint slot;
3713 jsdouble lastIndex;
3715 ok = JS_TRUE;
3716 if (!JSVAL_IS_INT(id))
3717 return ok;
3718 while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3719 obj = OBJ_GET_PROTO(cx, obj);
3720 if (!obj)
3721 return JS_TRUE;
3723 slot = JSVAL_TO_INT(id);
3724 if (slot == REGEXP_LAST_INDEX) {
3725 if (!JS_ValueToNumber(cx, *vp, &lastIndex))
3726 return JS_FALSE;
3727 lastIndex = js_DoubleToInteger(lastIndex);
3728 ok = JS_NewNumberValue(cx, lastIndex, vp) &&
3729 JS_SetReservedSlot(cx, obj, 0, *vp);
3731 return ok;
3735 * RegExp class static properties and their Perl counterparts:
3737 * RegExp.input $_
3738 * RegExp.multiline $*
3739 * RegExp.lastMatch $&
3740 * RegExp.lastParen $+
3741 * RegExp.leftContext $`
3742 * RegExp.rightContext $'
3744 enum regexp_static_tinyid {
3745 REGEXP_STATIC_INPUT = -1,
3746 REGEXP_STATIC_MULTILINE = -2,
3747 REGEXP_STATIC_LAST_MATCH = -3,
3748 REGEXP_STATIC_LAST_PAREN = -4,
3749 REGEXP_STATIC_LEFT_CONTEXT = -5,
3750 REGEXP_STATIC_RIGHT_CONTEXT = -6
3753 JSBool
3754 js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3756 JS_ClearRegExpStatics(cx);
3757 return js_AddRoot(cx, &res->input, "res->input");
3760 void
3761 js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3763 if (res->moreParens) {
3764 JS_free(cx, res->moreParens);
3765 res->moreParens = NULL;
3767 js_RemoveRoot(cx->runtime, &res->input);
3770 static JSBool
3771 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3773 jsint slot;
3774 JSRegExpStatics *res;
3775 JSString *str;
3776 JSSubString *sub;
3778 res = &cx->regExpStatics;
3779 if (!JSVAL_IS_INT(id))
3780 return JS_TRUE;
3781 slot = JSVAL_TO_INT(id);
3782 switch (slot) {
3783 case REGEXP_STATIC_INPUT:
3784 *vp = res->input ? STRING_TO_JSVAL(res->input)
3785 : JS_GetEmptyStringValue(cx);
3786 return JS_TRUE;
3787 case REGEXP_STATIC_MULTILINE:
3788 *vp = BOOLEAN_TO_JSVAL(res->multiline);
3789 return JS_TRUE;
3790 case REGEXP_STATIC_LAST_MATCH:
3791 sub = &res->lastMatch;
3792 break;
3793 case REGEXP_STATIC_LAST_PAREN:
3794 sub = &res->lastParen;
3795 break;
3796 case REGEXP_STATIC_LEFT_CONTEXT:
3797 sub = &res->leftContext;
3798 break;
3799 case REGEXP_STATIC_RIGHT_CONTEXT:
3800 sub = &res->rightContext;
3801 break;
3802 default:
3803 sub = REGEXP_PAREN_SUBSTRING(res, slot);
3804 break;
3806 str = js_NewStringCopyN(cx, sub->chars, sub->length);
3807 if (!str)
3808 return JS_FALSE;
3809 *vp = STRING_TO_JSVAL(str);
3810 return JS_TRUE;
3813 static JSBool
3814 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3816 JSRegExpStatics *res;
3818 if (!JSVAL_IS_INT(id))
3819 return JS_TRUE;
3820 res = &cx->regExpStatics;
3821 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3822 if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3823 if (!JSVAL_IS_STRING(*vp) &&
3824 !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3825 return JS_FALSE;
3827 res->input = JSVAL_TO_STRING(*vp);
3828 } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3829 if (!JSVAL_IS_BOOLEAN(*vp) &&
3830 !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3831 return JS_FALSE;
3833 res->multiline = JSVAL_TO_BOOLEAN(*vp);
3835 return JS_TRUE;
3837 #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
3838 #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
3840 static JSPropertySpec regexp_static_props[] = {
3841 {"input",
3842 REGEXP_STATIC_INPUT,
3843 REGEXP_STATIC_PROP_ATTRS,
3844 regexp_static_getProperty, regexp_static_setProperty},
3845 {"multiline",
3846 REGEXP_STATIC_MULTILINE,
3847 REGEXP_STATIC_PROP_ATTRS,
3848 regexp_static_getProperty, regexp_static_setProperty},
3849 {"lastMatch",
3850 REGEXP_STATIC_LAST_MATCH,
3851 RO_REGEXP_STATIC_PROP_ATTRS,
3852 regexp_static_getProperty, regexp_static_getProperty},
3853 {"lastParen",
3854 REGEXP_STATIC_LAST_PAREN,
3855 RO_REGEXP_STATIC_PROP_ATTRS,
3856 regexp_static_getProperty, regexp_static_getProperty},
3857 {"leftContext",
3858 REGEXP_STATIC_LEFT_CONTEXT,
3859 RO_REGEXP_STATIC_PROP_ATTRS,
3860 regexp_static_getProperty, regexp_static_getProperty},
3861 {"rightContext",
3862 REGEXP_STATIC_RIGHT_CONTEXT,
3863 RO_REGEXP_STATIC_PROP_ATTRS,
3864 regexp_static_getProperty, regexp_static_getProperty},
3866 /* XXX should have block scope and local $1, etc. */
3867 {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS,
3868 regexp_static_getProperty, regexp_static_getProperty},
3869 {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS,
3870 regexp_static_getProperty, regexp_static_getProperty},
3871 {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS,
3872 regexp_static_getProperty, regexp_static_getProperty},
3873 {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS,
3874 regexp_static_getProperty, regexp_static_getProperty},
3875 {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS,
3876 regexp_static_getProperty, regexp_static_getProperty},
3877 {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS,
3878 regexp_static_getProperty, regexp_static_getProperty},
3879 {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS,
3880 regexp_static_getProperty, regexp_static_getProperty},
3881 {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS,
3882 regexp_static_getProperty, regexp_static_getProperty},
3883 {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS,
3884 regexp_static_getProperty, regexp_static_getProperty},
3886 {0,0,0,0,0}
3889 static void
3890 regexp_finalize(JSContext *cx, JSObject *obj)
3892 JSRegExp *re;
3894 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3895 if (!re)
3896 return;
3897 js_DestroyRegExp(cx, re);
3900 /* Forward static prototype. */
3901 static JSBool
3902 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3903 JSBool test, jsval *rval);
3905 static JSBool
3906 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3908 return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv,
3909 JS_FALSE, rval);
3912 #if JS_HAS_XDR
3914 #include "jsxdrapi.h"
3916 static JSBool
3917 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3919 JSRegExp *re;
3920 JSString *source;
3921 uint32 flagsword;
3922 JSObject *obj;
3924 if (xdr->mode == JSXDR_ENCODE) {
3925 re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3926 if (!re)
3927 return JS_FALSE;
3928 source = re->source;
3929 flagsword = (uint32)re->flags;
3931 if (!JS_XDRString(xdr, &source) ||
3932 !JS_XDRUint32(xdr, &flagsword)) {
3933 return JS_FALSE;
3935 if (xdr->mode == JSXDR_DECODE) {
3936 obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL, 0);
3937 if (!obj)
3938 return JS_FALSE;
3939 STOBJ_CLEAR_PARENT(obj);
3940 STOBJ_CLEAR_PROTO(obj);
3941 re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE);
3942 if (!re)
3943 return JS_FALSE;
3944 if (!JS_SetPrivate(xdr->cx, obj, re) ||
3945 !js_SetLastIndex(xdr->cx, obj, 0)) {
3946 js_DestroyRegExp(xdr->cx, re);
3947 return JS_FALSE;
3949 *objp = obj;
3951 return JS_TRUE;
3954 #else /* !JS_HAS_XDR */
3956 #define regexp_xdrObject NULL
3958 #endif /* !JS_HAS_XDR */
3960 static void
3961 regexp_trace(JSTracer *trc, JSObject *obj)
3963 JSRegExp *re;
3965 re = (JSRegExp *) JS_GetPrivate(trc->context, obj);
3966 if (re && re->source)
3967 JS_CALL_STRING_TRACER(trc, re->source, "source");
3970 JSClass js_RegExpClass = {
3971 js_RegExp_str,
3972 JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
3973 JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
3974 JS_PropertyStub, JS_PropertyStub,
3975 regexp_getProperty, regexp_setProperty,
3976 JS_EnumerateStub, JS_ResolveStub,
3977 JS_ConvertStub, regexp_finalize,
3978 NULL, NULL,
3979 regexp_call, NULL,
3980 regexp_xdrObject, NULL,
3981 JS_CLASS_TRACE(regexp_trace), 0
3984 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3986 JSBool
3987 js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
3989 JSRegExp *re;
3990 const jschar *source;
3991 jschar *chars;
3992 size_t length, nflags;
3993 uintN flags;
3994 JSString *str;
3996 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
3997 return JS_FALSE;
3998 JS_LOCK_OBJ(cx, obj);
3999 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4000 if (!re) {
4001 JS_UNLOCK_OBJ(cx, obj);
4002 *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
4003 return JS_TRUE;
4006 JSSTRING_CHARS_AND_LENGTH(re->source, source, length);
4007 if (length == 0) {
4008 source = empty_regexp_ucstr;
4009 length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
4011 length += 2;
4012 nflags = 0;
4013 for (flags = re->flags; flags != 0; flags &= flags - 1)
4014 nflags++;
4015 chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
4016 if (!chars) {
4017 JS_UNLOCK_OBJ(cx, obj);
4018 return JS_FALSE;
4021 chars[0] = '/';
4022 js_strncpy(&chars[1], source, length - 2);
4023 chars[length-1] = '/';
4024 if (nflags) {
4025 if (re->flags & JSREG_GLOB)
4026 chars[length++] = 'g';
4027 if (re->flags & JSREG_FOLD)
4028 chars[length++] = 'i';
4029 if (re->flags & JSREG_MULTILINE)
4030 chars[length++] = 'm';
4031 if (re->flags & JSREG_STICKY)
4032 chars[length++] = 'y';
4034 JS_UNLOCK_OBJ(cx, obj);
4035 chars[length] = 0;
4037 str = js_NewString(cx, chars, length);
4038 if (!str) {
4039 JS_free(cx, chars);
4040 return JS_FALSE;
4042 *vp = STRING_TO_JSVAL(str);
4043 return JS_TRUE;
4046 static JSBool
4047 regexp_toString(JSContext *cx, uintN argc, jsval *vp)
4049 JSObject *obj;
4051 obj = JS_THIS_OBJECT(cx, vp);
4052 return obj && js_regexp_toString(cx, obj, vp);
4055 static JSBool
4056 regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4057 jsval *rval)
4059 JSString *opt, *str;
4060 JSRegExp *oldre, *re;
4061 JSBool ok, ok2;
4062 JSObject *obj2;
4063 size_t length, nbytes;
4064 const jschar *cp, *start, *end;
4065 jschar *nstart, *ncp, *tmp;
4067 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
4068 return JS_FALSE;
4069 opt = NULL;
4070 if (argc == 0) {
4071 str = cx->runtime->emptyString;
4072 } else {
4073 if (JSVAL_IS_OBJECT(argv[0])) {
4075 * If we get passed in a RegExp object we construct a new
4076 * RegExp that is a duplicate of it by re-compiling the
4077 * original source code. ECMA requires that it be an error
4078 * here if the flags are specified. (We must use the flags
4079 * from the original RegExp also).
4081 obj2 = JSVAL_TO_OBJECT(argv[0]);
4082 if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
4083 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
4084 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4085 JSMSG_NEWREGEXP_FLAGGED);
4086 return JS_FALSE;
4088 JS_LOCK_OBJ(cx, obj2);
4089 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
4090 if (!re) {
4091 JS_UNLOCK_OBJ(cx, obj2);
4092 return JS_FALSE;
4094 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
4095 JS_UNLOCK_OBJ(cx, obj2);
4096 goto created;
4099 str = js_ValueToString(cx, argv[0]);
4100 if (!str)
4101 return JS_FALSE;
4102 argv[0] = STRING_TO_JSVAL(str);
4103 if (argc > 1) {
4104 if (JSVAL_IS_VOID(argv[1])) {
4105 opt = NULL;
4106 } else {
4107 opt = js_ValueToString(cx, argv[1]);
4108 if (!opt)
4109 return JS_FALSE;
4110 argv[1] = STRING_TO_JSVAL(opt);
4114 /* Escape any naked slashes in the regexp source. */
4115 JSSTRING_CHARS_AND_LENGTH(str, start, length);
4116 end = start + length;
4117 nstart = ncp = NULL;
4118 for (cp = start; cp < end; cp++) {
4119 if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
4120 nbytes = (++length + 1) * sizeof(jschar);
4121 if (!nstart) {
4122 nstart = (jschar *) JS_malloc(cx, nbytes);
4123 if (!nstart)
4124 return JS_FALSE;
4125 ncp = nstart + (cp - start);
4126 js_strncpy(nstart, start, cp - start);
4127 } else {
4128 tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
4129 if (!tmp) {
4130 JS_free(cx, nstart);
4131 return JS_FALSE;
4133 ncp = tmp + (ncp - nstart);
4134 nstart = tmp;
4136 *ncp++ = '\\';
4138 if (nstart)
4139 *ncp++ = *cp;
4142 if (nstart) {
4143 /* Don't forget to store the backstop after the new string. */
4144 JS_ASSERT((size_t)(ncp - nstart) == length);
4145 *ncp = 0;
4146 str = js_NewString(cx, nstart, length);
4147 if (!str) {
4148 JS_free(cx, nstart);
4149 return JS_FALSE;
4151 argv[0] = STRING_TO_JSVAL(str);
4155 re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
4156 created:
4157 if (!re)
4158 return JS_FALSE;
4159 JS_LOCK_OBJ(cx, obj);
4160 oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
4161 ok = JS_SetPrivate(cx, obj, re);
4162 ok2 = js_SetLastIndex(cx, obj, 0);
4163 JS_UNLOCK_OBJ(cx, obj);
4164 if (!ok) {
4165 js_DestroyRegExp(cx, re);
4166 return JS_FALSE;
4168 if (oldre)
4169 js_DestroyRegExp(cx, oldre);
4170 *rval = OBJECT_TO_JSVAL(obj);
4171 return ok2;
4174 static JSBool
4175 regexp_compile(JSContext *cx, uintN argc, jsval *vp)
4177 JSObject *obj;
4179 obj = JS_THIS_OBJECT(cx, vp);
4180 return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp);
4183 static JSBool
4184 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4185 JSBool test, jsval *rval)
4187 JSBool ok, sticky;
4188 JSRegExp *re;
4189 jsdouble lastIndex;
4190 JSString *str;
4191 size_t i;
4193 ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
4194 if (!ok)
4195 return JS_FALSE;
4196 JS_LOCK_OBJ(cx, obj);
4197 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4198 if (!re) {
4199 JS_UNLOCK_OBJ(cx, obj);
4200 return JS_TRUE;
4203 /* NB: we must reach out: after this paragraph, in order to drop re. */
4204 HOLD_REGEXP(cx, re);
4205 sticky = (re->flags & JSREG_STICKY) != 0;
4206 if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
4207 ok = js_GetLastIndex(cx, obj, &lastIndex);
4208 } else {
4209 lastIndex = 0;
4211 JS_UNLOCK_OBJ(cx, obj);
4212 if (!ok)
4213 goto out;
4215 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4216 if (argc == 0) {
4217 str = cx->regExpStatics.input;
4218 if (!str) {
4219 const char *bytes = js_GetStringBytes(cx, re->source);
4221 if (bytes) {
4222 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4223 JSMSG_NO_INPUT,
4224 bytes,
4225 (re->flags & JSREG_GLOB) ? "g" : "",
4226 (re->flags & JSREG_FOLD) ? "i" : "",
4227 (re->flags & JSREG_MULTILINE) ? "m" : "",
4228 (re->flags & JSREG_STICKY) ? "y" : "");
4230 ok = JS_FALSE;
4231 goto out;
4233 } else {
4234 str = js_ValueToString(cx, argv[0]);
4235 if (!str) {
4236 ok = JS_FALSE;
4237 goto out;
4239 argv[0] = STRING_TO_JSVAL(str);
4242 if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4243 ok = js_SetLastIndex(cx, obj, 0);
4244 *rval = JSVAL_NULL;
4245 } else {
4246 i = (size_t) lastIndex;
4247 ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4248 if (ok &&
4249 ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
4250 ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4254 out:
4255 DROP_REGEXP(cx, re);
4256 return ok;
4259 static JSBool
4260 regexp_exec(JSContext *cx, uintN argc, jsval *vp)
4262 return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE,
4263 vp);
4266 static JSBool
4267 regexp_test(JSContext *cx, uintN argc, jsval *vp)
4269 if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp))
4270 return JS_FALSE;
4271 if (*vp != JSVAL_TRUE)
4272 *vp = JSVAL_FALSE;
4273 return JS_TRUE;
4276 static JSFunctionSpec regexp_methods[] = {
4277 #if JS_HAS_TOSOURCE
4278 JS_FN(js_toSource_str, regexp_toString, 0,0),
4279 #endif
4280 JS_FN(js_toString_str, regexp_toString, 0,0),
4281 JS_FN("compile", regexp_compile, 2,0),
4282 JS_FN("exec", regexp_exec, 1,0),
4283 JS_FN("test", regexp_test, 1,0),
4284 JS_FS_END
4287 static JSBool
4288 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4290 if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4292 * If first arg is regexp and no flags are given, just return the arg.
4293 * (regexp_compile_sub detects the regexp + flags case and throws a
4294 * TypeError.) See 10.15.3.1.
4296 if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4297 !JSVAL_IS_PRIMITIVE(argv[0]) &&
4298 OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4299 *rval = argv[0];
4300 return JS_TRUE;
4303 /* Otherwise, replace obj with a new RegExp object. */
4304 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4305 if (!obj)
4306 return JS_FALSE;
4309 * regexp_compile_sub does not use rval to root its temporaries so we
4310 * can use it to root obj.
4312 *rval = OBJECT_TO_JSVAL(obj);
4314 return regexp_compile_sub(cx, obj, argc, argv, rval);
4317 JSObject *
4318 js_InitRegExpClass(JSContext *cx, JSObject *obj)
4320 JSObject *proto, *ctor;
4321 jsval rval;
4323 proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4324 regexp_props, regexp_methods,
4325 regexp_static_props, NULL);
4327 if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4328 return NULL;
4329 if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4330 !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4331 !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4332 !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4333 !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4334 !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4335 goto bad;
4338 /* Give RegExp.prototype private data so it matches the empty string. */
4339 if (!regexp_compile_sub(cx, proto, 0, NULL, &rval))
4340 goto bad;
4341 return proto;
4343 bad:
4344 JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4345 return NULL;
4348 JSObject *
4349 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4350 jschar *chars, size_t length, uintN flags)
4352 JSString *str;
4353 JSObject *obj;
4354 JSRegExp *re;
4355 JSTempValueRooter tvr;
4357 str = js_NewStringCopyN(cx, chars, length);
4358 if (!str)
4359 return NULL;
4360 re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4361 if (!re)
4362 return NULL;
4363 JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr);
4364 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4365 if (!obj || !JS_SetPrivate(cx, obj, re)) {
4366 js_DestroyRegExp(cx, re);
4367 obj = NULL;
4369 if (obj && !js_SetLastIndex(cx, obj, 0))
4370 obj = NULL;
4371 JS_POP_TEMP_ROOT(cx, &tvr);
4372 return obj;
4375 JSObject *
4376 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4378 JSObject *clone;
4379 JSRegExp *re;
4381 JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4382 clone = js_NewObject(cx, &js_RegExpClass, NULL, parent, 0);
4383 if (!clone)
4384 return NULL;
4385 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4386 if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4387 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4388 return NULL;
4390 HOLD_REGEXP(cx, re);
4391 return clone;
4394 JSBool
4395 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4397 jsval v;
4399 return JS_GetReservedSlot(cx, obj, 0, &v) &&
4400 JS_ValueToNumber(cx, v, lastIndex);
4403 JSBool
4404 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4406 jsval v;
4408 return JS_NewNumberValue(cx, lastIndex, &v) &&
4409 JS_SetReservedSlot(cx, obj, 0, v);