1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is Mozilla Communicator client code, released
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
43 * JS string type implementation.
45 * A JS string is a counted array of unicode characters. To support handoff
46 * of API client memory, the chars are allocated separately from the length,
47 * necessitating a pointer after the count, to form a separately allocated
48 * string descriptor. String descriptors are GC'ed, while their chars are
49 * allocated from the malloc heap.
54 #include "jshashtable.h"
61 UNIT_STRING_LIMIT
= 256U,
62 SMALL_CHAR_LIMIT
= 128U, /* Bigger chars cannot be in a length-2 string. */
63 NUM_SMALL_CHARS
= 64U,
64 INT_STRING_LIMIT
= 256U,
65 NUM_HUNDRED_STRINGS
= 156U
69 js_GetDependentStringChars(JSString
*str
);
71 extern JSString
* JS_FASTCALL
72 js_ConcatStrings(JSContext
*cx
, JSString
*left
, JSString
*right
);
74 JS_STATIC_ASSERT(JS_BITS_PER_WORD
>= 32);
76 struct JSRopeBufferInfo
{
77 /* Number of jschars we can hold, not including null terminator. */
81 /* Forward declaration for friending. */
82 namespace js
{ namespace mjit
{
86 struct JSLinearString
;
89 * The GC-thing "string" type.
91 * In FLAT strings, the mChars field points to a flat character array owned by
92 * its GC-thing descriptor. The array is terminated at index length by a zero
93 * character and the size of the array in bytes is
94 * (length + 1) * sizeof(jschar). The terminator is purely a backstop, in case
95 * the chars pointer flows out to native code that requires \u0000 termination.
97 * A flat string with the ATOMIZED flag means that the string is hashed as
98 * an atom. This flag is used to avoid re-hashing the already-atomized string.
100 * A flat string with the EXTENSIBLE flag means that the string may change into
101 * a dependent string as part of an optimization with js_ConcatStrings:
102 * extending |str1 = "abc"| with the character |str2 = str1 + "d"| will place
103 * "d" in the extra capacity from |str1|, make that the buffer for |str2|, and
104 * turn |str1| into a dependent string of |str2|.
106 * Flat strings without the EXTENSIBLE flag can be safely accessed by multiple
109 * When the string is DEPENDENT, the string depends on characters of another
110 * string strongly referenced by the base field. The base member may point to
111 * another dependent string if chars() has not been called yet.
113 * When a string is a ROPE, it represents the lazy concatenation of other
114 * strings. In general, the nodes reachable from any rope form a dag.
116 * To allow static type-based checking that a given JSString* always points
117 * to a flat or non-rope string, the JSFlatString and JSLinearString types may
118 * be used. Instead of casting, callers should use ensureX() and assertIsX().
122 friend class js::TraceRecorder
;
123 friend class js::mjit::Compiler
;
125 friend JSAtom
*js_AtomizeString(JSContext
*cx
, JSString
*str
, uintN flags
);
128 * Not private because we want to be able to use static initializers for
129 * them. Don't use these directly! FIXME bug 614459.
131 size_t lengthAndFlags
; /* in all strings */
133 const jschar
*chars
; /* in non-rope strings */
134 JSString
*left
; /* in rope strings */
137 jschar inlineStorage
[4]; /* in short strings */
140 JSString
*right
; /* in rope strings */
141 JSString
*base
; /* in dependent strings */
142 size_t capacity
; /* in extensible flat strings */
145 JSString
*parent
; /* temporarily used during flatten */
146 size_t reserved
; /* may use for bug 615290 */
149 size_t externalStringType
; /* in external strings */
153 * The lengthAndFlags field in string headers has data arranged in the
156 * [ length (bits 4-31) ][ flags (bits 2-3) ][ type (bits 0-1) ]
158 * The length is packed in lengthAndFlags, even in string types that don't
159 * need 3 other fields, to make the length check simpler.
161 * When the string type is FLAT, the flags can contain ATOMIZED or
164 static const size_t TYPE_FLAGS_MASK
= JS_BITMASK(4);
165 static const size_t LENGTH_SHIFT
= 4;
167 static const size_t TYPE_MASK
= JS_BITMASK(2);
168 static const size_t FLAT
= 0x0;
169 static const size_t DEPENDENT
= 0x1;
170 static const size_t ROPE
= 0x2;
172 /* Allow checking 1 bit for dependent/rope strings. */
173 static const size_t DEPENDENT_BIT
= JS_BIT(0);
174 static const size_t ROPE_BIT
= JS_BIT(1);
176 static const size_t ATOMIZED
= JS_BIT(2);
177 static const size_t EXTENSIBLE
= JS_BIT(3);
180 size_t buildLengthAndFlags(size_t length
, size_t flags
) {
181 return (length
<< LENGTH_SHIFT
) | flags
;
184 inline js::gc::Cell
*asCell() {
185 return reinterpret_cast<js::gc::Cell
*>(this);
188 inline js::gc::FreeCell
*asFreeCell() {
189 return reinterpret_cast<js::gc::FreeCell
*>(this);
193 * Generous but sane length bound; the "-1" is there for compatibility with
196 static const size_t MAX_LENGTH
= (1 << 28) - 1;
198 JS_ALWAYS_INLINE
bool isDependent() const {
199 return lengthAndFlags
& DEPENDENT_BIT
;
202 JS_ALWAYS_INLINE
bool isFlat() const {
203 return (lengthAndFlags
& TYPE_MASK
) == FLAT
;
206 JS_ALWAYS_INLINE
bool isExtensible() const {
207 JS_ASSERT_IF(lengthAndFlags
& EXTENSIBLE
, isFlat());
208 return lengthAndFlags
& EXTENSIBLE
;
211 JS_ALWAYS_INLINE
bool isAtomized() const {
212 JS_ASSERT_IF(lengthAndFlags
& ATOMIZED
, isFlat());
213 return lengthAndFlags
& ATOMIZED
;
216 JS_ALWAYS_INLINE
bool isRope() const {
217 return lengthAndFlags
& ROPE_BIT
;
220 JS_ALWAYS_INLINE
size_t length() const {
221 return lengthAndFlags
>> LENGTH_SHIFT
;
224 JS_ALWAYS_INLINE
bool empty() const {
225 return lengthAndFlags
<= TYPE_FLAGS_MASK
;
228 /* This can fail by returning null and reporting an error on cx. */
229 JS_ALWAYS_INLINE
const jschar
*getChars(JSContext
*cx
) {
232 return nonRopeChars();
235 /* This can fail by returning null and reporting an error on cx. */
236 JS_ALWAYS_INLINE
const jschar
*getCharsZ(JSContext
*cx
) {
242 JS_ALWAYS_INLINE
void initFlatNotTerminated(jschar
*chars
, size_t length
) {
243 JS_ASSERT(length
<= MAX_LENGTH
);
244 JS_ASSERT(!isStatic(this));
245 lengthAndFlags
= buildLengthAndFlags(length
, FLAT
);
249 /* Specific flat string initializer and accessor methods. */
250 JS_ALWAYS_INLINE
void initFlat(jschar
*chars
, size_t length
) {
251 initFlatNotTerminated(chars
, length
);
252 JS_ASSERT(chars
[length
] == jschar(0));
255 JS_ALWAYS_INLINE
void initShortString(const jschar
*chars
, size_t length
) {
256 JS_ASSERT(length
<= MAX_LENGTH
);
257 JS_ASSERT(chars
>= inlineStorage
&& chars
< (jschar
*)(this + 2));
258 JS_ASSERT(!isStatic(this));
259 lengthAndFlags
= buildLengthAndFlags(length
, FLAT
);
263 JS_ALWAYS_INLINE
void initFlatExtensible(jschar
*chars
, size_t length
, size_t cap
) {
264 JS_ASSERT(length
<= MAX_LENGTH
);
265 JS_ASSERT(chars
[length
] == jschar(0));
266 JS_ASSERT(!isStatic(this));
267 lengthAndFlags
= buildLengthAndFlags(length
, FLAT
| EXTENSIBLE
);
272 JS_ALWAYS_INLINE JSFlatString
*assertIsFlat() {
274 return reinterpret_cast<JSFlatString
*>(this);
277 JS_ALWAYS_INLINE
const jschar
*flatChars() const {
282 JS_ALWAYS_INLINE
size_t flatLength() const {
287 inline void flatSetAtomized() {
289 JS_ASSERT(!isStatic(this));
290 lengthAndFlags
|= ATOMIZED
;
293 inline void flatClearExtensible() {
295 * N.B. This may be called on static strings, which may be in read-only
296 * memory, so we cannot unconditionally apply the mask.
299 if (lengthAndFlags
& EXTENSIBLE
)
300 lengthAndFlags
&= ~EXTENSIBLE
;
304 * The chars pointer should point somewhere inside the buffer owned by base.
305 * The caller still needs to pass base for GC purposes.
307 inline void initDependent(JSString
*base
, const jschar
*chars
, size_t length
) {
308 JS_ASSERT(!isStatic(this));
309 JS_ASSERT(base
->isFlat());
310 JS_ASSERT(chars
>= base
->flatChars() && chars
< base
->flatChars() + base
->length());
311 JS_ASSERT(length
<= base
->length() - (chars
- base
->flatChars()));
312 lengthAndFlags
= buildLengthAndFlags(length
, DEPENDENT
);
317 inline JSLinearString
*dependentBase() const {
318 JS_ASSERT(isDependent());
319 return s
.base
->assertIsLinear();
322 JS_ALWAYS_INLINE
const jschar
*dependentChars() {
323 JS_ASSERT(isDependent());
327 inline size_t dependentLength() const {
328 JS_ASSERT(isDependent());
332 const jschar
*undepend(JSContext
*cx
);
334 const jschar
*nonRopeChars() const {
335 JS_ASSERT(!isRope());
339 /* Rope-related initializers and accessors. */
340 inline void initRopeNode(JSString
*left
, JSString
*right
, size_t length
) {
341 JS_ASSERT(left
->length() + right
->length() == length
);
342 lengthAndFlags
= buildLengthAndFlags(length
, ROPE
);
347 inline JSString
*ropeLeft() const {
352 inline JSString
*ropeRight() const {
357 inline void finishTraversalConversion(JSString
*base
, const jschar
*baseBegin
, const jschar
*end
) {
358 JS_ASSERT(baseBegin
<= u
.chars
&& u
.chars
<= end
);
359 lengthAndFlags
= buildLengthAndFlags(end
- u
.chars
, DEPENDENT
);
363 const jschar
*flatten(JSContext
*maybecx
);
365 JSLinearString
*ensureLinear(JSContext
*cx
) {
366 if (isRope() && !flatten(cx
))
368 return reinterpret_cast<JSLinearString
*>(this);
371 bool isLinear() const {
375 JSLinearString
*assertIsLinear() {
376 JS_ASSERT(isLinear());
377 return reinterpret_cast<JSLinearString
*>(this);
380 typedef uint8 SmallChar
;
382 static inline bool fitsInSmallChar(jschar c
) {
383 return c
< SMALL_CHAR_LIMIT
&& toSmallChar
[c
] != INVALID_SMALL_CHAR
;
386 static inline bool isUnitString(void *ptr
) {
387 jsuword delta
= reinterpret_cast<jsuword
>(ptr
) -
388 reinterpret_cast<jsuword
>(unitStringTable
);
389 if (delta
>= UNIT_STRING_LIMIT
* sizeof(JSString
))
392 /* If ptr points inside the static array, it must be well-aligned. */
393 JS_ASSERT(delta
% sizeof(JSString
) == 0);
397 static inline bool isLength2String(void *ptr
) {
398 jsuword delta
= reinterpret_cast<jsuword
>(ptr
) -
399 reinterpret_cast<jsuword
>(length2StringTable
);
400 if (delta
>= NUM_SMALL_CHARS
* NUM_SMALL_CHARS
* sizeof(JSString
))
403 /* If ptr points inside the static array, it must be well-aligned. */
404 JS_ASSERT(delta
% sizeof(JSString
) == 0);
408 static inline bool isHundredString(void *ptr
) {
409 jsuword delta
= reinterpret_cast<jsuword
>(ptr
) -
410 reinterpret_cast<jsuword
>(hundredStringTable
);
411 if (delta
>= NUM_HUNDRED_STRINGS
* sizeof(JSString
))
414 /* If ptr points inside the static array, it must be well-aligned. */
415 JS_ASSERT(delta
% sizeof(JSString
) == 0);
419 static inline bool isStatic(void *ptr
) {
420 return isUnitString(ptr
) || isLength2String(ptr
) || isHundredString(ptr
);
424 #pragma align 8 (__1cIJSStringPunitStringTable_, __1cIJSStringSlength2StringTable_, __1cIJSStringShundredStringTable_)
427 static const SmallChar INVALID_SMALL_CHAR
= -1;
429 static const jschar fromSmallChar
[];
430 static const SmallChar toSmallChar
[];
431 static const JSString unitStringTable
[];
432 static const JSString length2StringTable
[];
433 static const JSString hundredStringTable
[];
435 * Since int strings can be unit strings, length-2 strings, or hundred
436 * strings, we keep a table to map from integer to the correct string.
438 static const JSString
*const intStringTable
[];
440 static JSFlatString
*unitString(jschar c
);
441 static JSLinearString
*getUnitString(JSContext
*cx
, JSString
*str
, size_t index
);
442 static JSFlatString
*length2String(jschar c1
, jschar c2
);
443 static JSFlatString
*length2String(uint32 i
);
444 static JSFlatString
*intString(jsint i
);
446 static JSFlatString
*lookupStaticString(const jschar
*chars
, size_t length
);
448 JS_ALWAYS_INLINE
void finalize(JSContext
*cx
);
450 static size_t offsetOfLengthAndFlags() {
451 return offsetof(JSString
, lengthAndFlags
);
454 static size_t offsetOfChars() {
455 return offsetof(JSString
, u
.chars
);
458 static void staticAsserts() {
459 JS_STATIC_ASSERT(((JSString::MAX_LENGTH
<< JSString::LENGTH_SHIFT
) >>
460 JSString::LENGTH_SHIFT
) == JSString::MAX_LENGTH
);
465 * A "linear" string may or may not be null-terminated, but it provides
466 * infallible access to a linear array of characters. Namely, this means the
467 * string is not a rope.
469 struct JSLinearString
: JSString
471 const jschar
*chars() const { return JSString::nonRopeChars(); }
474 JS_STATIC_ASSERT(sizeof(JSLinearString
) == sizeof(JSString
));
477 * A linear string where, additionally, chars()[length()] == '\0'. Namely, this
478 * means the string is not a dependent string or rope.
480 struct JSFlatString
: JSLinearString
482 const jschar
*charsZ() const { return chars(); }
485 JS_STATIC_ASSERT(sizeof(JSFlatString
) == sizeof(JSString
));
488 * A flat string which has been "atomized", i.e., that is a unique string among
489 * other atomized strings and therefore allows equality via pointer comparison.
491 struct JSAtom
: JSFlatString
495 struct JSExternalString
: JSString
497 static const uintN TYPE_LIMIT
= 8;
498 static JSStringFinalizeOp str_finalizers
[TYPE_LIMIT
];
500 static intN
changeFinalizer(JSStringFinalizeOp oldop
,
501 JSStringFinalizeOp newop
) {
502 for (uintN i
= 0; i
!= JS_ARRAY_LENGTH(str_finalizers
); i
++) {
503 if (str_finalizers
[i
] == oldop
) {
504 str_finalizers
[i
] = newop
;
511 void finalize(JSContext
*cx
);
515 JS_STATIC_ASSERT(sizeof(JSString
) == sizeof(JSExternalString
));
518 * Short strings should be created in cases where it's worthwhile to avoid
519 * mallocing the string buffer for a small string. We keep 2 string headers'
520 * worth of space in short strings so that more strings can be stored this way.
522 class JSShortString
: public js::gc::Cell
529 * Set the length of the string, and return a buffer for the caller to write
530 * to. This buffer must be written immediately, and should not be modified
533 inline jschar
*init(size_t length
) {
534 JS_ASSERT(length
<= MAX_SHORT_STRING_LENGTH
);
535 mHeader
.initShortString(mHeader
.inlineStorage
, length
);
536 return mHeader
.inlineStorage
;
539 inline jschar
*getInlineStorageBeforeInit() {
540 return mHeader
.inlineStorage
;
543 inline void initAtOffsetInBuffer(jschar
*p
, size_t length
) {
544 JS_ASSERT(p
>= mHeader
.inlineStorage
&& p
< mHeader
.inlineStorage
+ MAX_SHORT_STRING_LENGTH
);
545 mHeader
.initShortString(p
, length
);
548 inline void resetLength(size_t length
) {
549 mHeader
.initShortString(mHeader
.flatChars(), length
);
552 inline JSString
*header() {
556 static const size_t FREE_STRING_WORDS
= 2;
558 static const size_t MAX_SHORT_STRING_LENGTH
=
559 ((sizeof(JSString
) + FREE_STRING_WORDS
* sizeof(size_t)) / sizeof(jschar
)) - 1;
561 static inline bool fitsIntoShortString(size_t length
) {
562 return length
<= MAX_SHORT_STRING_LENGTH
;
565 JS_ALWAYS_INLINE
void finalize(JSContext
*cx
);
567 static void staticAsserts() {
568 JS_STATIC_ASSERT(offsetof(JSString
, inlineStorage
) ==
569 sizeof(JSString
) - JSShortString::FREE_STRING_WORDS
* sizeof(void *));
570 JS_STATIC_ASSERT(offsetof(JSShortString
, mDummy
) == sizeof(JSString
));
571 JS_STATIC_ASSERT(offsetof(JSString
, inlineStorage
) +
572 sizeof(jschar
) * (JSShortString::MAX_SHORT_STRING_LENGTH
+ 1) ==
573 sizeof(JSShortString
));
582 * When an algorithm does not need a string represented as a single linear
583 * array of characters, this range utility may be used to traverse the string as a
584 * sequence of linear arrays of characters. This avoids flattening ropes.
586 * Implemented in jsstrinlines.h.
588 class StringSegmentRange
;
589 class MutatingRopeSegmentRange
;
592 * Utility for building a rope (lazy concatenation) of strings.
598 extern const jschar
*
599 js_GetStringChars(JSContext
*cx
, JSString
*str
);
601 extern const jschar
*
602 js_UndependString(JSContext
*cx
, JSString
*str
);
605 js_MakeStringImmutable(JSContext
*cx
, JSString
*str
);
607 extern JSString
* JS_FASTCALL
608 js_toLowerCase(JSContext
*cx
, JSString
*str
);
610 extern JSString
* JS_FASTCALL
611 js_toUpperCase(JSContext
*cx
, JSString
*str
);
618 extern jschar js_empty_ucstr
[];
619 extern JSSubString js_EmptySubString
;
621 /* Unicode character attribute lookup tables. */
622 extern const uint8 js_X
[];
623 extern const uint8 js_Y
[];
624 extern const uint32 js_A
[];
626 /* Enumerated Unicode general category types. */
627 typedef enum JSCharType
{
629 JSCT_UPPERCASE_LETTER
= 1,
630 JSCT_LOWERCASE_LETTER
= 2,
631 JSCT_TITLECASE_LETTER
= 3,
632 JSCT_MODIFIER_LETTER
= 4,
633 JSCT_OTHER_LETTER
= 5,
634 JSCT_NON_SPACING_MARK
= 6,
635 JSCT_ENCLOSING_MARK
= 7,
636 JSCT_COMBINING_SPACING_MARK
= 8,
637 JSCT_DECIMAL_DIGIT_NUMBER
= 9,
638 JSCT_LETTER_NUMBER
= 10,
639 JSCT_OTHER_NUMBER
= 11,
640 JSCT_SPACE_SEPARATOR
= 12,
641 JSCT_LINE_SEPARATOR
= 13,
642 JSCT_PARAGRAPH_SEPARATOR
= 14,
645 JSCT_PRIVATE_USE
= 18,
647 JSCT_DASH_PUNCTUATION
= 20,
648 JSCT_START_PUNCTUATION
= 21,
649 JSCT_END_PUNCTUATION
= 22,
650 JSCT_CONNECTOR_PUNCTUATION
= 23,
651 JSCT_OTHER_PUNCTUATION
= 24,
652 JSCT_MATH_SYMBOL
= 25,
653 JSCT_CURRENCY_SYMBOL
= 26,
654 JSCT_MODIFIER_SYMBOL
= 27,
655 JSCT_OTHER_SYMBOL
= 28
658 /* Character classifying and mapping macros, based on java.lang.Character. */
/*
 * JS_CCODE(c): fetch the attribute word for c by chaining the three lookup
 * tables. js_X is indexed by c (cast to uint16) shifted right by 6; that entry,
 * shifted left by 6, is OR'ed with the low 6 bits of c to index js_Y; the js_Y
 * entry in turn indexes js_A.
 * NOTE: c appears twice in the expansion, so the argument is evaluated more
 * than once -- do not pass expressions with side effects. The low-6-bit term
 * uses c without the uint16 cast.
 *
 * JS_CTYPE(c): the low 5 bits (mask 0x1F) of that attribute word; other macros
 * in this file compare the result against JSCT_* values.
 */
659 #define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
660 #define JS_CTYPE(c) (JS_CCODE(c) & 0x1F)
662 #define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
663 (1 << JSCT_LOWERCASE_LETTER) | \
664 (1 << JSCT_TITLECASE_LETTER) | \
665 (1 << JSCT_MODIFIER_LETTER) | \
666 (1 << JSCT_OTHER_LETTER)) \
669 #define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
670 (1 << JSCT_LOWERCASE_LETTER) | \
671 (1 << JSCT_TITLECASE_LETTER) | \
672 (1 << JSCT_MODIFIER_LETTER) | \
673 (1 << JSCT_OTHER_LETTER) | \
674 (1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
677 /* A unicode letter, suitable for use in an identifier. */
678 #define JS_ISLETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
679 (1 << JSCT_LOWERCASE_LETTER) | \
680 (1 << JSCT_TITLECASE_LETTER) | \
681 (1 << JSCT_MODIFIER_LETTER) | \
682 (1 << JSCT_OTHER_LETTER) | \
683 (1 << JSCT_LETTER_NUMBER)) \
687 * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
688 * digit or connector punctuation.
690 #define JS_ISIDPART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
691 (1 << JSCT_LOWERCASE_LETTER) | \
692 (1 << JSCT_TITLECASE_LETTER) | \
693 (1 << JSCT_MODIFIER_LETTER) | \
694 (1 << JSCT_OTHER_LETTER) | \
695 (1 << JSCT_LETTER_NUMBER) | \
696 (1 << JSCT_NON_SPACING_MARK) | \
697 (1 << JSCT_COMBINING_SPACING_MARK) | \
698 (1 << JSCT_DECIMAL_DIGIT_NUMBER) | \
699 (1 << JSCT_CONNECTOR_PUNCTUATION)) \
702 /* Unicode control-format characters, ignored in input */
703 #define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
706 * This table is used in JS_ISWORD. The definition has external linkage to
707 * allow the raw table data to be used in the regular expression compiler.
709 extern const bool js_alnum
[];
712 * This macro performs testing for the regular expression word class \w, which
713 * is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z]. If we want a
714 * Unicode-friendlier definition of "word", we should rename this macro to
715 * something regexp-y.
/*
 * JS_ISWORD(c): true when c is below 128 and its js_alnum entry is set. The
 * bound is tested first, so values of 128 or more never index js_alnum.
 * JS_ISIDSTART(c): JS_ISLETTER(c), or c is '_' or '$'.
 * JS_ISIDENT(c): JS_ISIDPART(c), or c is '_' or '$'.
 * All three mention c more than once, so the argument may be evaluated
 * repeatedly.
 */
717 #define JS_ISWORD(c) ((c) < 128 && js_alnum[(c)])
719 #define JS_ISIDSTART(c) (JS_ISLETTER(c) || (c) == '_' || (c) == '$')
720 #define JS_ISIDENT(c) (JS_ISIDPART(c) || (c) == '_' || (c) == '$')
722 #define JS_ISXMLSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || \
/*
 * JS_ISXMLNSSTART(c): bit 0x00000100 of the JS_CCODE(c) attribute word is set,
 * or c is '_'.
 * JS_ISXMLNS(c): bit 0x00000080 of the attribute word is set, or c is '.',
 * '-' or '_'.
 * JS_ISXMLNAMESTART(c) / JS_ISXMLNAME(c): the corresponding test above,
 * additionally accepting ':'.
 * JS_ISDIGIT(c): the JS_CTYPE(c) category equals JSCT_DECIMAL_DIGIT_NUMBER.
 * Each macro mentions c more than once, so the argument may be evaluated
 * repeatedly.
 */
724 #define JS_ISXMLNSSTART(c) ((JS_CCODE(c) & 0x00000100) || (c) == '_')
725 #define JS_ISXMLNS(c) ((JS_CCODE(c) & 0x00000080) || (c) == '.' || \
726 (c) == '-' || (c) == '_')
727 #define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':')
728 #define JS_ISXMLNAME(c) (JS_ISXMLNS(c) || (c) == ':')
730 #define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
/*
 * Named jschar constants for the code units 0xFEFF and 0x00A0. Both are
 * compared against directly by the character tests that follow them.
 */
732 const jschar BYTE_ORDER_MARK
= 0xFEFF;
733 const jschar NO_BREAK_SPACE
= 0x00A0;
741 return (w
<= ' ' && (w
== ' ' || (9 <= w
&& w
<= 0xD))) || w
== NO_BREAK_SPACE
;
743 return w
== BYTE_ORDER_MARK
|| (JS_CCODE(w
) & 0x00070000) == 0x00040000;
/*
 * JS_ISPRINT(c): the (c) < 128 test runs first, so isprint() is only called
 * for values below 128; anything at or above 128 yields false.
 * JS_ISUPPER(c) / JS_ISLOWER(c): compare the JS_CTYPE(c) category against
 * JSCT_UPPERCASE_LETTER / JSCT_LOWERCASE_LETTER respectively.
 */
746 #define JS_ISPRINT(c) ((c) < 128 && isprint(c))
748 #define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
749 #define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
751 #define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \
752 ? (c) - ((int32)JS_CCODE(c) >> 22) \
754 #define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \
755 ? (c) + ((int32)JS_CCODE(c) >> 22) \
759 * Shorthands for ASCII (7-bit) decimal and hex conversion.
760 * Manually inline isdigit for performance; MSVC doesn't do this for us.
/*
 * JS7_ISDEC(c): a single unsigned comparison covers both bounds, because
 * subtracting '0' from a value below '0' wraps to a large unsigned number.
 * JS7_UNDEC(c): c minus '0'; performs no validation itself.
 * JS7_ISHEX(c) / JS7_ISLET(c): the (c) < 128 test runs first, so isxdigit() /
 * isalpha() are only called for values below 128.
 * JS7_UNHEX(c): decimal digits map through (c) - '0'; anything else is treated
 * as a letter via 10 + tolower(c) - 'a'. No validation is done here --
 * presumably callers check JS7_ISHEX first (confirm at call sites). The uintN
 * cast applies to the whole conditional expression.
 */
762 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
763 #define JS7_UNDEC(c) ((c) - '0')
764 #define JS7_ISHEX(c) ((c) < 128 && isxdigit(c))
765 #define JS7_UNHEX(c) (uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a')
766 #define JS7_ISLET(c) ((c) < 128 && isalpha(c))
768 /* Initialize the String class, returning its prototype object. */
769 extern js::Class js_StringClass
;
772 JSObject::isString() const
774 return getClass() == &js_StringClass
;
778 js_InitStringClass(JSContext
*cx
, JSObject
*obj
);
780 extern const char js_escape_str
[];
781 extern const char js_unescape_str
[];
782 extern const char js_uneval_str
[];
783 extern const char js_decodeURI_str
[];
784 extern const char js_encodeURI_str
[];
785 extern const char js_decodeURIComponent_str
[];
786 extern const char js_encodeURIComponent_str
[];
788 /* GC-allocate a string descriptor for the given malloc-allocated chars. */
789 extern JSFlatString
*
790 js_NewString(JSContext
*cx
, jschar
*chars
, size_t length
);
792 extern JSLinearString
*
793 js_NewDependentString(JSContext
*cx
, JSString
*base
, size_t start
,
796 /* Copy a counted string and GC-allocate a descriptor for it. */
797 extern JSFlatString
*
798 js_NewStringCopyN(JSContext
*cx
, const jschar
*s
, size_t n
);
800 extern JSFlatString
*
801 js_NewStringCopyN(JSContext
*cx
, const char *s
, size_t n
);
803 /* Copy a C string and GC-allocate a descriptor for it. */
804 extern JSFlatString
*
805 js_NewStringCopyZ(JSContext
*cx
, const jschar
*s
);
807 extern JSFlatString
*
808 js_NewStringCopyZ(JSContext
*cx
, const char *s
);
811 * Convert a value to a printable C string.
814 js_ValueToPrintable(JSContext
*cx
, const js::Value
&,
815 JSAutoByteString
*bytes
, bool asSource
= false);
818 * Convert a value to a string, returning null after reporting an error,
819 * otherwise returning a new string reference.
822 js_ValueToString(JSContext
*cx
, const js::Value
&v
);
827 * Most code that calls js_ValueToString knows the value is (probably) not a
828 * string, so it does not make sense to put this inline fast path into
831 static JS_ALWAYS_INLINE JSString
*
832 ValueToString_TestForStringInline(JSContext
*cx
, const Value
&v
)
836 return js_ValueToString(cx
, v
);
840 * This function implements E-262-3 section 9.8, toString. Convert the given
841 * value to a string of jschars appended to the given buffer. On error, the
842 * passed buffer may have partial results appended.
845 ValueToStringBuffer(JSContext
*cx
, const Value
&v
, StringBuffer
&sb
);
850 * Convert a value to its source expression, returning null after reporting
851 * an error, otherwise returning a new string reference.
853 extern JS_FRIEND_API(JSString
*)
854 js_ValueToSource(JSContext
*cx
, const js::Value
&v
);
857 * Compute a hash function from str. The caller can call this function even if
858 * str is not a GC-allocated thing.
861 js_HashString(JSLinearString
*str
)
863 const jschar
*s
= str
->chars();
864 size_t n
= str
->length();
866 for (h
= 0; n
; s
++, n
--)
867 h
= JS_ROTATE_LEFT32(h
, 4) ^ *s
;
874 * Test if strings are equal. The caller can call the function even if str1
875 * or str2 are not GC-allocated things.
878 EqualStrings(JSContext
*cx
, JSString
*str1
, JSString
*str2
, JSBool
*result
);
880 /* EqualStrings is infallible on linear strings. */
882 EqualStrings(JSLinearString
*str1
, JSLinearString
*str2
);
885 * Return less than, equal to, or greater than zero depending on whether
886 * str1 is less than, equal to, or greater than str2.
889 CompareStrings(JSContext
*cx
, JSString
*str1
, JSString
*str2
, int32
*result
);
892 * Return true if the string matches the given sequence of ASCII bytes.
895 StringEqualsAscii(JSLinearString
*str
, const char *asciiBytes
);
900 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
901 * The patlen argument must be positive and no greater than sBMHPatLenMax.
903 * Return the index of pat in text, or -1 if not found.
905 static const jsuint sBMHCharSetSize
= 256; /* ISO-Latin-1 */
906 static const jsuint sBMHPatLenMax
= 255; /* skip table element is uint8 */
907 static const jsint sBMHBadPattern
= -2; /* return value if pat is not ISO-Latin-1 */
910 js_BoyerMooreHorspool(const jschar
*text
, jsuint textlen
,
911 const jschar
*pat
, jsuint patlen
);
914 js_strlen(const jschar
*s
);
917 js_strchr(const jschar
*s
, jschar c
);
920 js_strchr_limit(const jschar
*s
, jschar c
, const jschar
*limit
);
/*
 * Copy n jschars from s to t. n is an element count: it is multiplied by
 * sizeof(jschar) before being handed to memcpy. Only those n elements are
 * written, so no terminator is appended by this macro.
 */
922 #define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
925 js_short_strncpy(jschar
*dest
, const jschar
*src
, size_t num
)
928 * It isn't strictly necessary here for |num| to be small, but this function
929 * is currently only called on buffers for short strings.
931 JS_ASSERT(JSShortString::fitsIntoShortString(num
));
932 for (size_t i
= 0; i
< num
; i
++)
937 * Return s advanced past any Unicode white space characters.
939 static inline const jschar
*
940 js_SkipWhiteSpace(const jschar
*s
, const jschar
*end
)
943 while (s
!= end
&& JS_ISSPACE(*s
))
949 * Inflate bytes to JS chars and vice versa. Report out of memory via cx and
950 * return null on error, otherwise return the jschar or byte vector that was
951 * JS_malloc'ed. length is updated to the length of the new string in jschars.
954 js_InflateString(JSContext
*cx
, const char *bytes
, size_t *length
);
957 js_DeflateString(JSContext
*cx
, const jschar
*chars
, size_t length
);
960 * Inflate bytes to JS chars into a buffer. 'chars' must be large enough for
961 * 'length' jschars. The buffer is NOT null-terminated. The destination length
962 * must be initialized with the buffer size and will contain on return the
963 * number of copied chars. Conversion behavior depends on js_CStringsAreUTF8.
966 js_InflateStringToBuffer(JSContext
*cx
, const char *bytes
, size_t length
,
967 jschar
*chars
, size_t *charsLength
);
970 * Same as js_InflateStringToBuffer, but always treats 'bytes' as UTF-8.
973 js_InflateUTF8StringToBuffer(JSContext
*cx
, const char *bytes
, size_t length
,
974 jschar
*chars
, size_t *charsLength
);
977 * Get number of bytes in the deflated sequence of characters. Behavior depends
978 * on js_CStringsAreUTF8.
981 js_GetDeflatedStringLength(JSContext
*cx
, const jschar
*chars
,
985 * Same as js_GetDeflatedStringLength, but always treats the result as UTF-8.
988 js_GetDeflatedUTF8StringLength(JSContext
*cx
, const jschar
*chars
,
992 * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
993 * 'length' chars. The buffer is NOT null-terminated. The destination length
994 * must be initialized with the buffer size and will contain on return the
995 * number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8.
998 js_DeflateStringToBuffer(JSContext
*cx
, const jschar
*chars
,
999 size_t charsLength
, char *bytes
, size_t *length
);
1002 * Same as js_DeflateStringToBuffer, but always treats 'bytes' as UTF-8.
1005 js_DeflateStringToUTF8Buffer(JSContext
*cx
, const jschar
*chars
,
1006 size_t charsLength
, char *bytes
, size_t *length
);
1008 /* Export a few natives and a helper to other files in SpiderMonkey. */
1010 js_str_escape(JSContext
*cx
, uintN argc
, js::Value
*argv
, js::Value
*rval
);
1013 * The String.prototype.replace fast-native entry point is exported for joined
1014 * function optimization in js{interp,tracer}.cpp.
1018 str_replace(JSContext
*cx
, uintN argc
, js::Value
*vp
);
1022 js_str_toString(JSContext
*cx
, uintN argc
, js::Value
*vp
);
1025 js_str_charAt(JSContext
*cx
, uintN argc
, js::Value
*vp
);
1028 js_str_charCodeAt(JSContext
*cx
, uintN argc
, js::Value
*vp
);
1031 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
1032 * least 6 bytes long. Return the number of UTF-8 bytes of data written.
1035 js_OneUcs4ToUtf8Char(uint8
*utf8Buffer
, uint32 ucs4Char
);
1040 PutEscapedStringImpl(char *buffer
, size_t size
, FILE *fp
, JSLinearString
*str
, uint32 quote
);
1043 * Write str into buffer escaping any non-printable or non-ASCII character
1044 * using \escapes for JS string literals.
1045 * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
1046 * the length of the written output, NOT including the NUL. Thus, a return
1047 * value of size or more means that the output was truncated. If buffer
1048 * is null, just returns the length of the output. If quote is not 0, it must
1049 * be a single or double quote character that will quote the output.
1052 PutEscapedString(char *buffer
, size_t size
, JSLinearString
*str
, uint32 quote
)
1054 size_t n
= PutEscapedStringImpl(buffer
, size
, NULL
, str
, quote
);
1056 /* PutEscapedStringImpl can only fail with a file. */
1057 JS_ASSERT(n
!= size_t(-1));
1062 * Write str into file escaping any non-printable or non-ASCII character.
1063 * If quote is not 0, it must be a single or double quote character that
1064 * will quote the output.
1067 FileEscapedString(FILE *fp
, JSLinearString
*str
, uint32 quote
)
1069 return PutEscapedStringImpl(NULL
, 0, fp
, str
, quote
) != size_t(-1);
1072 } /* namespace js */
1075 js_String(JSContext
*cx
, uintN argc
, js::Value
*vp
);
1077 #endif /* jsstr_h___ */