1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * A class which represents a fragment of text (eg inside a text
9 * node); if only codepoints below 256 are used, the text is stored as
10 * a char*; otherwise the text is stored as a char16_t*
13 #ifndef nsTextFragment_h___
14 #define nsTextFragment_h___
16 #include "mozilla/Attributes.h"
17 #include "mozilla/MemoryReporting.h"
19 #include "nsCharTraits.h"
21 #include "mozilla/StringBuffer.h"
22 #include "nsReadableUtils.h"
23 #include "nsISupportsImpl.h"
25 // XXX should this normalize the code to keep a \u0000 at the end?
27 // XXX nsTextFragmentPool?
30 * A fragment of text. If mIs2b is 1 then the m2b pointer is valid
31 * otherwise the m1b pointer is valid. If m1b is used then each byte
32 * of data represents a single ucs2 character with the high byte being zero.
35 * This class does not have a virtual destructor therefore it is not
36 * meant to be subclassed.
38 class nsTextFragment final
{
40 static nsresult
Init();
41 static void Shutdown();
44 * Default constructor. Initialize the fragment to be empty.
46 nsTextFragment() : m1b(nullptr), mAllBits(0) {
47 MOZ_COUNT_CTOR(nsTextFragment
);
48 NS_ASSERTION(sizeof(FragmentBits
) == 4, "Bad field packing!");
54 * Change the contents of this fragment to be a copy of the
55 * argument fragment, or to "" if unable to allocate enough memory.
57 nsTextFragment
& operator=(const nsTextFragment
& aOther
);
60 * Return true if this fragment is represented by char16_t data
62 bool Is2b() const { return mState
.mIs2b
; }
65 * Return true if this fragment contains Bidi text
66 * For performance reasons this flag is only set if explicitly requested (by
67 * setting the aUpdateBidi argument on SetTo or Append to true).
69 bool IsBidi() const { return mState
.mIsBidi
; }
72 * Get a pointer to constant char16_t data.
74 const char16_t
* Get2b() const {
75 MOZ_ASSERT(Is2b(), "not 2b text");
76 return static_cast<char16_t
*>(m2b
->Data());
80 * Get a pointer to constant char data.
82 const char* Get1b() const {
83 NS_ASSERTION(!Is2b(), "not 1b text");
84 return (const char*)m1b
;
88 * Get the length of the fragment. The length is the number of logical
89 * characters, not the number of bytes to store the characters.
91 uint32_t GetLength() const { return mState
.mLength
; }
93 #define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))
95 bool CanGrowBy(size_t n
) const {
96 return n
< (1 << 29) && mState
.mLength
+ n
< (1 << 29);
100 * Change the contents of this fragment to be a copy of the given
101 * buffer. If aUpdateBidi is true, contents of the fragment will be scanned,
102 * and mState.mIsBidi will be turned on if it includes any Bidi characters.
103 * If aForce2b is true, aBuffer will be stored as char16_t as is. Then,
104 * you can access the value faster but may waste memory if all characters
105 * are less than U+0100.
107 bool SetTo(const char16_t
* aBuffer
, uint32_t aLength
, bool aUpdateBidi
,
110 bool SetTo(const nsString
& aString
, bool aUpdateBidi
, bool aForce2b
) {
111 if (MOZ_UNLIKELY(aString
.Length() > NS_MAX_TEXT_FRAGMENT_LENGTH
)) {
115 if (aForce2b
&& !aUpdateBidi
) {
116 if (mozilla::StringBuffer
* buffer
= aString
.GetStringBuffer()) {
117 NS_ADDREF(m2b
= buffer
);
118 mState
.mInHeap
= true;
120 mState
.mLength
= aString
.Length();
125 return SetTo(aString
.get(), aString
.Length(), aUpdateBidi
, aForce2b
);
129 * Append aData to the end of this fragment. If aUpdateBidi is true, contents
130 * of the fragment will be scanned, and mState.mIsBidi will be turned on if
131 * it includes any Bidi characters.
132 * If aForce2b is true, the string will be stored as char16_t as is. Then,
133 * you can access the value faster but may waste memory if all characters
134 * are less than U+0100.
136 bool Append(const char16_t
* aBuffer
, uint32_t aLength
, bool aUpdateBidi
,
140 * Append the contents of this string fragment to aString
142 void AppendTo(nsAString
& aString
) const {
143 if (!AppendTo(aString
, mozilla::fallible
)) {
144 aString
.AllocFailed(aString
.Length() + GetLength());
149 * Append the contents of this string fragment to aString
150 * @return false if an out of memory condition is detected, true otherwise
152 [[nodiscard
]] bool AppendTo(nsAString
& aString
,
153 const mozilla::fallible_t
& aFallible
) const {
155 if (aString
.IsEmpty()) {
156 aString
.Assign(m2b
, mState
.mLength
);
159 return aString
.Append(Get2b(), mState
.mLength
, aFallible
);
161 return AppendASCIItoUTF16(Substring(m1b
, mState
.mLength
), aString
,
166 * Append a substring of the contents of this string fragment to aString.
167 * @param aOffset where to start the substring in this text fragment
168 * @param aLength the length of the substring
170 void AppendTo(nsAString
& aString
, uint32_t aOffset
, uint32_t aLength
) const {
171 if (!AppendTo(aString
, aOffset
, aLength
, mozilla::fallible
)) {
172 aString
.AllocFailed(aString
.Length() + aLength
);
177 * Append a substring of the contents of this string fragment to aString.
178 * @param aString the string in which to append
179 * @param aOffset where to start the substring in this text fragment
180 * @param aLength the length of the substring
181 * @return false if an out of memory condition is detected, true otherwise
183 [[nodiscard
]] bool AppendTo(nsAString
& aString
, uint32_t aOffset
,
185 const mozilla::fallible_t
& aFallible
) const {
187 bool ok
= aString
.Append(Get2b() + aOffset
, aLength
, aFallible
);
194 return AppendASCIItoUTF16(Substring(m1b
+ aOffset
, aLength
), aString
,
200 * Make a copy of the fragment's contents starting at offset for
201 * count characters. The offset and count will be adjusted to
202 * lie within the fragment's data. The fragment's data is converted if necessary.
205 void CopyTo(char16_t
* aDest
, uint32_t aOffset
, uint32_t aCount
);
208 * Return the character in the text-fragment at the given
209 * index. This always returns a char16_t.
211 char16_t
CharAt(uint32_t aIndex
) const {
212 MOZ_ASSERT(aIndex
< mState
.mLength
, "bad index");
213 return mState
.mIs2b
? Get2b()[aIndex
]
214 : static_cast<unsigned char>(m1b
[aIndex
]);
218 * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at
219 * aIndex is high surrogate and it's followed by low surrogate.
221 inline bool IsHighSurrogateFollowedByLowSurrogateAt(uint32_t aIndex
) const {
222 MOZ_ASSERT(aIndex
< mState
.mLength
);
223 if (!mState
.mIs2b
|| aIndex
+ 1 >= mState
.mLength
) {
226 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex
], Get2b()[aIndex
+ 1]);
230 * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at
231 * aIndex is low surrogate and it follows high surrogate.
233 inline bool IsLowSurrogateFollowingHighSurrogateAt(uint32_t aIndex
) const {
234 MOZ_ASSERT(aIndex
< mState
.mLength
);
235 if (!mState
.mIs2b
|| !aIndex
) {
238 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex
- 1], Get2b()[aIndex
]);
242 * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character
243 * at aIndex is a high surrogate followed by low surrogate, returns character
244 * code for the pair. If the index is low surrogate, or a high surrogate but
245 * not in a pair, returns 0.
247 inline char32_t
ScalarValueAt(uint32_t aIndex
) const {
248 MOZ_ASSERT(aIndex
< mState
.mLength
);
250 return static_cast<unsigned char>(m1b
[aIndex
]);
252 char16_t ch
= Get2b()[aIndex
];
253 if (!IS_SURROGATE(ch
)) {
256 if (aIndex
+ 1 < mState
.mLength
&& NS_IS_HIGH_SURROGATE(ch
)) {
257 char16_t nextCh
= Get2b()[aIndex
+ 1];
258 if (NS_IS_LOW_SURROGATE(nextCh
)) {
259 return SURROGATE_TO_UCS4(ch
, nextCh
);
265 void SetBidi(bool aBidi
) { mState
.mIsBidi
= aBidi
; }
267 struct FragmentBits
{
268 // uint32_t to ensure that the values are unsigned, because we
269 // want 0/1, not 0/-1!
270 // Making these bool causes Windows to not actually pack them,
271 // which causes crashes because we assume this structure is no more than
271 // 32 bits.
273 uint32_t mInHeap
: 1;
275 uint32_t mIsBidi
: 1;
276 // Note that when you change the bits of mLength, you also need to change
277 // NS_MAX_TEXT_FRAGMENT_LENGTH.
278 uint32_t mLength
: 29;
281 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf
) const;
284 * Check whether the text in this fragment is the same as the text in the
284 * other fragment.
287 [[nodiscard
]] bool TextEquals(const nsTextFragment
& aOther
) const;
// Sentinel index value, equal to UINT32_MAX.
// NOTE(review): presumably what FindChar() returns when there is no match;
// confirm against the FindChar() bodies.
static constexpr uint32_t kNotFound = UINT32_MAX;
291 [[nodiscard
]] uint32_t FindChar(char aChar
, uint32_t aOffset
= 0) const {
292 if (aOffset
>= GetLength()) {
296 const char16_t
* end
= Get2b() + GetLength();
297 for (const char16_t
* ch
= Get2b() + aOffset
; ch
!= end
; ch
++) {
304 const char* end
= Get1b() + GetLength();
305 for (const char* ch
= Get1b() + aOffset
; ch
!= end
; ch
++) {
313 [[nodiscard
]] uint32_t FindChar(char16_t aChar
, uint32_t aOffset
= 0) const {
314 if (aOffset
>= GetLength()) {
318 const char16_t
* end
= Get2b() + GetLength();
319 for (const char16_t
* ch
= Get2b() + aOffset
; ch
!= end
; ch
++) {
329 const char* end
= Get1b() + GetLength();
330 for (const char* ch
= Get1b() + aOffset
; ch
!= end
; ch
++) {
342 * Scan the contents of the fragment and turn on mState.mIsBidi if it
343 * includes any Bidi characters.
345 void UpdateBidiFlag(const char16_t
* aBuffer
, uint32_t aLength
);
348 mozilla::StringBuffer
* m2b
;
349 const char* m1b
; // This is const since it can point to shared data
358 #endif /* nsTextFragment_h___ */