2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
26 #include "wtf/text/StringImpl.h"
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/OwnPtr.h"
32 #include "wtf/PartitionAlloc.h"
33 #include "wtf/Partitions.h"
34 #include "wtf/PassOwnPtr.h"
35 #include "wtf/StdLibExtras.h"
36 #include "wtf/text/AtomicString.h"
37 #include "wtf/text/CharacterNames.h"
38 #include "wtf/text/StringBuffer.h"
39 #include "wtf/text/StringHash.h"
40 #include <unicode/translit.h>
41 #include <unicode/unistr.h>
44 #include "wtf/DataLog.h"
45 #include "wtf/HashMap.h"
46 #include "wtf/HashSet.h"
47 #include "wtf/RefCounted.h"
48 #include "wtf/ThreadingPrimitives.h"
56 using namespace Unicode
;
58 static_assert(sizeof(StringImpl
) == 3 * sizeof(int), "StringImpl should stay small");
62 static Mutex
& statsMutex()
64 DEFINE_STATIC_LOCAL(Mutex
, mutex
, ());
68 static HashSet
<void*>& liveStrings()
70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
71 DEFINE_STATIC_LOCAL(HashSet
<void*>, strings
, ());
75 void addStringForStats(StringImpl
* string
)
77 MutexLocker
locker(statsMutex());
78 liveStrings().add(string
);
81 void removeStringForStats(StringImpl
* string
)
83 MutexLocker
locker(statsMutex());
84 liveStrings().remove(string
);
// Builds a short, NUL-terminated preview of |string| in |snippet|, looking at
// no more than kMaxSnippetLength leading characters of the string.
// NOTE(review): several lines of this body are not visible in this excerpt
// (the declaration of |i|, the bodies of the two `if`s below); the comments
// here describe only what the visible lines establish.
87 static void fillWithSnippet(const StringImpl
* string
, Vector
<char>& snippet
)
89 const unsigned kMaxSnippetLength
= 64;
// Capacity estimate: the characters kept, plus room for a "..." marker and
// the terminator. Note the marker room is added when the length *equals*
// kMaxSnippetLength, whereas the truncation test further down is
// `i < string->length()`; for a string of exactly kMaxSnippetLength
// characters the estimate is therefore 3 larger than needed.
92 size_t expectedLength
= std::min(string
->length(), kMaxSnippetLength
);
93 if (expectedLength
== kMaxSnippetLength
)
94 expectedLength
+= 3; // For the "...".
95 ++expectedLength
; // For the terminating '\0'.
96 snippet
.reserveCapacity(expectedLength
);
// Walk the leading characters, stopping at the end of the string or at
// kMaxSnippetLength, whichever comes first.
99 for (i
= 0; i
< string
->length() && i
< kMaxSnippetLength
; ++i
) {
100 UChar c
= (*string
)[i
];
101 if (isASCIIPrintable(c
))
// True only when the loop stopped before reaching the end of the string.
106 if (i
< string
->length()) {
111 snippet
.append('\0');
114 static bool isUnnecessarilyWide(const StringImpl
* string
)
116 if (string
->is8Bit())
119 for (unsigned i
= 0; i
< string
->length(); ++i
)
120 c
|= (*string
)[i
] >> 8;
124 class PerStringStats
: public RefCounted
<PerStringStats
> {
126 static PassRefPtr
<PerStringStats
> create()
128 return adoptRef(new PerStringStats
);
131 void add(const StringImpl
* string
)
135 m_length
= string
->length();
136 fillWithSnippet(string
, m_snippet
);
138 if (string
->isAtomic())
139 ++m_numberOfAtomicCopies
;
140 if (isUnnecessarilyWide(string
))
141 m_unnecessarilyWide
= true;
144 size_t totalCharacters() const
146 return m_numberOfCopies
* m_length
;
151 const char* status
= "ok";
152 if (m_unnecessarilyWide
)
154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies
, status
, m_length
, m_snippet
.data());
157 bool m_unnecessarilyWide
;
158 unsigned m_numberOfCopies
;
160 unsigned m_numberOfAtomicCopies
;
161 Vector
<char> m_snippet
;
165 : m_unnecessarilyWide(false)
166 , m_numberOfCopies(0)
168 , m_numberOfAtomicCopies(0)
// Ordering for PerStringStats entries. Keys are compared in turn, and the
// first one that differs decides the result:
//   1. m_unnecessarilyWide  (an entry that is NOT unnecessarily wide orders
//                            before one that is)
//   2. totalCharacters()
//   3. m_numberOfCopies
//   4. m_length
//   5. m_numberOfAtomicCopies
// Both RefPtrs are dereferenced unconditionally, so neither may be null.
173 bool operator<(const RefPtr
<PerStringStats
>& a
, const RefPtr
<PerStringStats
>& b
)
175 if (a
->m_unnecessarilyWide
!= b
->m_unnecessarilyWide
)
176 return !a
->m_unnecessarilyWide
&& b
->m_unnecessarilyWide
;
177 if (a
->totalCharacters() != b
->totalCharacters())
178 return a
->totalCharacters() < b
->totalCharacters();
179 if (a
->m_numberOfCopies
!= b
->m_numberOfCopies
)
180 return a
->m_numberOfCopies
< b
->m_numberOfCopies
;
181 if (a
->m_length
!= b
->m_length
)
182 return a
->m_length
< b
->m_length
;
183 return a
->m_numberOfAtomicCopies
< b
->m_numberOfAtomicCopies
;
186 static void printLiveStringStats(void*)
188 MutexLocker
locker(statsMutex());
189 HashSet
<void*>& strings
= liveStrings();
191 HashMap
<StringImpl
*, RefPtr
<PerStringStats
>> stats
;
192 for (HashSet
<void*>::iterator iter
= strings
.begin(); iter
!= strings
.end(); ++iter
) {
193 StringImpl
* string
= static_cast<StringImpl
*>(*iter
);
194 HashMap
<StringImpl
*, RefPtr
<PerStringStats
>>::iterator entry
= stats
.find(string
);
195 RefPtr
<PerStringStats
> value
= entry
== stats
.end() ? RefPtr
<PerStringStats
>(PerStringStats::create()) : entry
->value
;
197 stats
.set(string
, value
.release());
200 Vector
<RefPtr
<PerStringStats
>> all
;
201 for (HashMap
<StringImpl
*, RefPtr
<PerStringStats
>>::iterator iter
= stats
.begin(); iter
!= stats
.end(); ++iter
)
202 all
.append(iter
->value
);
204 std::sort(all
.begin(), all
.end());
205 std::reverse(all
.begin(), all
.end());
206 for (size_t i
= 0; i
< 20 && i
< all
.size(); ++i
)
210 StringStats
StringImpl::m_stringStats
;
212 unsigned StringStats::s_stringRemovesTillPrintStats
= StringStats::s_printStringStatsFrequency
;
214 void StringStats::removeString(StringImpl
* string
)
216 unsigned length
= string
->length();
217 --m_totalNumberStrings
;
219 if (string
->is8Bit()) {
220 --m_number8BitStrings
;
221 m_total8BitData
-= length
;
223 --m_number16BitStrings
;
224 m_total16BitData
-= length
;
227 if (!--s_stringRemovesTillPrintStats
) {
228 s_stringRemovesTillPrintStats
= s_printStringStatsFrequency
;
// Logs a summary of the string counters through dataLogF: one line each for
// 8-bit strings, 16-bit strings and the total, then the "savings" and the
// StringImpl header overhead. Finally asks the main thread to run
// printLiveStringStats.
233 void StringStats::printStats()
235 dataLogF("String stats for process id %d:\n", getpid());
237 unsigned long long totalNumberCharacters
= m_total8BitData
+ m_total16BitData
;
// Each ratio below falls back to 0.0 when its denominator is zero.
238 double percent8Bit
= m_totalNumberStrings
? ((double)m_number8BitStrings
* 100) / (double)m_totalNumberStrings
: 0.0;
239 double average8bitLength
= m_number8BitStrings
? (double)m_total8BitData
/ (double)m_number8BitStrings
: 0.0;
// For 8-bit data the character count and the byte count are the same value.
240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings
, percent8Bit
, m_total8BitData
, m_total8BitData
, average8bitLength
);
242 double percent16Bit
= m_totalNumberStrings
? ((double)m_number16BitStrings
* 100) / (double)m_totalNumberStrings
: 0.0;
243 double average16bitLength
= m_number16BitStrings
? (double)m_total16BitData
/ (double)m_number16BitStrings
: 0.0;
// 16-bit data is reported as two bytes per character.
244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings
, percent16Bit
, m_total16BitData
, m_total16BitData
* 2, average16bitLength
);
246 double averageLength
= m_totalNumberStrings
? (double)totalNumberCharacters
/ (double)m_totalNumberStrings
: 0.0;
247 unsigned long long totalDataBytes
= m_total8BitData
+ m_total16BitData
* 2;
248 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings
, totalNumberCharacters
, totalDataBytes
, averageLength
);
// One byte is counted as saved per 8-bit character (presumably relative to
// storing that character as 16 bits). The percentage is taken against
// totalDataBytes + totalSavedBytes.
249 unsigned long long totalSavedBytes
= m_total8BitData
;
250 double percentSavings
= totalSavedBytes
? ((double)totalSavedBytes
* 100) / (double)(totalDataBytes
+ totalSavedBytes
) : 0.0;
251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes
, percentSavings
);
// NOTE(review): totalOverhead is `unsigned`, so the product with
// sizeof(StringImpl) is narrowed to unsigned here — confirm the string count
// cannot make this wrap.
253 unsigned totalOverhead
= m_totalNumberStrings
* sizeof(StringImpl
);
// NOTE(review): unlike the ratios above, this division has no zero guard;
// with totalDataBytes == 0 the floating-point result is inf or NaN.
254 double overheadPercent
= (double)totalOverhead
/ (double)totalDataBytes
* 100;
255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead
, overheadPercent
);
257 internal::callOnMainThread(&printLiveStringStats
, nullptr);
261 void* StringImpl::operator new(size_t size
)
263 ASSERT(size
== sizeof(StringImpl
));
264 return Partitions::bufferMalloc(size
);
267 void StringImpl::operator delete(void* ptr
)
269 Partitions::bufferFree(ptr
);
272 inline StringImpl::~StringImpl()
276 STRING_STATS_REMOVE_STRING(this);
279 AtomicString::remove(this);
282 void StringImpl::destroyIfNotStatic()
288 PassRefPtr
<StringImpl
> StringImpl::createUninitialized(unsigned length
, LChar
*& data
)
295 // Allocate a single buffer large enough to contain the StringImpl
296 // struct as well as the data which it contains. This removes one
297 // heap allocation from this call.
298 StringImpl
* string
= static_cast<StringImpl
*>(Partitions::bufferMalloc(allocationSize
<LChar
>(length
)));
300 data
= reinterpret_cast<LChar
*>(string
+ 1);
301 return adoptRef(new (string
) StringImpl(length
, Force8BitConstructor
));
304 PassRefPtr
<StringImpl
> StringImpl::createUninitialized(unsigned length
, UChar
*& data
)
311 // Allocate a single buffer large enough to contain the StringImpl
312 // struct as well as the data which it contains. This removes one
313 // heap allocation from this call.
314 StringImpl
* string
= static_cast<StringImpl
*>(Partitions::bufferMalloc(allocationSize
<UChar
>(length
)));
316 data
= reinterpret_cast<UChar
*>(string
+ 1);
317 return adoptRef(new (string
) StringImpl(length
));
// Resizes the storage behind |originalString| to hold |length| characters and
// returns a StringImpl constructed in the resized buffer. The caller must
// hold the only reference (asserted below).
// NOTE(review): the condition that selects between the two `return`
// statements at the end is not visible in this excerpt; presumably it is
// |is8Bit| — confirm.
320 PassRefPtr
<StringImpl
> StringImpl::reallocate(PassRefPtr
<StringImpl
> originalString
, unsigned length
)
322 ASSERT(originalString
->hasOneRef());
// Read the width now: the object is explicitly destroyed a few lines below
// and must not be queried after that.
327 bool is8Bit
= originalString
->is8Bit();
328 // Same as createUninitialized() except here we use realloc.
329 size_t size
= is8Bit
? allocationSize
<LChar
>(length
) : allocationSize
<UChar
>(length
);
// Run the destructor by hand, then pass the raw storage to bufferRealloc.
// leakRef() hands over the pointer without the PassRefPtr releasing it.
330 originalString
->~StringImpl();
331 StringImpl
* string
= static_cast<StringImpl
*>(Partitions::bufferRealloc(originalString
.leakRef(), size
));
// Placement-construct a fresh header at the start of the resized buffer:
// first the 8-bit constructor form, then the 16-bit form.
333 return adoptRef(new (string
) StringImpl(length
, Force8BitConstructor
));
334 return adoptRef(new (string
) StringImpl(length
));
337 static StaticStringsTable
& staticStrings()
339 DEFINE_STATIC_LOCAL(StaticStringsTable
, staticStrings
, ());
340 return staticStrings
;
344 static bool s_allowCreationOfStaticStrings
= true;
347 const StaticStringsTable
& StringImpl::allStaticStrings()
349 return staticStrings();
352 void StringImpl::freezeStaticStrings()
354 ASSERT(isMainThread());
357 s_allowCreationOfStaticStrings
= false;
361 unsigned StringImpl::m_highestStaticStringLength
= 0;
363 StringImpl
* StringImpl::createStatic(const char* string
, unsigned length
, unsigned hash
)
365 ASSERT(s_allowCreationOfStaticStrings
);
369 StaticStringsTable::const_iterator it
= staticStrings().find(hash
);
370 if (it
!= staticStrings().end()) {
371 ASSERT(!memcmp(string
, it
->value
+ 1, length
* sizeof(LChar
)));
375 // Allocate a single buffer large enough to contain the StringImpl
376 // struct as well as the data which it contains. This removes one
377 // heap allocation from this call.
378 RELEASE_ASSERT(length
<= ((std::numeric_limits
<unsigned>::max() - sizeof(StringImpl
)) / sizeof(LChar
)));
379 size_t size
= sizeof(StringImpl
) + length
* sizeof(LChar
);
381 WTF_ANNOTATE_SCOPED_MEMORY_LEAK
;
382 StringImpl
* impl
= static_cast<StringImpl
*>(Partitions::bufferMalloc(size
));
384 LChar
* data
= reinterpret_cast<LChar
*>(impl
+ 1);
385 impl
= new (impl
) StringImpl(length
, hash
, StaticString
);
386 memcpy(data
, string
, length
* sizeof(LChar
));
388 impl
->assertHashIsCorrect();
391 ASSERT(isMainThread());
392 m_highestStaticStringLength
= std::max(m_highestStaticStringLength
, length
);
393 staticStrings().add(hash
, impl
);
394 WTF_ANNOTATE_BENIGN_RACE(impl
,
395 "Benign race on the reference counter of a static string created by StringImpl::createStatic");
400 PassRefPtr
<StringImpl
> StringImpl::create(const UChar
* characters
, unsigned length
)
402 if (!characters
|| !length
)
406 RefPtr
<StringImpl
> string
= createUninitialized(length
, data
);
407 memcpy(data
, characters
, length
* sizeof(UChar
));
408 return string
.release();
411 PassRefPtr
<StringImpl
> StringImpl::create(const LChar
* characters
, unsigned length
)
413 if (!characters
|| !length
)
417 RefPtr
<StringImpl
> string
= createUninitialized(length
, data
);
418 memcpy(data
, characters
, length
* sizeof(LChar
));
419 return string
.release();
// Creates a string from UTF-16 input, narrowing each character to LChar as
// long as every character has a zero high byte. As soon as a character with
// any of the bits 0xff00 set is found, the partially filled buffer is
// abandoned and the result comes from create(const UChar*, unsigned).
// NOTE(review): the statement taken for null/empty input and the declaration
// of |data| are not visible in this excerpt.
422 PassRefPtr
<StringImpl
> StringImpl::create8BitIfPossible(const UChar
* characters
, unsigned length
)
424 if (!characters
|| !length
)
428 RefPtr
<StringImpl
> string
= createUninitialized(length
, data
);
430 for (size_t i
= 0; i
< length
; ++i
) {
// High byte set: this character does not fit in an LChar.
431 if (characters
[i
] & 0xff00)
432 return create(characters
, length
);
433 data
[i
] = static_cast<LChar
>(characters
[i
]);
436 return string
.release();
439 PassRefPtr
<StringImpl
> StringImpl::create(const LChar
* string
)
443 size_t length
= strlen(reinterpret_cast<const char*>(string
));
444 RELEASE_ASSERT(length
<= numeric_limits
<unsigned>::max());
445 return create(string
, length
);
448 bool StringImpl::containsOnlyWhitespace()
450 // FIXME: The definition of whitespace here includes a number of characters
451 // that are not whitespace from the point of view of LayoutText; I wonder if
452 // that's a problem in practice.
454 for (unsigned i
= 0; i
< m_length
; ++i
) {
455 UChar c
= characters8()[i
];
456 if (!isASCIISpace(c
))
463 for (unsigned i
= 0; i
< m_length
; ++i
) {
464 UChar c
= characters16()[i
];
465 if (!isASCIISpace(c
))
471 PassRefPtr
<StringImpl
> StringImpl::substring(unsigned start
, unsigned length
)
473 if (start
>= m_length
)
475 unsigned maxLength
= m_length
- start
;
476 if (length
>= maxLength
) {
482 return create(characters8() + start
, length
);
484 return create(characters16() + start
, length
);
// Returns the code point that starts at index |i|.
//  - The first return reads the 8-bit buffer directly (the guard selecting
//    it is not visible in this excerpt; presumably is8Bit()).
//  - For 16-bit data, a code unit that is not a surrogate is returned as is.
//  - A lead surrogate followed (within bounds) by a trail surrogate is
//    combined into one supplementary code point.
// NOTE(review): the value returned when none of these cases applies (e.g. an
// unpaired surrogate) is not visible here.
487 UChar32
StringImpl::characterStartingAt(unsigned i
)
490 return characters8()[i
];
491 if (U16_IS_SINGLE(characters16()[i
]))
492 return characters16()[i
];
// `i + 1 < m_length` keeps the trail-surrogate read inside the buffer.
493 if (i
+ 1 < m_length
&& U16_IS_LEAD(characters16()[i
]) && U16_IS_TRAIL(characters16()[i
+ 1]))
494 return U16_GET_SUPPLEMENTARY(characters16()[i
], characters16()[i
+ 1]);
498 PassRefPtr
<StringImpl
> StringImpl::lower()
500 // Note: This is a hot function in the Dromaeo benchmark, specifically the
501 // no-op code path up through the first 'return' statement.
503 // First scan the string for uppercase and non-ASCII characters:
505 unsigned firstIndexToBeLowered
= m_length
;
506 for (unsigned i
= 0; i
< m_length
; ++i
) {
507 LChar ch
= characters8()[i
];
508 if (UNLIKELY(isASCIIUpper(ch
) || ch
& ~0x7F)) {
509 firstIndexToBeLowered
= i
;
514 // Nothing to do if the string is all ASCII with no uppercase.
515 if (firstIndexToBeLowered
== m_length
)
519 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data8
);
520 memcpy(data8
, characters8(), firstIndexToBeLowered
);
522 for (unsigned i
= firstIndexToBeLowered
; i
< m_length
; ++i
) {
523 LChar ch
= characters8()[i
];
524 data8
[i
] = UNLIKELY(ch
& ~0x7F) ? static_cast<LChar
>(Unicode::toLower(ch
))
528 return newImpl
.release();
534 const UChar
* end
= characters16() + m_length
;
535 for (const UChar
* chp
= characters16(); chp
!= end
; ++chp
) {
536 if (UNLIKELY(isASCIIUpper(*chp
)))
540 // Nothing to do if the string is all ASCII with no uppercase.
541 if (noUpper
&& !(ored
& ~0x7F))
544 RELEASE_ASSERT(m_length
<= static_cast<unsigned>(numeric_limits
<int32_t>::max()));
545 int32_t length
= m_length
;
547 if (!(ored
& ~0x7F)) {
549 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data16
);
551 for (int32_t i
= 0; i
< length
; ++i
) {
552 UChar c
= characters16()[i
];
553 data16
[i
] = toASCIILower(c
);
555 return newImpl
.release();
558 // Do a slower implementation for cases that include non-ASCII characters.
560 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data16
);
563 int32_t realLength
= Unicode::toLower(data16
, length
, characters16(), m_length
, &error
);
564 if (!error
&& realLength
== length
)
565 return newImpl
.release();
567 newImpl
= createUninitialized(realLength
, data16
);
568 Unicode::toLower(data16
, realLength
, characters16(), m_length
, &error
);
571 return newImpl
.release();
574 PassRefPtr
<StringImpl
> StringImpl::upper()
576 // This function could be optimized for no-op cases the way lower() is,
577 // but in empirical testing, few actual calls to upper() are no-ops, so
578 // it wouldn't be worth the extra time for pre-scanning.
580 RELEASE_ASSERT(m_length
<= static_cast<unsigned>(numeric_limits
<int32_t>::max()));
581 int32_t length
= m_length
;
585 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data8
);
587 // Do a faster loop for the case where all the characters are ASCII.
589 for (int i
= 0; i
< length
; ++i
) {
590 LChar c
= characters8()[i
];
592 data8
[i
] = toASCIIUpper(c
);
595 return newImpl
.release();
597 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
598 int numberSharpSCharacters
= 0;
600 // There are two special cases.
601 // 1. latin-1 characters when converted to upper case are 16 bit characters.
602 // 2. Lower case sharp-S converts to "SS" (two characters)
603 for (int32_t i
= 0; i
< length
; ++i
) {
604 LChar c
= characters8()[i
];
605 if (UNLIKELY(c
== smallLetterSharpSCharacter
))
606 ++numberSharpSCharacters
;
607 UChar upper
= static_cast<UChar
>(Unicode::toUpper(c
));
608 if (UNLIKELY(upper
> 0xff)) {
609 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
612 data8
[i
] = static_cast<LChar
>(upper
);
615 if (!numberSharpSCharacters
)
616 return newImpl
.release();
618 // We have numberSharpSCharacters sharp-s characters, but none of the other special characters.
619 newImpl
= createUninitialized(m_length
+ numberSharpSCharacters
, data8
);
623 for (int32_t i
= 0; i
< length
; ++i
) {
624 LChar c
= characters8()[i
];
625 if (c
== smallLetterSharpSCharacter
) {
629 *dest
++ = static_cast<LChar
>(Unicode::toUpper(c
));
632 return newImpl
.release();
636 RefPtr
<StringImpl
> upconverted
= upconvertedString();
637 const UChar
* source16
= upconverted
->characters16();
640 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data16
);
642 // Do a faster loop for the case where all the characters are ASCII.
644 for (int i
= 0; i
< length
; ++i
) {
645 UChar c
= source16
[i
];
647 data16
[i
] = toASCIIUpper(c
);
650 return newImpl
.release();
652 // Do a slower implementation for cases that include non-ASCII characters.
654 int32_t realLength
= Unicode::toUpper(data16
, length
, source16
, m_length
, &error
);
655 if (!error
&& realLength
== length
)
657 newImpl
= createUninitialized(realLength
, data16
);
658 Unicode::toUpper(data16
, realLength
, source16
, m_length
, &error
);
661 return newImpl
.release();
// Tests whether |localeId| names the language |lang|: either the two are
// equal ignoring case, or |localeId| starts (case-insensitively) with |lang|
// followed by one of the delimiters '-', '_' or '@'.
// |lang| must be 2 or 3 characters long (release-asserted below).
// NOTE(review): the statements executed when a test succeeds, and the final
// return, are not visible in this excerpt.
// NOTE(review): localeIdPrefix is a mutable function-local static, so every
// call writes into the same 4-byte buffer. If this can be reached from more
// than one thread the calls race on it — confirm, or make it a plain local.
664 static bool inline localeIdMatchesLang(const AtomicString
& localeId
, const char* lang
)
666 if (equalIgnoringCase(localeId
, lang
))
668 static char localeIdPrefix
[4];
669 static const char delimeter
[4] = "-_@";
671 size_t langLength
= strlen(lang
);
// With langLength <= 3, |lang| plus one delimiter fits the 4-byte buffer.
672 RELEASE_ASSERT(langLength
>= 2 && langLength
<= 3);
// strncpy copies exactly langLength bytes and does not NUL-terminate here.
// The prefix is always used with an explicit length (langLength + 1).
673 strncpy(localeIdPrefix
, lang
, langLength
);
// Try each of the three delimiters in turn directly after the language code.
674 for (int i
= 0; i
< 3; ++i
) {
675 localeIdPrefix
[langLength
] = delimeter
[i
];
676 // case-insensitive comparison
677 if (localeId
.impl() && localeId
.impl()->startsWith(localeIdPrefix
, langLength
+ 1, TextCaseInsensitive
))
683 typedef int32_t (*icuCaseConverter
)(UChar
*, int32_t, const UChar
*, int32_t, const char*, UErrorCode
*);
// Runs an ICU case-conversion function (|converter|) over |source16| for the
// given |locale| and returns the converted string.
//  - On success the output is truncated to the length the converter
//    reported and returned.
//  - On any failure other than U_BUFFER_OVERFLOW_ERROR, |originalString| is
//    returned unchanged.
//  - On U_BUFFER_OVERFLOW_ERROR a new output buffer of the reported length
//    is allocated (the retry itself is not visible in this excerpt).
// NOTE(review): |length| and |targetLength| are size_t while the converter
// signature takes int32_t; this relies on callers having range-checked the
// length — confirm.
685 static PassRefPtr
<StringImpl
> caseConvert(const UChar
* source16
, size_t length
, icuCaseConverter converter
, const char* locale
, StringImpl
* originalString
)
// First attempt: assume the result is as long as the input.
688 size_t targetLength
= length
;
689 RefPtr
<StringImpl
> output
= StringImpl::createUninitialized(length
, data16
);
691 UErrorCode status
= U_ZERO_ERROR
;
692 targetLength
= converter(data16
, targetLength
, source16
, length
, locale
, &status
);
693 if (U_SUCCESS(status
)) {
695 output
->truncateAssumingIsolated(targetLength
);
696 return output
.release();
698 if (status
!= U_BUFFER_OVERFLOW_ERROR
)
699 return originalString
;
700 // Expand the buffer.
701 output
= StringImpl::createUninitialized(targetLength
, data16
);
705 PassRefPtr
<StringImpl
> StringImpl::lower(const AtomicString
& localeIdentifier
)
707 // Use the more-optimized code path most of the time.
708 // Only Turkic (tr and az) languages and Lithuanian require
709 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
710 // it's identical to the locale-agnostic lowercasing. Context-dependent
711 // handling of Greek capital sigma is built into the common lowercasing
713 const char* localeForConversion
= 0;
714 if (localeIdMatchesLang(localeIdentifier
, "tr") || localeIdMatchesLang(localeIdentifier
, "az"))
715 localeForConversion
= "tr";
716 else if (localeIdMatchesLang(localeIdentifier
, "lt"))
717 localeForConversion
= "lt";
721 if (m_length
> static_cast<unsigned>(numeric_limits
<int32_t>::max()))
723 int length
= m_length
;
725 RefPtr
<StringImpl
> upconverted
= upconvertedString();
726 const UChar
* source16
= upconverted
->characters16();
727 return caseConvert(source16
, length
, u_strToLower
, localeForConversion
, this);
730 PassRefPtr
<StringImpl
> StringImpl::upper(const AtomicString
& localeIdentifier
)
732 // Use the more-optimized code path most of the time.
733 // Only Turkic (tr and az) languages, Greek and Lithuanian require
734 // locale-specific uppercasing rules.
735 icu::UnicodeString transliteratorId
;
736 const char* localeForConversion
= 0;
737 if (localeIdMatchesLang(localeIdentifier
, "tr") || localeIdMatchesLang(localeIdentifier
, "az"))
738 localeForConversion
= "tr";
739 else if (localeIdMatchesLang(localeIdentifier
, "el"))
740 transliteratorId
= UNICODE_STRING_SIMPLE("el-Upper");
741 else if (localeIdMatchesLang(localeIdentifier
, "lt"))
742 localeForConversion
= "lt";
746 if (m_length
> static_cast<unsigned>(numeric_limits
<int32_t>::max()))
748 int length
= m_length
;
750 RefPtr
<StringImpl
> upconverted
= upconvertedString();
751 const UChar
* source16
= upconverted
->characters16();
753 if (localeForConversion
)
754 return caseConvert(source16
, length
, u_strToUpper
, localeForConversion
, this);
756 // TODO(jungshik): Cache transliterator if perf penalty warrants it for Greek.
757 UErrorCode status
= U_ZERO_ERROR
;
758 OwnPtr
<icu::Transliterator
> translit
=
759 adoptPtr(icu::Transliterator::createInstance(transliteratorId
, UTRANS_FORWARD
, status
));
760 if (U_FAILURE(status
))
763 // target will be copy-on-write.
764 icu::UnicodeString
target(false, source16
, length
);
765 translit
->transliterate(target
);
767 return create(target
.getBuffer(), target
.length());
770 PassRefPtr
<StringImpl
> StringImpl::fill(UChar character
)
772 if (!(character
& ~0x7F)) {
774 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
775 for (unsigned i
= 0; i
< m_length
; ++i
)
776 data
[i
] = static_cast<LChar
>(character
);
777 return newImpl
.release();
780 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
781 for (unsigned i
= 0; i
< m_length
; ++i
)
783 return newImpl
.release();
786 PassRefPtr
<StringImpl
> StringImpl::foldCase()
788 RELEASE_ASSERT(m_length
<= static_cast<unsigned>(numeric_limits
<int32_t>::max()));
789 int32_t length
= m_length
;
792 // Do a faster loop for the case where all the characters are ASCII.
794 RefPtr
<StringImpl
>newImpl
= createUninitialized(m_length
, data
);
797 for (int32_t i
= 0; i
< length
; ++i
) {
798 LChar c
= characters8()[i
];
799 data
[i
] = toASCIILower(c
);
804 return newImpl
.release();
806 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
807 for (int32_t i
= 0; i
< length
; ++i
)
808 data
[i
] = static_cast<LChar
>(Unicode::toLower(characters8()[i
]));
810 return newImpl
.release();
813 // Do a faster loop for the case where all the characters are ASCII.
815 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
817 for (int32_t i
= 0; i
< length
; ++i
) {
818 UChar c
= characters16()[i
];
820 data
[i
] = toASCIILower(c
);
823 return newImpl
.release();
825 // Do a slower implementation for cases that include non-ASCII characters.
827 int32_t realLength
= Unicode::foldCase(data
, length
, characters16(), m_length
, &error
);
828 if (!error
&& realLength
== length
)
829 return newImpl
.release();
830 newImpl
= createUninitialized(realLength
, data
);
831 Unicode::foldCase(data
, realLength
, characters16(), m_length
, &error
);
834 return newImpl
.release();
837 template <class UCharPredicate
>
838 inline PassRefPtr
<StringImpl
> StringImpl::stripMatchedCharacters(UCharPredicate predicate
)
844 unsigned end
= m_length
- 1;
846 // skip white space from start
847 while (start
<= end
&& predicate(is8Bit() ? characters8()[start
] : characters16()[start
]))
854 // skip white space from end
855 while (end
&& predicate(is8Bit() ? characters8()[end
] : characters16()[end
]))
858 if (!start
&& end
== m_length
- 1)
861 return create(characters8() + start
, end
+ 1 - start
);
862 return create(characters16() + start
, end
+ 1 - start
);
865 class UCharPredicate
{
867 inline UCharPredicate(CharacterMatchFunctionPtr function
): m_function(function
) { }
869 inline bool operator()(UChar ch
) const
871 return m_function(ch
);
875 const CharacterMatchFunctionPtr m_function
;
878 class SpaceOrNewlinePredicate
{
880 inline bool operator()(UChar ch
) const
882 return isSpaceOrNewline(ch
);
886 PassRefPtr
<StringImpl
> StringImpl::stripWhiteSpace()
888 return stripMatchedCharacters(SpaceOrNewlinePredicate());
891 PassRefPtr
<StringImpl
> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace
)
893 return stripMatchedCharacters(UCharPredicate(isWhiteSpace
));
896 template <typename CharType
>
897 ALWAYS_INLINE PassRefPtr
<StringImpl
> StringImpl::removeCharacters(const CharType
* characters
, CharacterMatchFunctionPtr findMatch
)
899 const CharType
* from
= characters
;
900 const CharType
* fromend
= from
+ m_length
;
902 // Assume the common case will not remove any characters
903 while (from
!= fromend
&& !findMatch(*from
))
908 StringBuffer
<CharType
> data(m_length
);
909 CharType
* to
= data
.characters();
910 unsigned outc
= from
- characters
;
913 memcpy(to
, characters
, outc
* sizeof(CharType
));
916 while (from
!= fromend
&& findMatch(*from
))
918 while (from
!= fromend
&& !findMatch(*from
))
919 to
[outc
++] = *from
++;
926 return data
.release();
929 PassRefPtr
<StringImpl
> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch
)
932 return removeCharacters(characters8(), findMatch
);
933 return removeCharacters(characters16(), findMatch
);
936 template <typename CharType
, class UCharPredicate
>
937 inline PassRefPtr
<StringImpl
> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate
, StripBehavior stripBehavior
)
939 StringBuffer
<CharType
> data(m_length
);
941 const CharType
* from
= getCharacters
<CharType
>();
942 const CharType
* fromend
= from
+ m_length
;
944 bool changedToSpace
= false;
946 CharType
* to
= data
.characters();
948 if (stripBehavior
== StripExtraWhiteSpace
) {
950 while (from
!= fromend
&& predicate(*from
)) {
952 changedToSpace
= true;
955 while (from
!= fromend
&& !predicate(*from
))
956 to
[outc
++] = *from
++;
963 if (outc
> 0 && to
[outc
- 1] == ' ')
966 for (; from
!= fromend
; ++from
) {
967 if (predicate(*from
)) {
969 changedToSpace
= true;
977 if (static_cast<unsigned>(outc
) == m_length
&& !changedToSpace
)
982 return data
.release();
985 PassRefPtr
<StringImpl
> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior
)
988 return StringImpl::simplifyMatchedCharactersToSpace
<LChar
>(SpaceOrNewlinePredicate(), stripBehavior
);
989 return StringImpl::simplifyMatchedCharactersToSpace
<UChar
>(SpaceOrNewlinePredicate(), stripBehavior
);
992 PassRefPtr
<StringImpl
> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace
, StripBehavior stripBehavior
)
995 return StringImpl::simplifyMatchedCharactersToSpace
<LChar
>(UCharPredicate(isWhiteSpace
), stripBehavior
);
996 return StringImpl::simplifyMatchedCharactersToSpace
<UChar
>(UCharPredicate(isWhiteSpace
), stripBehavior
);
999 int StringImpl::toIntStrict(bool* ok
, int base
)
1002 return charactersToIntStrict(characters8(), m_length
, ok
, base
);
1003 return charactersToIntStrict(characters16(), m_length
, ok
, base
);
1006 unsigned StringImpl::toUIntStrict(bool* ok
, int base
)
1009 return charactersToUIntStrict(characters8(), m_length
, ok
, base
);
1010 return charactersToUIntStrict(characters16(), m_length
, ok
, base
);
1013 int64_t StringImpl::toInt64Strict(bool* ok
, int base
)
1016 return charactersToInt64Strict(characters8(), m_length
, ok
, base
);
1017 return charactersToInt64Strict(characters16(), m_length
, ok
, base
);
1020 uint64_t StringImpl::toUInt64Strict(bool* ok
, int base
)
1023 return charactersToUInt64Strict(characters8(), m_length
, ok
, base
);
1024 return charactersToUInt64Strict(characters16(), m_length
, ok
, base
);
1027 int StringImpl::toInt(bool* ok
)
1030 return charactersToInt(characters8(), m_length
, ok
);
1031 return charactersToInt(characters16(), m_length
, ok
);
1034 unsigned StringImpl::toUInt(bool* ok
)
1037 return charactersToUInt(characters8(), m_length
, ok
);
1038 return charactersToUInt(characters16(), m_length
, ok
);
1041 int64_t StringImpl::toInt64(bool* ok
)
1044 return charactersToInt64(characters8(), m_length
, ok
);
1045 return charactersToInt64(characters16(), m_length
, ok
);
1048 uint64_t StringImpl::toUInt64(bool* ok
)
1051 return charactersToUInt64(characters8(), m_length
, ok
);
1052 return charactersToUInt64(characters16(), m_length
, ok
);
1055 double StringImpl::toDouble(bool* ok
)
1058 return charactersToDouble(characters8(), m_length
, ok
);
1059 return charactersToDouble(characters16(), m_length
, ok
);
1062 float StringImpl::toFloat(bool* ok
)
1065 return charactersToFloat(characters8(), m_length
, ok
);
1066 return charactersToFloat(characters16(), m_length
, ok
);
1069 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
1070 const UChar
StringImpl::latin1CaseFoldTable
[256] = {
1071 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
1072 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
1073 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
1074 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
1075 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1076 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
1077 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1078 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
1079 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1080 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
1081 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
1082 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
1083 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1084 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
1085 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1086 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
// Case-insensitive comparison of two Latin-1 buffers. Both sides are mapped
// through StringImpl::latin1CaseFoldTable before being compared, and both
// pointers advance by one per comparison.
// NOTE(review): the loop over |length| and the return statements are not
// visible in this excerpt.
1089 bool equalIgnoringCase(const LChar
* a
, const LChar
* b
, unsigned length
)
1092 if (StringImpl::latin1CaseFoldTable
[*a
++] != StringImpl::latin1CaseFoldTable
[*b
++])
// Case-insensitive comparison of a UTF-16 buffer against a Latin-1 buffer.
// The UTF-16 side is folded with foldCase(), the Latin-1 side through
// StringImpl::latin1CaseFoldTable; both pointers advance by one per
// comparison.
// NOTE(review): the loop over |length| and the return statements are not
// visible in this excerpt.
1098 bool equalIgnoringCase(const UChar
* a
, const LChar
* b
, unsigned length
)
1101 if (foldCase(*a
++) != StringImpl::latin1CaseFoldTable
[*b
++])
1107 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction
, unsigned start
)
1110 return WTF::find(characters8(), m_length
, matchFunction
, start
);
1111 return WTF::find(characters16(), m_length
, matchFunction
, start
);
// Searches this string for the NUL-terminated Latin-1 string |matchString|,
// starting at |index|.
// NOTE(review): every visible read of this string's data in this function
// goes through characters16(); no 8-bit branch is visible in this excerpt.
// Confirm how 8-bit strings are expected to reach (or avoid) this overload.
// NOTE(review): the null check, the not-found returns and the loop counter
// updates are not visible in this excerpt.
1114 size_t StringImpl::find(const LChar
* matchString
, unsigned index
)
1116 // Check for null or empty string to match against
1119 size_t matchStringLength
= strlen(reinterpret_cast<const char*>(matchString
));
// strlen returns size_t; make sure it fits the unsigned used from here on.
1120 RELEASE_ASSERT(matchStringLength
<= numeric_limits
<unsigned>::max());
1121 unsigned matchLength
= matchStringLength
;
// Result for an empty match string: |index| clamped to the string length.
1123 return min(index
, length());
1125 // Optimization 1: fast case for strings of length 1.
1126 if (matchLength
== 1)
1127 return WTF::find(characters16(), length(), *matchString
, index
);
1129 // Check index & matchLength are in range.
1130 if (index
> length())
1132 unsigned searchLength
= length() - index
;
1133 if (matchLength
> searchLength
)
1135 // delta is the number of additional times to test; delta == 0 means test only once.
1136 unsigned delta
= searchLength
- matchLength
;
1138 const UChar
* searchCharacters
= characters16() + index
;
1140 // Optimization 2: keep a running hash of the strings,
1141 // only call equal if the hashes match.
// The "hash" is the plain sum of the code units in the current window.
1142 unsigned searchHash
= 0;
1143 unsigned matchHash
= 0;
1144 for (unsigned i
= 0; i
< matchLength
; ++i
) {
1145 searchHash
+= searchCharacters
[i
];
1146 matchHash
+= matchString
[i
];
1150 // keep looping until we match
1151 while (searchHash
!= matchHash
|| !equal(searchCharacters
+ i
, matchString
, matchLength
)) {
// Slide the window one position: add the unit entering on the right and
// subtract the unit leaving on the left.
1154 searchHash
+= searchCharacters
[i
+ matchLength
];
1155 searchHash
-= searchCharacters
[i
];
1161 template<typename CharType
>
1162 ALWAYS_INLINE
size_t findIgnoringCaseInternal(const CharType
* searchCharacters
, const LChar
* matchString
, unsigned index
, unsigned searchLength
, unsigned matchLength
)
1164 // delta is the number of additional times to test; delta == 0 means test only once.
1165 unsigned delta
= searchLength
- matchLength
;
1168 while (!equalIgnoringCase(searchCharacters
+ i
, matchString
, matchLength
)) {
1176 size_t StringImpl::findIgnoringCase(const LChar
* matchString
, unsigned index
)
1178 // Check for null or empty string to match against
1181 size_t matchStringLength
= strlen(reinterpret_cast<const char*>(matchString
));
1182 RELEASE_ASSERT(matchStringLength
<= numeric_limits
<unsigned>::max());
1183 unsigned matchLength
= matchStringLength
;
1185 return min(index
, length());
1187 // Check index & matchLength are in range.
1188 if (index
> length())
1190 unsigned searchLength
= length() - index
;
1191 if (matchLength
> searchLength
)
1195 return findIgnoringCaseInternal(characters8() + index
, matchString
, index
, searchLength
, matchLength
);
1196 return findIgnoringCaseInternal(characters16() + index
, matchString
, index
, searchLength
, matchLength
);
1199 template <typename SearchCharacterType
, typename MatchCharacterType
>
1200 ALWAYS_INLINE
static size_t findInternal(const SearchCharacterType
* searchCharacters
, const MatchCharacterType
* matchCharacters
, unsigned index
, unsigned searchLength
, unsigned matchLength
)
1202 // Optimization: keep a running hash of the strings,
1203 // only call equal() if the hashes match.
1205 // delta is the number of additional times to test; delta == 0 means test only once.
1206 unsigned delta
= searchLength
- matchLength
;
1208 unsigned searchHash
= 0;
1209 unsigned matchHash
= 0;
1211 for (unsigned i
= 0; i
< matchLength
; ++i
) {
1212 searchHash
+= searchCharacters
[i
];
1213 matchHash
+= matchCharacters
[i
];
1217 // keep looping until we match
1218 while (searchHash
!= matchHash
|| !equal(searchCharacters
+ i
, matchCharacters
, matchLength
)) {
1221 searchHash
+= searchCharacters
[i
+ matchLength
];
1222 searchHash
-= searchCharacters
[i
];
1228 size_t StringImpl::find(StringImpl
* matchString
)
1230 // Check for null string to match against
1231 if (UNLIKELY(!matchString
))
1233 unsigned matchLength
= matchString
->length();
1235 // Optimization 1: fast case for strings of length 1.
1236 if (matchLength
== 1) {
1238 if (matchString
->is8Bit())
1239 return WTF::find(characters8(), length(), matchString
->characters8()[0]);
1240 return WTF::find(characters8(), length(), matchString
->characters16()[0]);
1242 if (matchString
->is8Bit())
1243 return WTF::find(characters16(), length(), matchString
->characters8()[0]);
1244 return WTF::find(characters16(), length(), matchString
->characters16()[0]);
1247 // Check matchLength is in range.
1248 if (matchLength
> length())
1251 // Check for empty string to match against
1252 if (UNLIKELY(!matchLength
))
1256 if (matchString
->is8Bit())
1257 return findInternal(characters8(), matchString
->characters8(), 0, length(), matchLength
);
1258 return findInternal(characters8(), matchString
->characters16(), 0, length(), matchLength
);
1261 if (matchString
->is8Bit())
1262 return findInternal(characters16(), matchString
->characters8(), 0, length(), matchLength
);
1264 return findInternal(characters16(), matchString
->characters16(), 0, length(), matchLength
);
1267 size_t StringImpl::find(StringImpl
* matchString
, unsigned index
)
1269 // Check for null or empty string to match against
1270 if (UNLIKELY(!matchString
))
1273 unsigned matchLength
= matchString
->length();
1275 // Optimization 1: fast case for strings of length 1.
1276 if (matchLength
== 1) {
1278 return WTF::find(characters8(), length(), (*matchString
)[0], index
);
1279 return WTF::find(characters16(), length(), (*matchString
)[0], index
);
1282 if (UNLIKELY(!matchLength
))
1283 return min(index
, length());
1285 // Check index & matchLength are in range.
1286 if (index
> length())
1288 unsigned searchLength
= length() - index
;
1289 if (matchLength
> searchLength
)
1293 if (matchString
->is8Bit())
1294 return findInternal(characters8() + index
, matchString
->characters8(), index
, searchLength
, matchLength
);
1295 return findInternal(characters8() + index
, matchString
->characters16(), index
, searchLength
, matchLength
);
1298 if (matchString
->is8Bit())
1299 return findInternal(characters16() + index
, matchString
->characters8(), index
, searchLength
, matchLength
);
1301 return findInternal(characters16() + index
, matchString
->characters16(), index
, searchLength
, matchLength
);
1304 template <typename SearchCharacterType
, typename MatchCharacterType
>
1305 ALWAYS_INLINE
static size_t findIgnoringCaseInner(const SearchCharacterType
* searchCharacters
, const MatchCharacterType
* matchCharacters
, unsigned index
, unsigned searchLength
, unsigned matchLength
)
1307 // delta is the number of additional times to test; delta == 0 means test only once.
1308 unsigned delta
= searchLength
- matchLength
;
1311 // keep looping until we match
1312 while (!equalIgnoringCase(searchCharacters
+ i
, matchCharacters
, matchLength
)) {
1320 size_t StringImpl::findIgnoringCase(StringImpl
* matchString
, unsigned index
)
1322 // Check for null or empty string to match against
1325 unsigned matchLength
= matchString
->length();
1327 return min(index
, length());
1329 // Check index & matchLength are in range.
1330 if (index
> length())
1332 unsigned searchLength
= length() - index
;
1333 if (matchLength
> searchLength
)
1337 if (matchString
->is8Bit())
1338 return findIgnoringCaseInner(characters8() + index
, matchString
->characters8(), index
, searchLength
, matchLength
);
1339 return findIgnoringCaseInner(characters8() + index
, matchString
->characters16(), index
, searchLength
, matchLength
);
1342 if (matchString
->is8Bit())
1343 return findIgnoringCaseInner(characters16() + index
, matchString
->characters8(), index
, searchLength
, matchLength
);
1345 return findIgnoringCaseInner(characters16() + index
, matchString
->characters16(), index
, searchLength
, matchLength
);
1348 size_t StringImpl::findNextLineStart(unsigned index
)
1351 return WTF::findNextLineStart(characters8(), m_length
, index
);
1352 return WTF::findNextLineStart(characters16(), m_length
, index
);
1355 size_t StringImpl::count(LChar c
) const
1359 for (size_t i
= 0; i
< m_length
; ++i
)
1360 count
+= characters8()[i
] == c
;
1362 for (size_t i
= 0; i
< m_length
; ++i
)
1363 count
+= characters16()[i
] == c
;
1368 size_t StringImpl::reverseFind(UChar c
, unsigned index
)
1371 return WTF::reverseFind(characters8(), m_length
, c
, index
);
1372 return WTF::reverseFind(characters16(), m_length
, c
, index
);
1375 template <typename SearchCharacterType
, typename MatchCharacterType
>
1376 ALWAYS_INLINE
static size_t reverseFindInner(const SearchCharacterType
* searchCharacters
, const MatchCharacterType
* matchCharacters
, unsigned index
, unsigned length
, unsigned matchLength
)
1378 // Optimization: keep a running hash of the strings,
1379 // only call equal if the hashes match.
1381 // delta is the number of additional times to test; delta == 0 means test only once.
1382 unsigned delta
= min(index
, length
- matchLength
);
1384 unsigned searchHash
= 0;
1385 unsigned matchHash
= 0;
1386 for (unsigned i
= 0; i
< matchLength
; ++i
) {
1387 searchHash
+= searchCharacters
[delta
+ i
];
1388 matchHash
+= matchCharacters
[i
];
1391 // keep looping until we match
1392 while (searchHash
!= matchHash
|| !equal(searchCharacters
+ delta
, matchCharacters
, matchLength
)) {
1396 searchHash
-= searchCharacters
[delta
+ matchLength
];
1397 searchHash
+= searchCharacters
[delta
];
1402 size_t StringImpl::reverseFind(StringImpl
* matchString
, unsigned index
)
1404 // Check for null or empty string to match against
1407 unsigned matchLength
= matchString
->length();
1408 unsigned ourLength
= length();
1410 return min(index
, ourLength
);
1412 // Optimization 1: fast case for strings of length 1.
1413 if (matchLength
== 1) {
1415 return WTF::reverseFind(characters8(), ourLength
, (*matchString
)[0], index
);
1416 return WTF::reverseFind(characters16(), ourLength
, (*matchString
)[0], index
);
1419 // Check index & matchLength are in range.
1420 if (matchLength
> ourLength
)
1424 if (matchString
->is8Bit())
1425 return reverseFindInner(characters8(), matchString
->characters8(), index
, ourLength
, matchLength
);
1426 return reverseFindInner(characters8(), matchString
->characters16(), index
, ourLength
, matchLength
);
1429 if (matchString
->is8Bit())
1430 return reverseFindInner(characters16(), matchString
->characters8(), index
, ourLength
, matchLength
);
1432 return reverseFindInner(characters16(), matchString
->characters16(), index
, ourLength
, matchLength
);
1435 template <typename SearchCharacterType
, typename MatchCharacterType
>
1436 ALWAYS_INLINE
static size_t reverseFindIgnoringCaseInner(const SearchCharacterType
* searchCharacters
, const MatchCharacterType
* matchCharacters
, unsigned index
, unsigned length
, unsigned matchLength
)
1438 // delta is the number of additional times to test; delta == 0 means test only once.
1439 unsigned delta
= min(index
, length
- matchLength
);
1441 // keep looping until we match
1442 while (!equalIgnoringCase(searchCharacters
+ delta
, matchCharacters
, matchLength
)) {
1450 size_t StringImpl::reverseFindIgnoringCase(StringImpl
* matchString
, unsigned index
)
1452 // Check for null or empty string to match against
1455 unsigned matchLength
= matchString
->length();
1456 unsigned ourLength
= length();
1458 return min(index
, ourLength
);
1460 // Check index & matchLength are in range.
1461 if (matchLength
> ourLength
)
1465 if (matchString
->is8Bit())
1466 return reverseFindIgnoringCaseInner(characters8(), matchString
->characters8(), index
, ourLength
, matchLength
);
1467 return reverseFindIgnoringCaseInner(characters8(), matchString
->characters16(), index
, ourLength
, matchLength
);
1470 if (matchString
->is8Bit())
1471 return reverseFindIgnoringCaseInner(characters16(), matchString
->characters8(), index
, ourLength
, matchLength
);
1473 return reverseFindIgnoringCaseInner(characters16(), matchString
->characters16(), index
, ourLength
, matchLength
);
1476 ALWAYS_INLINE
static bool equalInner(const StringImpl
* stringImpl
, unsigned startOffset
, const char* matchString
, unsigned matchLength
, TextCaseSensitivity caseSensitivity
)
1479 ASSERT(matchLength
<= stringImpl
->length());
1480 ASSERT(startOffset
+ matchLength
<= stringImpl
->length());
1482 if (caseSensitivity
== TextCaseSensitive
) {
1483 if (stringImpl
->is8Bit())
1484 return equal(stringImpl
->characters8() + startOffset
, reinterpret_cast<const LChar
*>(matchString
), matchLength
);
1485 return equal(stringImpl
->characters16() + startOffset
, reinterpret_cast<const LChar
*>(matchString
), matchLength
);
1487 if (stringImpl
->is8Bit())
1488 return equalIgnoringCase(stringImpl
->characters8() + startOffset
, reinterpret_cast<const LChar
*>(matchString
), matchLength
);
1489 return equalIgnoringCase(stringImpl
->characters16() + startOffset
, reinterpret_cast<const LChar
*>(matchString
), matchLength
);
1492 bool StringImpl::startsWith(UChar character
) const
1494 return m_length
&& (*this)[0] == character
;
1497 bool StringImpl::startsWith(const char* matchString
, unsigned matchLength
, TextCaseSensitivity caseSensitivity
) const
1499 ASSERT(matchLength
);
1500 if (matchLength
> length())
1502 return equalInner(this, 0, matchString
, matchLength
, caseSensitivity
);
1505 bool StringImpl::endsWith(StringImpl
* matchString
, TextCaseSensitivity caseSensitivity
)
1507 ASSERT(matchString
);
1508 if (m_length
>= matchString
->m_length
) {
1509 unsigned start
= m_length
- matchString
->m_length
;
1510 if (caseSensitivity
== TextCaseSensitive
)
1511 return find(matchString
, start
) == start
;
1512 return findIgnoringCase(matchString
, start
) == start
;
1517 bool StringImpl::endsWith(UChar character
) const
1519 return m_length
&& (*this)[m_length
- 1] == character
;
1522 bool StringImpl::endsWith(const char* matchString
, unsigned matchLength
, TextCaseSensitivity caseSensitivity
) const
1524 ASSERT(matchLength
);
1525 if (matchLength
> length())
1527 unsigned startOffset
= length() - matchLength
;
1528 return equalInner(this, startOffset
, matchString
, matchLength
, caseSensitivity
);
1531 PassRefPtr
<StringImpl
> StringImpl::replace(UChar oldC
, UChar newC
)
1536 if (find(oldC
) == kNotFound
)
1543 LChar oldChar
= static_cast<LChar
>(oldC
);
1544 LChar newChar
= static_cast<LChar
>(newC
);
1546 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
1548 for (i
= 0; i
!= m_length
; ++i
) {
1549 LChar ch
= characters8()[i
];
1554 return newImpl
.release();
1557 // There is the possibility we need to up convert from 8 to 16 bit,
1558 // create a 16 bit string for the result.
1560 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
1562 for (i
= 0; i
!= m_length
; ++i
) {
1563 UChar ch
= characters8()[i
];
1569 return newImpl
.release();
1573 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
1575 for (i
= 0; i
!= m_length
; ++i
) {
1576 UChar ch
= characters16()[i
];
1581 return newImpl
.release();
1584 PassRefPtr
<StringImpl
> StringImpl::replace(unsigned position
, unsigned lengthToReplace
, StringImpl
* str
)
1586 position
= min(position
, length());
1587 lengthToReplace
= min(lengthToReplace
, length() - position
);
1588 unsigned lengthToInsert
= str
? str
->length() : 0;
1589 if (!lengthToReplace
&& !lengthToInsert
)
1592 RELEASE_ASSERT((length() - lengthToReplace
) < (numeric_limits
<unsigned>::max() - lengthToInsert
));
1594 if (is8Bit() && (!str
|| str
->is8Bit())) {
1596 RefPtr
<StringImpl
> newImpl
=
1597 createUninitialized(length() - lengthToReplace
+ lengthToInsert
, data
);
1598 memcpy(data
, characters8(), position
* sizeof(LChar
));
1600 memcpy(data
+ position
, str
->characters8(), lengthToInsert
* sizeof(LChar
));
1601 memcpy(data
+ position
+ lengthToInsert
, characters8() + position
+ lengthToReplace
,
1602 (length() - position
- lengthToReplace
) * sizeof(LChar
));
1603 return newImpl
.release();
1606 RefPtr
<StringImpl
> newImpl
=
1607 createUninitialized(length() - lengthToReplace
+ lengthToInsert
, data
);
1609 for (unsigned i
= 0; i
< position
; ++i
)
1610 data
[i
] = characters8()[i
];
1612 memcpy(data
, characters16(), position
* sizeof(UChar
));
1615 for (unsigned i
= 0; i
< lengthToInsert
; ++i
)
1616 data
[i
+ position
] = str
->characters8()[i
];
1618 memcpy(data
+ position
, str
->characters16(), lengthToInsert
* sizeof(UChar
));
1621 for (unsigned i
= 0; i
< length() - position
- lengthToReplace
; ++i
)
1622 data
[i
+ position
+ lengthToInsert
] = characters8()[i
+ position
+ lengthToReplace
];
1624 memcpy(data
+ position
+ lengthToInsert
, characters16() + position
+ lengthToReplace
,
1625 (length() - position
- lengthToReplace
) * sizeof(UChar
));
1627 return newImpl
.release();
1630 PassRefPtr
<StringImpl
> StringImpl::replace(UChar pattern
, StringImpl
* replacement
)
1635 if (replacement
->is8Bit())
1636 return replace(pattern
, replacement
->characters8(), replacement
->length());
1638 return replace(pattern
, replacement
->characters16(), replacement
->length());
1641 PassRefPtr
<StringImpl
> StringImpl::replace(UChar pattern
, const LChar
* replacement
, unsigned repStrLength
)
1643 ASSERT(replacement
);
1645 size_t srcSegmentStart
= 0;
1646 unsigned matchCount
= 0;
1648 // Count the matches.
1649 while ((srcSegmentStart
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1654 // If we have 0 matches then we don't have to do any more work.
1658 RELEASE_ASSERT(!repStrLength
|| matchCount
<= numeric_limits
<unsigned>::max() / repStrLength
);
1660 unsigned replaceSize
= matchCount
* repStrLength
;
1661 unsigned newSize
= m_length
- matchCount
;
1662 RELEASE_ASSERT(newSize
< (numeric_limits
<unsigned>::max() - replaceSize
));
1664 newSize
+= replaceSize
;
1666 // Construct the new data.
1667 size_t srcSegmentEnd
;
1668 unsigned srcSegmentLength
;
1669 srcSegmentStart
= 0;
1670 unsigned dstOffset
= 0;
1674 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1676 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1677 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1678 memcpy(data
+ dstOffset
, characters8() + srcSegmentStart
, srcSegmentLength
* sizeof(LChar
));
1679 dstOffset
+= srcSegmentLength
;
1680 memcpy(data
+ dstOffset
, replacement
, repStrLength
* sizeof(LChar
));
1681 dstOffset
+= repStrLength
;
1682 srcSegmentStart
= srcSegmentEnd
+ 1;
1685 srcSegmentLength
= m_length
- srcSegmentStart
;
1686 memcpy(data
+ dstOffset
, characters8() + srcSegmentStart
, srcSegmentLength
* sizeof(LChar
));
1688 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1690 return newImpl
.release();
1694 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1696 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1697 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1698 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1700 dstOffset
+= srcSegmentLength
;
1701 for (unsigned i
= 0; i
< repStrLength
; ++i
)
1702 data
[i
+ dstOffset
] = replacement
[i
];
1704 dstOffset
+= repStrLength
;
1705 srcSegmentStart
= srcSegmentEnd
+ 1;
1708 srcSegmentLength
= m_length
- srcSegmentStart
;
1709 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1711 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1713 return newImpl
.release();
1716 PassRefPtr
<StringImpl
> StringImpl::replace(UChar pattern
, const UChar
* replacement
, unsigned repStrLength
)
1718 ASSERT(replacement
);
1720 size_t srcSegmentStart
= 0;
1721 unsigned matchCount
= 0;
1723 // Count the matches.
1724 while ((srcSegmentStart
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1729 // If we have 0 matches then we don't have to do any more work.
1733 RELEASE_ASSERT(!repStrLength
|| matchCount
<= numeric_limits
<unsigned>::max() / repStrLength
);
1735 unsigned replaceSize
= matchCount
* repStrLength
;
1736 unsigned newSize
= m_length
- matchCount
;
1737 RELEASE_ASSERT(newSize
< (numeric_limits
<unsigned>::max() - replaceSize
));
1739 newSize
+= replaceSize
;
1741 // Construct the new data.
1742 size_t srcSegmentEnd
;
1743 unsigned srcSegmentLength
;
1744 srcSegmentStart
= 0;
1745 unsigned dstOffset
= 0;
1749 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1751 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1752 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1753 for (unsigned i
= 0; i
< srcSegmentLength
; ++i
)
1754 data
[i
+ dstOffset
] = characters8()[i
+ srcSegmentStart
];
1756 dstOffset
+= srcSegmentLength
;
1757 memcpy(data
+ dstOffset
, replacement
, repStrLength
* sizeof(UChar
));
1759 dstOffset
+= repStrLength
;
1760 srcSegmentStart
= srcSegmentEnd
+ 1;
1763 srcSegmentLength
= m_length
- srcSegmentStart
;
1764 for (unsigned i
= 0; i
< srcSegmentLength
; ++i
)
1765 data
[i
+ dstOffset
] = characters8()[i
+ srcSegmentStart
];
1767 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1769 return newImpl
.release();
1773 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1775 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1776 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1777 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1779 dstOffset
+= srcSegmentLength
;
1780 memcpy(data
+ dstOffset
, replacement
, repStrLength
* sizeof(UChar
));
1782 dstOffset
+= repStrLength
;
1783 srcSegmentStart
= srcSegmentEnd
+ 1;
1786 srcSegmentLength
= m_length
- srcSegmentStart
;
1787 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1789 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1791 return newImpl
.release();
1794 PassRefPtr
<StringImpl
> StringImpl::replace(StringImpl
* pattern
, StringImpl
* replacement
)
1796 if (!pattern
|| !replacement
)
1799 unsigned patternLength
= pattern
->length();
1803 unsigned repStrLength
= replacement
->length();
1804 size_t srcSegmentStart
= 0;
1805 unsigned matchCount
= 0;
1807 // Count the matches.
1808 while ((srcSegmentStart
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1810 srcSegmentStart
+= patternLength
;
1813 // If we have 0 matches, we don't have to do any more work
1817 unsigned newSize
= m_length
- matchCount
* patternLength
;
1818 RELEASE_ASSERT(!repStrLength
|| matchCount
<= numeric_limits
<unsigned>::max() / repStrLength
);
1820 RELEASE_ASSERT(newSize
<= (numeric_limits
<unsigned>::max() - matchCount
* repStrLength
));
1822 newSize
+= matchCount
* repStrLength
;
1825 // Construct the new data
1826 size_t srcSegmentEnd
;
1827 unsigned srcSegmentLength
;
1828 srcSegmentStart
= 0;
1829 unsigned dstOffset
= 0;
1830 bool srcIs8Bit
= is8Bit();
1831 bool replacementIs8Bit
= replacement
->is8Bit();
1833 // There are 4 cases:
1834 // 1. This and replacement are both 8 bit.
1835 // 2. This and replacement are both 16 bit.
1836 // 3. This is 8 bit and replacement is 16 bit.
1837 // 4. This is 16 bit and replacement is 8 bit.
1838 if (srcIs8Bit
&& replacementIs8Bit
) {
1841 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1842 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1843 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1844 memcpy(data
+ dstOffset
, characters8() + srcSegmentStart
, srcSegmentLength
* sizeof(LChar
));
1845 dstOffset
+= srcSegmentLength
;
1846 memcpy(data
+ dstOffset
, replacement
->characters8(), repStrLength
* sizeof(LChar
));
1847 dstOffset
+= repStrLength
;
1848 srcSegmentStart
= srcSegmentEnd
+ patternLength
;
1851 srcSegmentLength
= m_length
- srcSegmentStart
;
1852 memcpy(data
+ dstOffset
, characters8() + srcSegmentStart
, srcSegmentLength
* sizeof(LChar
));
1854 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1856 return newImpl
.release();
1860 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
1861 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != kNotFound
) {
1862 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
1865 for (unsigned i
= 0; i
< srcSegmentLength
; ++i
)
1866 data
[i
+ dstOffset
] = characters8()[i
+ srcSegmentStart
];
1869 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1871 dstOffset
+= srcSegmentLength
;
1872 if (replacementIs8Bit
) {
1874 for (unsigned i
= 0; i
< repStrLength
; ++i
)
1875 data
[i
+ dstOffset
] = replacement
->characters8()[i
];
1878 memcpy(data
+ dstOffset
, replacement
->characters16(), repStrLength
* sizeof(UChar
));
1880 dstOffset
+= repStrLength
;
1881 srcSegmentStart
= srcSegmentEnd
+ patternLength
;
1884 srcSegmentLength
= m_length
- srcSegmentStart
;
1887 for (unsigned i
= 0; i
< srcSegmentLength
; ++i
)
1888 data
[i
+ dstOffset
] = characters8()[i
+ srcSegmentStart
];
1891 memcpy(data
+ dstOffset
, characters16() + srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
1894 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
1896 return newImpl
.release();
1899 PassRefPtr
<StringImpl
> StringImpl::upconvertedString()
1902 return String::make16BitFrom8BitSource(characters8(), m_length
).releaseImpl();
1906 static inline bool stringImplContentEqual(const StringImpl
* a
, const StringImpl
* b
)
1908 unsigned aLength
= a
->length();
1909 unsigned bLength
= b
->length();
1910 if (aLength
!= bLength
)
1915 return equal(a
->characters8(), b
->characters8(), aLength
);
1917 return equal(a
->characters8(), b
->characters16(), aLength
);
1921 return equal(a
->characters16(), b
->characters8(), aLength
);
1923 return equal(a
->characters16(), b
->characters16(), aLength
);
1926 bool equal(const StringImpl
* a
, const StringImpl
* b
)
1932 if (a
->isAtomic() && b
->isAtomic())
1935 return stringImplContentEqual(a
, b
);
1938 template <typename CharType
>
1939 inline bool equalInternal(const StringImpl
* a
, const CharType
* b
, unsigned length
)
1946 if (a
->length() != length
)
1949 return equal(a
->characters8(), b
, length
);
1950 return equal(a
->characters16(), b
, length
);
1953 bool equal(const StringImpl
* a
, const LChar
* b
, unsigned length
)
1955 return equalInternal(a
, b
, length
);
1958 bool equal(const StringImpl
* a
, const UChar
* b
, unsigned length
)
1960 return equalInternal(a
, b
, length
);
1963 bool equal(const StringImpl
* a
, const LChar
* b
)
1970 unsigned length
= a
->length();
1973 const LChar
* aPtr
= a
->characters8();
1974 for (unsigned i
= 0; i
!= length
; ++i
) {
1986 const UChar
* aPtr
= a
->characters16();
1987 for (unsigned i
= 0; i
!= length
; ++i
) {
1998 bool equalNonNull(const StringImpl
* a
, const StringImpl
* b
)
2004 return stringImplContentEqual(a
, b
);
2007 bool equalIgnoringCase(const StringImpl
* a
, const StringImpl
* b
)
2014 return CaseFoldingHash::equal(a
, b
);
2017 bool equalIgnoringCase(const StringImpl
* a
, const LChar
* b
)
2024 unsigned length
= a
->length();
2026 // Do a faster loop for the case where all the characters are ASCII.
2030 const LChar
* as
= a
->characters8();
2031 for (unsigned i
= 0; i
!= length
; ++i
) {
2037 equal
= equal
&& (toASCIILower(ac
) == toASCIILower(bc
));
2040 // Do a slower implementation for cases that include non-ASCII characters.
2043 for (unsigned i
= 0; i
!= length
; ++i
)
2044 equal
= equal
&& (foldCase(as
[i
]) == foldCase(b
[i
]));
2047 return equal
&& !b
[length
];
2050 const UChar
* as
= a
->characters16();
2051 for (unsigned i
= 0; i
!= length
; ++i
) {
2057 equal
= equal
&& (toASCIILower(ac
) == toASCIILower(bc
));
2060 // Do a slower implementation for cases that include non-ASCII characters.
2063 for (unsigned i
= 0; i
!= length
; ++i
) {
2064 equal
= equal
&& (foldCase(as
[i
]) == foldCase(b
[i
]));
2068 return equal
&& !b
[length
];
2071 bool equalIgnoringCaseNonNull(const StringImpl
* a
, const StringImpl
* b
)
2077 unsigned length
= a
->length();
2078 if (length
!= b
->length())
2083 return equalIgnoringCase(a
->characters8(), b
->characters8(), length
);
2085 return equalIgnoringCase(b
->characters16(), a
->characters8(), length
);
2089 return equalIgnoringCase(a
->characters16(), b
->characters8(), length
);
2091 return equalIgnoringCase(a
->characters16(), b
->characters16(), length
);
2094 bool equalIgnoringNullity(StringImpl
* a
, StringImpl
* b
)
2096 if (!a
&& b
&& !b
->length())
2098 if (!b
&& a
&& !a
->length())
2103 size_t StringImpl::sizeInBytes() const
2105 size_t size
= length();
2108 return size
+ sizeof(*this);
2111 UChar32
toUpper(UChar32 c
, const AtomicString
& localeIdentifier
)
2113 if (!localeIdentifier
.isNull()) {
2114 if (localeIdMatchesLang(localeIdentifier
, "tr") || localeIdMatchesLang(localeIdentifier
, "az")) {
2116 return latinCapitalLetterIWithDotAbove
;
2117 if (c
== latinSmallLetterDotlessI
)
2119 } else if (localeIdMatchesLang(localeIdentifier
, "lt")) {
2120 // TODO(rob.buis) implement upper-casing rules for lt
2121 // like in StringImpl::upper(locale).