Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / WebKit / Source / wtf / text / StringImpl.cpp
blobdfaef64356afd78cdbeef85436b84286d5243fac
1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
25 #include "config.h"
26 #include "wtf/text/StringImpl.h"
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/OwnPtr.h"
32 #include "wtf/PartitionAlloc.h"
33 #include "wtf/Partitions.h"
34 #include "wtf/PassOwnPtr.h"
35 #include "wtf/StdLibExtras.h"
36 #include "wtf/text/AtomicString.h"
37 #include "wtf/text/CharacterNames.h"
38 #include "wtf/text/StringBuffer.h"
39 #include "wtf/text/StringHash.h"
40 #include <unicode/translit.h>
41 #include <unicode/unistr.h>
43 #ifdef STRING_STATS
44 #include "wtf/DataLog.h"
45 #include "wtf/HashMap.h"
46 #include "wtf/HashSet.h"
47 #include "wtf/RefCounted.h"
48 #include "wtf/ThreadingPrimitives.h"
49 #include <unistd.h>
50 #endif
52 using namespace std;
54 namespace WTF {
56 using namespace Unicode;
58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay small");
60 #ifdef STRING_STATS
62 static Mutex& statsMutex()
64 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
65 return mutex;
68 static HashSet<void*>& liveStrings()
70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
71 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
72 return strings;
75 void addStringForStats(StringImpl* string)
77 MutexLocker locker(statsMutex());
78 liveStrings().add(string);
81 void removeStringForStats(StringImpl* string)
83 MutexLocker locker(statsMutex());
84 liveStrings().remove(string);
87 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
89 const unsigned kMaxSnippetLength = 64;
90 snippet.clear();
92 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
93 if (expectedLength == kMaxSnippetLength)
94 expectedLength += 3; // For the "...".
95 ++expectedLength; // For the terminating '\0'.
96 snippet.reserveCapacity(expectedLength);
98 size_t i;
99 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
100 UChar c = (*string)[i];
101 if (isASCIIPrintable(c))
102 snippet.append(c);
103 else
104 snippet.append('?');
106 if (i < string->length()) {
107 snippet.append('.');
108 snippet.append('.');
109 snippet.append('.');
111 snippet.append('\0');
114 static bool isUnnecessarilyWide(const StringImpl* string)
116 if (string->is8Bit())
117 return false;
118 UChar c = 0;
119 for (unsigned i = 0; i < string->length(); ++i)
120 c |= (*string)[i] >> 8;
121 return !c;
124 class PerStringStats : public RefCounted<PerStringStats> {
125 public:
126 static PassRefPtr<PerStringStats> create()
128 return adoptRef(new PerStringStats);
131 void add(const StringImpl* string)
133 ++m_numberOfCopies;
134 if (!m_length) {
135 m_length = string->length();
136 fillWithSnippet(string, m_snippet);
138 if (string->isAtomic())
139 ++m_numberOfAtomicCopies;
140 if (isUnnecessarilyWide(string))
141 m_unnecessarilyWide = true;
144 size_t totalCharacters() const
146 return m_numberOfCopies * m_length;
149 void print()
151 const char* status = "ok";
152 if (m_unnecessarilyWide)
153 status = "16";
154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
157 bool m_unnecessarilyWide;
158 unsigned m_numberOfCopies;
159 unsigned m_length;
160 unsigned m_numberOfAtomicCopies;
161 Vector<char> m_snippet;
163 private:
164 PerStringStats()
165 : m_unnecessarilyWide(false)
166 , m_numberOfCopies(0)
167 , m_length(0)
168 , m_numberOfAtomicCopies(0)
173 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
175 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
176 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
177 if (a->totalCharacters() != b->totalCharacters())
178 return a->totalCharacters() < b->totalCharacters();
179 if (a->m_numberOfCopies != b->m_numberOfCopies)
180 return a->m_numberOfCopies < b->m_numberOfCopies;
181 if (a->m_length != b->m_length)
182 return a->m_length < b->m_length;
183 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
186 static void printLiveStringStats(void*)
188 MutexLocker locker(statsMutex());
189 HashSet<void*>& strings = liveStrings();
191 HashMap<StringImpl*, RefPtr<PerStringStats>> stats;
192 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
193 StringImpl* string = static_cast<StringImpl*>(*iter);
194 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.find(string);
195 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
196 value->add(string);
197 stats.set(string, value.release());
200 Vector<RefPtr<PerStringStats>> all;
201 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.begin(); iter != stats.end(); ++iter)
202 all.append(iter->value);
204 std::sort(all.begin(), all.end());
205 std::reverse(all.begin(), all.end());
206 for (size_t i = 0; i < 20 && i < all.size(); ++i)
207 all[i]->print();
210 StringStats StringImpl::m_stringStats;
212 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
214 void StringStats::removeString(StringImpl* string)
216 unsigned length = string->length();
217 --m_totalNumberStrings;
219 if (string->is8Bit()) {
220 --m_number8BitStrings;
221 m_total8BitData -= length;
222 } else {
223 --m_number16BitStrings;
224 m_total16BitData -= length;
227 if (!--s_stringRemovesTillPrintStats) {
228 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
229 printStats();
233 void StringStats::printStats()
235 dataLogF("String stats for process id %d:\n", getpid());
237 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
238 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
239 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
242 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
243 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
246 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
247 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
248 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
249 unsigned long long totalSavedBytes = m_total8BitData;
250 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
253 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
254 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
257 internal::callOnMainThread(&printLiveStringStats, nullptr);
259 #endif
261 void* StringImpl::operator new(size_t size)
263 ASSERT(size == sizeof(StringImpl));
264 return Partitions::bufferMalloc(size);
267 void StringImpl::operator delete(void* ptr)
269 Partitions::bufferFree(ptr);
272 inline StringImpl::~StringImpl()
274 ASSERT(!isStatic());
276 STRING_STATS_REMOVE_STRING(this);
278 if (isAtomic())
279 AtomicString::remove(this);
282 void StringImpl::destroyIfNotStatic()
284 if (!isStatic())
285 delete this;
288 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
290 if (!length) {
291 data = 0;
292 return empty();
295 // Allocate a single buffer large enough to contain the StringImpl
296 // struct as well as the data which it contains. This removes one
297 // heap allocation from this call.
298 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocationSize<LChar>(length)));
300 data = reinterpret_cast<LChar*>(string + 1);
301 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
304 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
306 if (!length) {
307 data = 0;
308 return empty();
311 // Allocate a single buffer large enough to contain the StringImpl
312 // struct as well as the data which it contains. This removes one
313 // heap allocation from this call.
314 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocationSize<UChar>(length)));
316 data = reinterpret_cast<UChar*>(string + 1);
317 return adoptRef(new (string) StringImpl(length));
320 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
322 ASSERT(originalString->hasOneRef());
324 if (!length)
325 return empty();
327 bool is8Bit = originalString->is8Bit();
328 // Same as createUninitialized() except here we use realloc.
329 size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
330 originalString->~StringImpl();
331 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferRealloc(originalString.leakRef(), size));
332 if (is8Bit)
333 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
334 return adoptRef(new (string) StringImpl(length));
337 static StaticStringsTable& staticStrings()
339 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
340 return staticStrings;
343 #if ENABLE(ASSERT)
344 static bool s_allowCreationOfStaticStrings = true;
345 #endif
347 const StaticStringsTable& StringImpl::allStaticStrings()
349 return staticStrings();
352 void StringImpl::freezeStaticStrings()
354 ASSERT(isMainThread());
356 #if ENABLE(ASSERT)
357 s_allowCreationOfStaticStrings = false;
358 #endif
361 unsigned StringImpl::m_highestStaticStringLength = 0;
363 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
365 ASSERT(s_allowCreationOfStaticStrings);
366 ASSERT(string);
367 ASSERT(length);
369 StaticStringsTable::const_iterator it = staticStrings().find(hash);
370 if (it != staticStrings().end()) {
371 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
372 return it->value;
375 // Allocate a single buffer large enough to contain the StringImpl
376 // struct as well as the data which it contains. This removes one
377 // heap allocation from this call.
378 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
379 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
381 WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
382 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size));
384 LChar* data = reinterpret_cast<LChar*>(impl + 1);
385 impl = new (impl) StringImpl(length, hash, StaticString);
386 memcpy(data, string, length * sizeof(LChar));
387 #if ENABLE(ASSERT)
388 impl->assertHashIsCorrect();
389 #endif
391 ASSERT(isMainThread());
392 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
393 staticStrings().add(hash, impl);
394 WTF_ANNOTATE_BENIGN_RACE(impl,
395 "Benign race on the reference counter of a static string created by StringImpl::createStatic");
397 return impl;
400 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
402 if (!characters || !length)
403 return empty();
405 UChar* data;
406 RefPtr<StringImpl> string = createUninitialized(length, data);
407 memcpy(data, characters, length * sizeof(UChar));
408 return string.release();
411 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
413 if (!characters || !length)
414 return empty();
416 LChar* data;
417 RefPtr<StringImpl> string = createUninitialized(length, data);
418 memcpy(data, characters, length * sizeof(LChar));
419 return string.release();
422 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
424 if (!characters || !length)
425 return empty();
427 LChar* data;
428 RefPtr<StringImpl> string = createUninitialized(length, data);
430 for (size_t i = 0; i < length; ++i) {
431 if (characters[i] & 0xff00)
432 return create(characters, length);
433 data[i] = static_cast<LChar>(characters[i]);
436 return string.release();
439 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
441 if (!string)
442 return empty();
443 size_t length = strlen(reinterpret_cast<const char*>(string));
444 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
445 return create(string, length);
448 bool StringImpl::containsOnlyWhitespace()
450 // FIXME: The definition of whitespace here includes a number of characters
451 // that are not whitespace from the point of view of LayoutText; I wonder if
452 // that's a problem in practice.
453 if (is8Bit()) {
454 for (unsigned i = 0; i < m_length; ++i) {
455 UChar c = characters8()[i];
456 if (!isASCIISpace(c))
457 return false;
460 return true;
463 for (unsigned i = 0; i < m_length; ++i) {
464 UChar c = characters16()[i];
465 if (!isASCIISpace(c))
466 return false;
468 return true;
471 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
473 if (start >= m_length)
474 return empty();
475 unsigned maxLength = m_length - start;
476 if (length >= maxLength) {
477 if (!start)
478 return this;
479 length = maxLength;
481 if (is8Bit())
482 return create(characters8() + start, length);
484 return create(characters16() + start, length);
487 UChar32 StringImpl::characterStartingAt(unsigned i)
489 if (is8Bit())
490 return characters8()[i];
491 if (U16_IS_SINGLE(characters16()[i]))
492 return characters16()[i];
493 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
494 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
495 return 0;
498 PassRefPtr<StringImpl> StringImpl::lower()
500 // Note: This is a hot function in the Dromaeo benchmark, specifically the
501 // no-op code path up through the first 'return' statement.
503 // First scan the string for uppercase and non-ASCII characters:
504 if (is8Bit()) {
505 unsigned firstIndexToBeLowered = m_length;
506 for (unsigned i = 0; i < m_length; ++i) {
507 LChar ch = characters8()[i];
508 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) {
509 firstIndexToBeLowered = i;
510 break;
514 // Nothing to do if the string is all ASCII with no uppercase.
515 if (firstIndexToBeLowered == m_length)
516 return this;
518 LChar* data8;
519 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
520 memcpy(data8, characters8(), firstIndexToBeLowered);
522 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
523 LChar ch = characters8()[i];
524 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch))
525 : toASCIILower(ch);
528 return newImpl.release();
531 bool noUpper = true;
532 UChar ored = 0;
534 const UChar* end = characters16() + m_length;
535 for (const UChar* chp = characters16(); chp != end; ++chp) {
536 if (UNLIKELY(isASCIIUpper(*chp)))
537 noUpper = false;
538 ored |= *chp;
540 // Nothing to do if the string is all ASCII with no uppercase.
541 if (noUpper && !(ored & ~0x7F))
542 return this;
544 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
545 int32_t length = m_length;
547 if (!(ored & ~0x7F)) {
548 UChar* data16;
549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
551 for (int32_t i = 0; i < length; ++i) {
552 UChar c = characters16()[i];
553 data16[i] = toASCIILower(c);
555 return newImpl.release();
558 // Do a slower implementation for cases that include non-ASCII characters.
559 UChar* data16;
560 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
562 bool error;
563 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
564 if (!error && realLength == length)
565 return newImpl.release();
567 newImpl = createUninitialized(realLength, data16);
568 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
569 if (error)
570 return this;
571 return newImpl.release();
574 PassRefPtr<StringImpl> StringImpl::upper()
576 // This function could be optimized for no-op cases the way lower() is,
577 // but in empirical testing, few actual calls to upper() are no-ops, so
578 // it wouldn't be worth the extra time for pre-scanning.
580 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
581 int32_t length = m_length;
583 if (is8Bit()) {
584 LChar* data8;
585 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
587 // Do a faster loop for the case where all the characters are ASCII.
588 LChar ored = 0;
589 for (int i = 0; i < length; ++i) {
590 LChar c = characters8()[i];
591 ored |= c;
592 data8[i] = toASCIIUpper(c);
594 if (!(ored & ~0x7F))
595 return newImpl.release();
597 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
598 int numberSharpSCharacters = 0;
600 // There are two special cases.
601 // 1. latin-1 characters when converted to upper case are 16 bit characters.
602 // 2. Lower case sharp-S converts to "SS" (two characters)
603 for (int32_t i = 0; i < length; ++i) {
604 LChar c = characters8()[i];
605 if (UNLIKELY(c == smallLetterSharpSCharacter))
606 ++numberSharpSCharacters;
607 UChar upper = static_cast<UChar>(Unicode::toUpper(c));
608 if (UNLIKELY(upper > 0xff)) {
609 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
610 goto upconvert;
612 data8[i] = static_cast<LChar>(upper);
615 if (!numberSharpSCharacters)
616 return newImpl.release();
618 // We have numberSSCharacters sharp-s characters, but none of the other special characters.
619 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
621 LChar* dest = data8;
623 for (int32_t i = 0; i < length; ++i) {
624 LChar c = characters8()[i];
625 if (c == smallLetterSharpSCharacter) {
626 *dest++ = 'S';
627 *dest++ = 'S';
628 } else
629 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
632 return newImpl.release();
635 upconvert:
636 RefPtr<StringImpl> upconverted = upconvertedString();
637 const UChar* source16 = upconverted->characters16();
639 UChar* data16;
640 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
642 // Do a faster loop for the case where all the characters are ASCII.
643 UChar ored = 0;
644 for (int i = 0; i < length; ++i) {
645 UChar c = source16[i];
646 ored |= c;
647 data16[i] = toASCIIUpper(c);
649 if (!(ored & ~0x7F))
650 return newImpl.release();
652 // Do a slower implementation for cases that include non-ASCII characters.
653 bool error;
654 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
655 if (!error && realLength == length)
656 return newImpl;
657 newImpl = createUninitialized(realLength, data16);
658 Unicode::toUpper(data16, realLength, source16, m_length, &error);
659 if (error)
660 return this;
661 return newImpl.release();
664 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
666 if (equalIgnoringCase(localeId, lang))
667 return true;
668 static char localeIdPrefix[4];
669 static const char delimeter[4] = "-_@";
671 size_t langLength = strlen(lang);
672 RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
673 strncpy(localeIdPrefix, lang, langLength);
674 for (int i = 0; i < 3; ++i) {
675 localeIdPrefix[langLength] = delimeter[i];
676 // case-insensitive comparison
677 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, TextCaseInsensitive))
678 return true;
680 return false;
683 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
685 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
687 UChar* data16;
688 size_t targetLength = length;
689 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
690 do {
691 UErrorCode status = U_ZERO_ERROR;
692 targetLength = converter(data16, targetLength, source16, length, locale, &status);
693 if (U_SUCCESS(status)) {
694 if (length > 0)
695 output->truncateAssumingIsolated(targetLength);
696 return output.release();
698 if (status != U_BUFFER_OVERFLOW_ERROR)
699 return originalString;
700 // Expand the buffer.
701 output = StringImpl::createUninitialized(targetLength, data16);
702 } while (true);
705 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
707 // Use the more-optimized code path most of the time.
708 // Only Turkic (tr and az) languages and Lithuanian requires
709 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
710 // it's identical to the locale-agnostic lowercasing. Context-dependent
711 // handling of Greek capital sigma is built into the common lowercasing
712 // function in ICU.
713 const char* localeForConversion = 0;
714 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
715 localeForConversion = "tr";
716 else if (localeIdMatchesLang(localeIdentifier, "lt"))
717 localeForConversion = "lt";
718 else
719 return lower();
721 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
722 CRASH();
723 int length = m_length;
725 RefPtr<StringImpl> upconverted = upconvertedString();
726 const UChar* source16 = upconverted->characters16();
727 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
730 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
732 // Use the more-optimized code path most of the time.
733 // Only Turkic (tr and az) languages and Greek require locale-specific
734 // lowercasing rules.
735 icu::UnicodeString transliteratorId;
736 const char* localeForConversion = 0;
737 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
738 localeForConversion = "tr";
739 else if (localeIdMatchesLang(localeIdentifier, "el"))
740 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
741 else if (localeIdMatchesLang(localeIdentifier, "lt"))
742 localeForConversion = "lt";
743 else
744 return upper();
746 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
747 CRASH();
748 int length = m_length;
750 RefPtr<StringImpl> upconverted = upconvertedString();
751 const UChar* source16 = upconverted->characters16();
753 if (localeForConversion)
754 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
756 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
757 UErrorCode status = U_ZERO_ERROR;
758 OwnPtr<icu::Transliterator> translit =
759 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
760 if (U_FAILURE(status))
761 return upper();
763 // target will be copy-on-write.
764 icu::UnicodeString target(false, source16, length);
765 translit->transliterate(target);
767 return create(target.getBuffer(), target.length());
770 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
772 if (!(character & ~0x7F)) {
773 LChar* data;
774 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
775 for (unsigned i = 0; i < m_length; ++i)
776 data[i] = static_cast<LChar>(character);
777 return newImpl.release();
779 UChar* data;
780 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
781 for (unsigned i = 0; i < m_length; ++i)
782 data[i] = character;
783 return newImpl.release();
786 PassRefPtr<StringImpl> StringImpl::foldCase()
788 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
789 int32_t length = m_length;
791 if (is8Bit()) {
792 // Do a faster loop for the case where all the characters are ASCII.
793 LChar* data;
794 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
795 LChar ored = 0;
797 for (int32_t i = 0; i < length; ++i) {
798 LChar c = characters8()[i];
799 data[i] = toASCIILower(c);
800 ored |= c;
803 if (!(ored & ~0x7F))
804 return newImpl.release();
806 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
807 for (int32_t i = 0; i < length; ++i)
808 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
810 return newImpl.release();
813 // Do a faster loop for the case where all the characters are ASCII.
814 UChar* data;
815 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
816 UChar ored = 0;
817 for (int32_t i = 0; i < length; ++i) {
818 UChar c = characters16()[i];
819 ored |= c;
820 data[i] = toASCIILower(c);
822 if (!(ored & ~0x7F))
823 return newImpl.release();
825 // Do a slower implementation for cases that include non-ASCII characters.
826 bool error;
827 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
828 if (!error && realLength == length)
829 return newImpl.release();
830 newImpl = createUninitialized(realLength, data);
831 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
832 if (error)
833 return this;
834 return newImpl.release();
837 template <class UCharPredicate>
838 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
840 if (!m_length)
841 return empty();
843 unsigned start = 0;
844 unsigned end = m_length - 1;
846 // skip white space from start
847 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
848 ++start;
850 // only white space
851 if (start > end)
852 return empty();
854 // skip white space from end
855 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
856 --end;
858 if (!start && end == m_length - 1)
859 return this;
860 if (is8Bit())
861 return create(characters8() + start, end + 1 - start);
862 return create(characters16() + start, end + 1 - start);
865 class UCharPredicate {
866 public:
867 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
869 inline bool operator()(UChar ch) const
871 return m_function(ch);
874 private:
875 const CharacterMatchFunctionPtr m_function;
878 class SpaceOrNewlinePredicate {
879 public:
880 inline bool operator()(UChar ch) const
882 return isSpaceOrNewline(ch);
886 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
888 return stripMatchedCharacters(SpaceOrNewlinePredicate());
891 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
893 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
896 template <typename CharType>
897 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
899 const CharType* from = characters;
900 const CharType* fromend = from + m_length;
902 // Assume the common case will not remove any characters
903 while (from != fromend && !findMatch(*from))
904 ++from;
905 if (from == fromend)
906 return this;
908 StringBuffer<CharType> data(m_length);
909 CharType* to = data.characters();
910 unsigned outc = from - characters;
912 if (outc)
913 memcpy(to, characters, outc * sizeof(CharType));
915 while (true) {
916 while (from != fromend && findMatch(*from))
917 ++from;
918 while (from != fromend && !findMatch(*from))
919 to[outc++] = *from++;
920 if (from == fromend)
921 break;
924 data.shrink(outc);
926 return data.release();
929 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
931 if (is8Bit())
932 return removeCharacters(characters8(), findMatch);
933 return removeCharacters(characters16(), findMatch);
936 template <typename CharType, class UCharPredicate>
937 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
939 StringBuffer<CharType> data(m_length);
941 const CharType* from = getCharacters<CharType>();
942 const CharType* fromend = from + m_length;
943 int outc = 0;
944 bool changedToSpace = false;
946 CharType* to = data.characters();
948 if (stripBehavior == StripExtraWhiteSpace) {
949 while (true) {
950 while (from != fromend && predicate(*from)) {
951 if (*from != ' ')
952 changedToSpace = true;
953 ++from;
955 while (from != fromend && !predicate(*from))
956 to[outc++] = *from++;
957 if (from != fromend)
958 to[outc++] = ' ';
959 else
960 break;
963 if (outc > 0 && to[outc - 1] == ' ')
964 --outc;
965 } else {
966 for (; from != fromend; ++from) {
967 if (predicate(*from)) {
968 if (*from != ' ')
969 changedToSpace = true;
970 to[outc++] = ' ';
971 } else {
972 to[outc++] = *from;
977 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
978 return this;
980 data.shrink(outc);
982 return data.release();
985 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
987 if (is8Bit())
988 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
989 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
992 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
994 if (is8Bit())
995 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
996 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
999 int StringImpl::toIntStrict(bool* ok, int base)
1001 if (is8Bit())
1002 return charactersToIntStrict(characters8(), m_length, ok, base);
1003 return charactersToIntStrict(characters16(), m_length, ok, base);
1006 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1008 if (is8Bit())
1009 return charactersToUIntStrict(characters8(), m_length, ok, base);
1010 return charactersToUIntStrict(characters16(), m_length, ok, base);
1013 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1015 if (is8Bit())
1016 return charactersToInt64Strict(characters8(), m_length, ok, base);
1017 return charactersToInt64Strict(characters16(), m_length, ok, base);
1020 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1022 if (is8Bit())
1023 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1024 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1027 int StringImpl::toInt(bool* ok)
1029 if (is8Bit())
1030 return charactersToInt(characters8(), m_length, ok);
1031 return charactersToInt(characters16(), m_length, ok);
1034 unsigned StringImpl::toUInt(bool* ok)
1036 if (is8Bit())
1037 return charactersToUInt(characters8(), m_length, ok);
1038 return charactersToUInt(characters16(), m_length, ok);
1041 int64_t StringImpl::toInt64(bool* ok)
1043 if (is8Bit())
1044 return charactersToInt64(characters8(), m_length, ok);
1045 return charactersToInt64(characters16(), m_length, ok);
1048 uint64_t StringImpl::toUInt64(bool* ok)
1050 if (is8Bit())
1051 return charactersToUInt64(characters8(), m_length, ok);
1052 return charactersToUInt64(characters16(), m_length, ok);
1055 double StringImpl::toDouble(bool* ok)
1057 if (is8Bit())
1058 return charactersToDouble(characters8(), m_length, ok);
1059 return charactersToDouble(characters16(), m_length, ok);
1062 float StringImpl::toFloat(bool* ok)
1064 if (is8Bit())
1065 return charactersToFloat(characters8(), m_length, ok);
1066 return charactersToFloat(characters16(), m_length, ok);
// Case-folding lookup table indexed by a Latin-1 code unit (0x00-0xff).
// Each row below holds 16 consecutive entries. Entries map to themselves
// except:
//   - 0x41-0x5a map to 0x61-0x7a;
//   - 0xb5 maps to 0x03bc, which is why the element type is UChar rather
//     than LChar;
//   - 0xc0-0xde map to 0xe0-0xfe, except 0xd7 which maps to itself.
// 0xdf maps to itself.
1069 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
1070 const UChar StringImpl::latin1CaseFoldTable[256] = {
1071 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
1072 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
1073 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
1074 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
1075 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1076 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
1077 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1078 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
1079 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1080 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
1081 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
1082 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
1083 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1084 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
1085 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1086 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
1089 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1091 while (length--) {
1092 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldTable[*b++])
1093 return false;
1095 return true;
1098 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1100 while (length--) {
1101 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
1102 return false;
1104 return true;
1107 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1109 if (is8Bit())
1110 return WTF::find(characters8(), m_length, matchFunction, start);
1111 return WTF::find(characters16(), m_length, matchFunction, start);
1114 size_t StringImpl::find(const LChar* matchString, unsigned index)
1116 // Check for null or empty string to match against
1117 if (!matchString)
1118 return kNotFound;
1119 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1120 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1121 unsigned matchLength = matchStringLength;
1122 if (!matchLength)
1123 return min(index, length());
1125 // Optimization 1: fast case for strings of length 1.
1126 if (matchLength == 1)
1127 return WTF::find(characters16(), length(), *matchString, index);
1129 // Check index & matchLength are in range.
1130 if (index > length())
1131 return kNotFound;
1132 unsigned searchLength = length() - index;
1133 if (matchLength > searchLength)
1134 return kNotFound;
1135 // delta is the number of additional times to test; delta == 0 means test only once.
1136 unsigned delta = searchLength - matchLength;
1138 const UChar* searchCharacters = characters16() + index;
1140 // Optimization 2: keep a running hash of the strings,
1141 // only call equal if the hashes match.
1142 unsigned searchHash = 0;
1143 unsigned matchHash = 0;
1144 for (unsigned i = 0; i < matchLength; ++i) {
1145 searchHash += searchCharacters[i];
1146 matchHash += matchString[i];
1149 unsigned i = 0;
1150 // keep looping until we match
1151 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1152 if (i == delta)
1153 return kNotFound;
1154 searchHash += searchCharacters[i + matchLength];
1155 searchHash -= searchCharacters[i];
1156 ++i;
1158 return index + i;
1161 template<typename CharType>
1162 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1164 // delta is the number of additional times to test; delta == 0 means test only once.
1165 unsigned delta = searchLength - matchLength;
1167 unsigned i = 0;
1168 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1169 if (i == delta)
1170 return kNotFound;
1171 ++i;
1173 return index + i;
1176 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1178 // Check for null or empty string to match against
1179 if (!matchString)
1180 return kNotFound;
1181 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1182 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1183 unsigned matchLength = matchStringLength;
1184 if (!matchLength)
1185 return min(index, length());
1187 // Check index & matchLength are in range.
1188 if (index > length())
1189 return kNotFound;
1190 unsigned searchLength = length() - index;
1191 if (matchLength > searchLength)
1192 return kNotFound;
1194 if (is8Bit())
1195 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1196 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1199 template <typename SearchCharacterType, typename MatchCharacterType>
1200 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1202 // Optimization: keep a running hash of the strings,
1203 // only call equal() if the hashes match.
1205 // delta is the number of additional times to test; delta == 0 means test only once.
1206 unsigned delta = searchLength - matchLength;
1208 unsigned searchHash = 0;
1209 unsigned matchHash = 0;
1211 for (unsigned i = 0; i < matchLength; ++i) {
1212 searchHash += searchCharacters[i];
1213 matchHash += matchCharacters[i];
1216 unsigned i = 0;
1217 // keep looping until we match
1218 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1219 if (i == delta)
1220 return kNotFound;
1221 searchHash += searchCharacters[i + matchLength];
1222 searchHash -= searchCharacters[i];
1223 ++i;
1225 return index + i;
1228 size_t StringImpl::find(StringImpl* matchString)
1230 // Check for null string to match against
1231 if (UNLIKELY(!matchString))
1232 return kNotFound;
1233 unsigned matchLength = matchString->length();
1235 // Optimization 1: fast case for strings of length 1.
1236 if (matchLength == 1) {
1237 if (is8Bit()) {
1238 if (matchString->is8Bit())
1239 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1240 return WTF::find(characters8(), length(), matchString->characters16()[0]);
1242 if (matchString->is8Bit())
1243 return WTF::find(characters16(), length(), matchString->characters8()[0]);
1244 return WTF::find(characters16(), length(), matchString->characters16()[0]);
1247 // Check matchLength is in range.
1248 if (matchLength > length())
1249 return kNotFound;
1251 // Check for empty string to match against
1252 if (UNLIKELY(!matchLength))
1253 return 0;
1255 if (is8Bit()) {
1256 if (matchString->is8Bit())
1257 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1258 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1261 if (matchString->is8Bit())
1262 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1264 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1267 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1269 // Check for null or empty string to match against
1270 if (UNLIKELY(!matchString))
1271 return kNotFound;
1273 unsigned matchLength = matchString->length();
1275 // Optimization 1: fast case for strings of length 1.
1276 if (matchLength == 1) {
1277 if (is8Bit())
1278 return WTF::find(characters8(), length(), (*matchString)[0], index);
1279 return WTF::find(characters16(), length(), (*matchString)[0], index);
1282 if (UNLIKELY(!matchLength))
1283 return min(index, length());
1285 // Check index & matchLength are in range.
1286 if (index > length())
1287 return kNotFound;
1288 unsigned searchLength = length() - index;
1289 if (matchLength > searchLength)
1290 return kNotFound;
1292 if (is8Bit()) {
1293 if (matchString->is8Bit())
1294 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1295 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1298 if (matchString->is8Bit())
1299 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1301 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1304 template <typename SearchCharacterType, typename MatchCharacterType>
1305 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1307 // delta is the number of additional times to test; delta == 0 means test only once.
1308 unsigned delta = searchLength - matchLength;
1310 unsigned i = 0;
1311 // keep looping until we match
1312 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1313 if (i == delta)
1314 return kNotFound;
1315 ++i;
1317 return index + i;
1320 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1322 // Check for null or empty string to match against
1323 if (!matchString)
1324 return kNotFound;
1325 unsigned matchLength = matchString->length();
1326 if (!matchLength)
1327 return min(index, length());
1329 // Check index & matchLength are in range.
1330 if (index > length())
1331 return kNotFound;
1332 unsigned searchLength = length() - index;
1333 if (matchLength > searchLength)
1334 return kNotFound;
1336 if (is8Bit()) {
1337 if (matchString->is8Bit())
1338 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1339 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1342 if (matchString->is8Bit())
1343 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1345 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1348 size_t StringImpl::findNextLineStart(unsigned index)
1350 if (is8Bit())
1351 return WTF::findNextLineStart(characters8(), m_length, index);
1352 return WTF::findNextLineStart(characters16(), m_length, index);
1355 size_t StringImpl::count(LChar c) const
1357 int count = 0;
1358 if (is8Bit()) {
1359 for (size_t i = 0; i < m_length; ++i)
1360 count += characters8()[i] == c;
1361 } else {
1362 for (size_t i = 0; i < m_length; ++i)
1363 count += characters16()[i] == c;
1365 return count;
1368 size_t StringImpl::reverseFind(UChar c, unsigned index)
1370 if (is8Bit())
1371 return WTF::reverseFind(characters8(), m_length, c, index);
1372 return WTF::reverseFind(characters16(), m_length, c, index);
1375 template <typename SearchCharacterType, typename MatchCharacterType>
1376 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1378 // Optimization: keep a running hash of the strings,
1379 // only call equal if the hashes match.
1381 // delta is the number of additional times to test; delta == 0 means test only once.
1382 unsigned delta = min(index, length - matchLength);
1384 unsigned searchHash = 0;
1385 unsigned matchHash = 0;
1386 for (unsigned i = 0; i < matchLength; ++i) {
1387 searchHash += searchCharacters[delta + i];
1388 matchHash += matchCharacters[i];
1391 // keep looping until we match
1392 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1393 if (!delta)
1394 return kNotFound;
1395 --delta;
1396 searchHash -= searchCharacters[delta + matchLength];
1397 searchHash += searchCharacters[delta];
1399 return delta;
1402 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1404 // Check for null or empty string to match against
1405 if (!matchString)
1406 return kNotFound;
1407 unsigned matchLength = matchString->length();
1408 unsigned ourLength = length();
1409 if (!matchLength)
1410 return min(index, ourLength);
1412 // Optimization 1: fast case for strings of length 1.
1413 if (matchLength == 1) {
1414 if (is8Bit())
1415 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1416 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1419 // Check index & matchLength are in range.
1420 if (matchLength > ourLength)
1421 return kNotFound;
1423 if (is8Bit()) {
1424 if (matchString->is8Bit())
1425 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1426 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1429 if (matchString->is8Bit())
1430 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1432 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1435 template <typename SearchCharacterType, typename MatchCharacterType>
1436 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1438 // delta is the number of additional times to test; delta == 0 means test only once.
1439 unsigned delta = min(index, length - matchLength);
1441 // keep looping until we match
1442 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1443 if (!delta)
1444 return kNotFound;
1445 --delta;
1447 return delta;
1450 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1452 // Check for null or empty string to match against
1453 if (!matchString)
1454 return kNotFound;
1455 unsigned matchLength = matchString->length();
1456 unsigned ourLength = length();
1457 if (!matchLength)
1458 return min(index, ourLength);
1460 // Check index & matchLength are in range.
1461 if (matchLength > ourLength)
1462 return kNotFound;
1464 if (is8Bit()) {
1465 if (matchString->is8Bit())
1466 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1467 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1470 if (matchString->is8Bit())
1471 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1473 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1476 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, TextCaseSensitivity caseSensitivity)
1478 ASSERT(stringImpl);
1479 ASSERT(matchLength <= stringImpl->length());
1480 ASSERT(startOffset + matchLength <= stringImpl->length());
1482 if (caseSensitivity == TextCaseSensitive) {
1483 if (stringImpl->is8Bit())
1484 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1485 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1487 if (stringImpl->is8Bit())
1488 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1489 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1492 bool StringImpl::startsWith(UChar character) const
1494 return m_length && (*this)[0] == character;
1497 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, TextCaseSensitivity caseSensitivity) const
1499 ASSERT(matchLength);
1500 if (matchLength > length())
1501 return false;
1502 return equalInner(this, 0, matchString, matchLength, caseSensitivity);
1505 bool StringImpl::endsWith(StringImpl* matchString, TextCaseSensitivity caseSensitivity)
1507 ASSERT(matchString);
1508 if (m_length >= matchString->m_length) {
1509 unsigned start = m_length - matchString->m_length;
1510 if (caseSensitivity == TextCaseSensitive)
1511 return find(matchString, start) == start;
1512 return findIgnoringCase(matchString, start) == start;
1514 return false;
1517 bool StringImpl::endsWith(UChar character) const
1519 return m_length && (*this)[m_length - 1] == character;
1522 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, TextCaseSensitivity caseSensitivity) const
1524 ASSERT(matchLength);
1525 if (matchLength > length())
1526 return false;
1527 unsigned startOffset = length() - matchLength;
1528 return equalInner(this, startOffset, matchString, matchLength, caseSensitivity);
1531 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1533 if (oldC == newC)
1534 return this;
1536 if (find(oldC) == kNotFound)
1537 return this;
1539 unsigned i;
1540 if (is8Bit()) {
1541 if (newC <= 0xff) {
1542 LChar* data;
1543 LChar oldChar = static_cast<LChar>(oldC);
1544 LChar newChar = static_cast<LChar>(newC);
1546 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1548 for (i = 0; i != m_length; ++i) {
1549 LChar ch = characters8()[i];
1550 if (ch == oldChar)
1551 ch = newChar;
1552 data[i] = ch;
1554 return newImpl.release();
1557 // There is the possibility we need to up convert from 8 to 16 bit,
1558 // create a 16 bit string for the result.
1559 UChar* data;
1560 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1562 for (i = 0; i != m_length; ++i) {
1563 UChar ch = characters8()[i];
1564 if (ch == oldC)
1565 ch = newC;
1566 data[i] = ch;
1569 return newImpl.release();
1572 UChar* data;
1573 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1575 for (i = 0; i != m_length; ++i) {
1576 UChar ch = characters16()[i];
1577 if (ch == oldC)
1578 ch = newC;
1579 data[i] = ch;
1581 return newImpl.release();
1584 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1586 position = min(position, length());
1587 lengthToReplace = min(lengthToReplace, length() - position);
1588 unsigned lengthToInsert = str ? str->length() : 0;
1589 if (!lengthToReplace && !lengthToInsert)
1590 return this;
1592 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1594 if (is8Bit() && (!str || str->is8Bit())) {
1595 LChar* data;
1596 RefPtr<StringImpl> newImpl =
1597 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1598 memcpy(data, characters8(), position * sizeof(LChar));
1599 if (str)
1600 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1601 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1602 (length() - position - lengthToReplace) * sizeof(LChar));
1603 return newImpl.release();
1605 UChar* data;
1606 RefPtr<StringImpl> newImpl =
1607 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1608 if (is8Bit())
1609 for (unsigned i = 0; i < position; ++i)
1610 data[i] = characters8()[i];
1611 else
1612 memcpy(data, characters16(), position * sizeof(UChar));
1613 if (str) {
1614 if (str->is8Bit())
1615 for (unsigned i = 0; i < lengthToInsert; ++i)
1616 data[i + position] = str->characters8()[i];
1617 else
1618 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1620 if (is8Bit()) {
1621 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1622 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1623 } else {
1624 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1625 (length() - position - lengthToReplace) * sizeof(UChar));
1627 return newImpl.release();
1630 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1632 if (!replacement)
1633 return this;
1635 if (replacement->is8Bit())
1636 return replace(pattern, replacement->characters8(), replacement->length());
1638 return replace(pattern, replacement->characters16(), replacement->length());
1641 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1643 ASSERT(replacement);
1645 size_t srcSegmentStart = 0;
1646 unsigned matchCount = 0;
1648 // Count the matches.
1649 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1650 ++matchCount;
1651 ++srcSegmentStart;
1654 // If we have 0 matches then we don't have to do any more work.
1655 if (!matchCount)
1656 return this;
1658 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1660 unsigned replaceSize = matchCount * repStrLength;
1661 unsigned newSize = m_length - matchCount;
1662 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1664 newSize += replaceSize;
1666 // Construct the new data.
1667 size_t srcSegmentEnd;
1668 unsigned srcSegmentLength;
1669 srcSegmentStart = 0;
1670 unsigned dstOffset = 0;
1672 if (is8Bit()) {
1673 LChar* data;
1674 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1676 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1677 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1678 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1679 dstOffset += srcSegmentLength;
1680 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1681 dstOffset += repStrLength;
1682 srcSegmentStart = srcSegmentEnd + 1;
1685 srcSegmentLength = m_length - srcSegmentStart;
1686 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1688 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1690 return newImpl.release();
1693 UChar* data;
1694 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1696 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1697 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1698 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1700 dstOffset += srcSegmentLength;
1701 for (unsigned i = 0; i < repStrLength; ++i)
1702 data[i + dstOffset] = replacement[i];
1704 dstOffset += repStrLength;
1705 srcSegmentStart = srcSegmentEnd + 1;
1708 srcSegmentLength = m_length - srcSegmentStart;
1709 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1711 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1713 return newImpl.release();
1716 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1718 ASSERT(replacement);
1720 size_t srcSegmentStart = 0;
1721 unsigned matchCount = 0;
1723 // Count the matches.
1724 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1725 ++matchCount;
1726 ++srcSegmentStart;
1729 // If we have 0 matches then we don't have to do any more work.
1730 if (!matchCount)
1731 return this;
1733 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1735 unsigned replaceSize = matchCount * repStrLength;
1736 unsigned newSize = m_length - matchCount;
1737 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1739 newSize += replaceSize;
1741 // Construct the new data.
1742 size_t srcSegmentEnd;
1743 unsigned srcSegmentLength;
1744 srcSegmentStart = 0;
1745 unsigned dstOffset = 0;
1747 if (is8Bit()) {
1748 UChar* data;
1749 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1751 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1752 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1753 for (unsigned i = 0; i < srcSegmentLength; ++i)
1754 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1756 dstOffset += srcSegmentLength;
1757 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1759 dstOffset += repStrLength;
1760 srcSegmentStart = srcSegmentEnd + 1;
1763 srcSegmentLength = m_length - srcSegmentStart;
1764 for (unsigned i = 0; i < srcSegmentLength; ++i)
1765 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1767 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1769 return newImpl.release();
1772 UChar* data;
1773 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1775 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1776 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1777 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1779 dstOffset += srcSegmentLength;
1780 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1782 dstOffset += repStrLength;
1783 srcSegmentStart = srcSegmentEnd + 1;
1786 srcSegmentLength = m_length - srcSegmentStart;
1787 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1789 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1791 return newImpl.release();
1794 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1796 if (!pattern || !replacement)
1797 return this;
1799 unsigned patternLength = pattern->length();
1800 if (!patternLength)
1801 return this;
1803 unsigned repStrLength = replacement->length();
1804 size_t srcSegmentStart = 0;
1805 unsigned matchCount = 0;
1807 // Count the matches.
1808 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1809 ++matchCount;
1810 srcSegmentStart += patternLength;
1813 // If we have 0 matches, we don't have to do any more work
1814 if (!matchCount)
1815 return this;
1817 unsigned newSize = m_length - matchCount * patternLength;
1818 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1820 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1822 newSize += matchCount * repStrLength;
1825 // Construct the new data
1826 size_t srcSegmentEnd;
1827 unsigned srcSegmentLength;
1828 srcSegmentStart = 0;
1829 unsigned dstOffset = 0;
1830 bool srcIs8Bit = is8Bit();
1831 bool replacementIs8Bit = replacement->is8Bit();
1833 // There are 4 cases:
1834 // 1. This and replacement are both 8 bit.
1835 // 2. This and replacement are both 16 bit.
1836 // 3. This is 8 bit and replacement is 16 bit.
1837 // 4. This is 16 bit and replacement is 8 bit.
1838 if (srcIs8Bit && replacementIs8Bit) {
1839 // Case 1
1840 LChar* data;
1841 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1842 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1843 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1844 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1845 dstOffset += srcSegmentLength;
1846 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1847 dstOffset += repStrLength;
1848 srcSegmentStart = srcSegmentEnd + patternLength;
1851 srcSegmentLength = m_length - srcSegmentStart;
1852 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1854 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1856 return newImpl.release();
1859 UChar* data;
1860 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1861 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1862 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1863 if (srcIs8Bit) {
1864 // Case 3.
1865 for (unsigned i = 0; i < srcSegmentLength; ++i)
1866 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1867 } else {
1868 // Case 2 & 4.
1869 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1871 dstOffset += srcSegmentLength;
1872 if (replacementIs8Bit) {
1873 // Cases 2 & 3.
1874 for (unsigned i = 0; i < repStrLength; ++i)
1875 data[i + dstOffset] = replacement->characters8()[i];
1876 } else {
1877 // Case 4
1878 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1880 dstOffset += repStrLength;
1881 srcSegmentStart = srcSegmentEnd + patternLength;
1884 srcSegmentLength = m_length - srcSegmentStart;
1885 if (srcIs8Bit) {
1886 // Case 3.
1887 for (unsigned i = 0; i < srcSegmentLength; ++i)
1888 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1889 } else {
1890 // Cases 2 & 4.
1891 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1894 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1896 return newImpl.release();
1899 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1901 if (is8Bit())
1902 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1903 return this;
1906 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1908 unsigned aLength = a->length();
1909 unsigned bLength = b->length();
1910 if (aLength != bLength)
1911 return false;
1913 if (a->is8Bit()) {
1914 if (b->is8Bit())
1915 return equal(a->characters8(), b->characters8(), aLength);
1917 return equal(a->characters8(), b->characters16(), aLength);
1920 if (b->is8Bit())
1921 return equal(a->characters16(), b->characters8(), aLength);
1923 return equal(a->characters16(), b->characters16(), aLength);
1926 bool equal(const StringImpl* a, const StringImpl* b)
1928 if (a == b)
1929 return true;
1930 if (!a || !b)
1931 return false;
1932 if (a->isAtomic() && b->isAtomic())
1933 return false;
1935 return stringImplContentEqual(a, b);
1938 template <typename CharType>
1939 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1941 if (!a)
1942 return !b;
1943 if (!b)
1944 return false;
1946 if (a->length() != length)
1947 return false;
1948 if (a->is8Bit())
1949 return equal(a->characters8(), b, length);
1950 return equal(a->characters16(), b, length);
1953 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1955 return equalInternal(a, b, length);
1958 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1960 return equalInternal(a, b, length);
1963 bool equal(const StringImpl* a, const LChar* b)
1965 if (!a)
1966 return !b;
1967 if (!b)
1968 return !a;
1970 unsigned length = a->length();
1972 if (a->is8Bit()) {
1973 const LChar* aPtr = a->characters8();
1974 for (unsigned i = 0; i != length; ++i) {
1975 LChar bc = b[i];
1976 LChar ac = aPtr[i];
1977 if (!bc)
1978 return false;
1979 if (ac != bc)
1980 return false;
1983 return !b[length];
1986 const UChar* aPtr = a->characters16();
1987 for (unsigned i = 0; i != length; ++i) {
1988 LChar bc = b[i];
1989 if (!bc)
1990 return false;
1991 if (aPtr[i] != bc)
1992 return false;
1995 return !b[length];
1998 bool equalNonNull(const StringImpl* a, const StringImpl* b)
2000 ASSERT(a && b);
2001 if (a == b)
2002 return true;
2004 return stringImplContentEqual(a, b);
2007 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2009 if (a == b)
2010 return true;
2011 if (!a || !b)
2012 return false;
2014 return CaseFoldingHash::equal(a, b);
2017 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2019 if (!a)
2020 return !b;
2021 if (!b)
2022 return !a;
2024 unsigned length = a->length();
2026 // Do a faster loop for the case where all the characters are ASCII.
2027 UChar ored = 0;
2028 bool equal = true;
2029 if (a->is8Bit()) {
2030 const LChar* as = a->characters8();
2031 for (unsigned i = 0; i != length; ++i) {
2032 LChar bc = b[i];
2033 if (!bc)
2034 return false;
2035 UChar ac = as[i];
2036 ored |= ac;
2037 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2040 // Do a slower implementation for cases that include non-ASCII characters.
2041 if (ored & ~0x7F) {
2042 equal = true;
2043 for (unsigned i = 0; i != length; ++i)
2044 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2047 return equal && !b[length];
2050 const UChar* as = a->characters16();
2051 for (unsigned i = 0; i != length; ++i) {
2052 LChar bc = b[i];
2053 if (!bc)
2054 return false;
2055 UChar ac = as[i];
2056 ored |= ac;
2057 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2060 // Do a slower implementation for cases that include non-ASCII characters.
2061 if (ored & ~0x7F) {
2062 equal = true;
2063 for (unsigned i = 0; i != length; ++i) {
2064 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2068 return equal && !b[length];
2071 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2073 ASSERT(a && b);
2074 if (a == b)
2075 return true;
2077 unsigned length = a->length();
2078 if (length != b->length())
2079 return false;
2081 if (a->is8Bit()) {
2082 if (b->is8Bit())
2083 return equalIgnoringCase(a->characters8(), b->characters8(), length);
2085 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2088 if (b->is8Bit())
2089 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2091 return equalIgnoringCase(a->characters16(), b->characters16(), length);
2094 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2096 if (!a && b && !b->length())
2097 return true;
2098 if (!b && a && !a->length())
2099 return true;
2100 return equal(a, b);
2103 size_t StringImpl::sizeInBytes() const
2105 size_t size = length();
2106 if (!is8Bit())
2107 size *= 2;
2108 return size + sizeof(*this);
2111 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier)
2113 if (!localeIdentifier.isNull()) {
2114 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az")) {
2115 if (c == 'i')
2116 return latinCapitalLetterIWithDotAbove;
2117 if (c == latinSmallLetterDotlessI)
2118 return 'I';
2119 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
2120 // TODO(rob.buis) implement upper-casing rules for lt
2121 // like in StringImpl::upper(locale).
2125 return toUpper(c);
2128 } // namespace WTF