Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / xpcom / string / src / nsReadableUtils.cpp
blob854967fb13f82cfd3ad252bb07009109810c768b
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2000
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Scott Collins <scc@mozilla.org> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsReadableUtils.h"
40 #include "nsMemory.h"
41 #include "nsString.h"
42 #include "nsUTF8Utils.h"
44 NS_COM
45 void
46 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
48 aDest.Truncate();
49 LossyAppendUTF16toASCII(aSource, aDest);
52 NS_COM
53 void
54 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
56 aDest.Truncate();
57 AppendASCIItoUTF16(aSource, aDest);
60 NS_COM
61 void
62 LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
64 aDest.Truncate();
65 if (aSource) {
66 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
70 NS_COM
71 void
72 CopyASCIItoUTF16( const char* aSource, nsAString& aDest )
74 aDest.Truncate();
75 if (aSource) {
76 AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
80 NS_COM
81 void
82 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
84 aDest.Truncate();
85 AppendUTF16toUTF8(aSource, aDest);
88 NS_COM
89 void
90 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
92 aDest.Truncate();
93 AppendUTF8toUTF16(aSource, aDest);
96 NS_COM
97 void
98 CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
100 aDest.Truncate();
101 AppendUTF16toUTF8(aSource, aDest);
104 NS_COM
105 void
106 CopyUTF8toUTF16( const char* aSource, nsAString& aDest )
108 aDest.Truncate();
109 AppendUTF8toUTF16(aSource, aDest);
112 // Like GetMutableData, but returns false if it can't
113 // allocate enough memory (e.g. due to OOM) rather than
114 // returning zero (which could have other meanings) and
115 // throws away the out-param pointer.
116 PRBool
117 SetLengthForWriting(nsAString& aDest, PRUint32 aDesiredLength)
119 PRUnichar* dummy;
120 PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
121 return (len >= aDesiredLength);
124 PRBool
125 SetLengthForWritingC(nsACString& aDest, PRUint32 aDesiredLength)
127 char* dummy;
128 PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
129 return (len >= aDesiredLength);
133 NS_COM
134 void
135 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
137 PRUint32 old_dest_length = aDest.Length();
138 if (!SetLengthForWritingC(aDest, old_dest_length + aSource.Length()))
139 return;
141 nsAString::const_iterator fromBegin, fromEnd;
143 nsACString::iterator dest;
144 aDest.BeginWriting(dest);
146 dest.advance(old_dest_length);
148 // right now, this won't work on multi-fragment destinations
149 LossyConvertEncoding<PRUnichar, char> converter(dest.get());
151 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
154 NS_COM
155 void
156 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
158 PRUint32 old_dest_length = aDest.Length();
159 if (!SetLengthForWriting(aDest, old_dest_length + aSource.Length()))
160 return;
162 nsACString::const_iterator fromBegin, fromEnd;
164 nsAString::iterator dest;
165 aDest.BeginWriting(dest);
167 dest.advance(old_dest_length);
169 // right now, this won't work on multi-fragment destinations
170 LossyConvertEncoding<char, PRUnichar> converter(dest.get());
172 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
175 NS_COM
176 void
177 LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
179 if (aSource) {
180 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
184 NS_COM
185 void
186 AppendASCIItoUTF16( const char* aSource, nsAString& aDest )
188 if (aSource) {
189 AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
193 NS_COM
194 void
195 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
197 nsAString::const_iterator source_start, source_end;
198 CalculateUTF8Size calculator;
199 copy_string(aSource.BeginReading(source_start),
200 aSource.EndReading(source_end), calculator);
202 PRUint32 count = calculator.Size();
204 if (count)
206 PRUint32 old_dest_length = aDest.Length();
208 // Grow the buffer if we need to.
209 if(!SetLengthForWritingC(aDest, old_dest_length + count))
210 return;
212 nsACString::iterator dest;
213 aDest.BeginWriting(dest);
215 dest.advance(old_dest_length);
217 if (count <= (PRUint32)dest.size_forward())
219 // aDest has enough room in the fragment just past the end
220 // of its old data that it can hold what we're about to
221 // append. Append using copy_string().
223 // All ready? Time to convert
225 ConvertUTF16toUTF8 converter(dest.get());
226 copy_string(aSource.BeginReading(source_start),
227 aSource.EndReading(source_end), converter);
229 if (converter.Size() != count)
231 NS_ERROR("Input invalid or incorrect length was calculated");
233 aDest.SetLength(old_dest_length);
236 else
238 // This isn't the fastest way to do this, but it gets
239 // complicated to convert UTF16 into a fragmented UTF8
240 // string, so we'll take the easy way out here in this
241 // rare situation.
243 aDest.Replace(old_dest_length, count,
244 NS_ConvertUTF16toUTF8(aSource));
249 NS_COM
250 void
251 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
253 nsACString::const_iterator source_start, source_end;
254 CalculateUTF8Length calculator;
255 copy_string(aSource.BeginReading(source_start),
256 aSource.EndReading(source_end), calculator);
258 PRUint32 count = calculator.Length();
260 if (count)
262 PRUint32 old_dest_length = aDest.Length();
264 // Grow the buffer if we need to.
265 if(!SetLengthForWriting(aDest, old_dest_length + count))
266 return;
268 nsAString::iterator dest;
269 aDest.BeginWriting(dest);
271 dest.advance(old_dest_length);
273 if (count <= (PRUint32)dest.size_forward())
275 // aDest has enough room in the fragment just past the end
276 // of its old data that it can hold what we're about to
277 // append. Append using copy_string().
279 // All ready? Time to convert
281 ConvertUTF8toUTF16 converter(dest.get());
282 copy_string(aSource.BeginReading(source_start),
283 aSource.EndReading(source_end), converter);
285 if (converter.Length() != count)
287 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
288 aDest.SetLength(old_dest_length);
291 else
293 // This isn't the fastest way to do this, but it gets
294 // complicated to convert parts of a UTF8 string into a
295 // UTF16 string, so we'll take the easy way out here in
296 // this rare situation.
298 aDest.Replace(old_dest_length, count,
299 NS_ConvertUTF8toUTF16(aSource));
304 NS_COM
305 void
306 AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
308 if (aSource) {
309 AppendUTF16toUTF8(nsDependentString(aSource), aDest);
313 NS_COM
314 void
315 AppendUTF8toUTF16( const char* aSource, nsAString& aDest )
317 if (aSource) {
318 AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
324 * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
326 * @param aSource an string you will eventually be making a copy of
327 * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
330 template <class FromStringT, class ToCharT>
331 inline
332 ToCharT*
333 AllocateStringCopy( const FromStringT& aSource, ToCharT* )
335 return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
339 NS_COM
340 char*
341 ToNewCString( const nsAString& aSource )
343 char* result = AllocateStringCopy(aSource, (char*)0);
344 if (!result)
345 return nsnull;
347 nsAString::const_iterator fromBegin, fromEnd;
348 LossyConvertEncoding<PRUnichar, char> converter(result);
349 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
350 return result;
353 NS_COM
354 char*
355 ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count )
357 nsAString::const_iterator start, end;
358 CalculateUTF8Size calculator;
359 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
360 calculator);
362 if (aUTF8Count)
363 *aUTF8Count = calculator.Size();
365 char *result = static_cast<char*>
366 (nsMemory::Alloc(calculator.Size() + 1));
367 if (!result)
368 return nsnull;
370 ConvertUTF16toUTF8 converter(result);
371 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
372 converter).write_terminator();
373 NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
375 return result;
378 NS_COM
379 char*
380 ToNewCString( const nsACString& aSource )
382 // no conversion needed, just allocate a buffer of the correct length and copy into it
384 char* result = AllocateStringCopy(aSource, (char*)0);
385 if (!result)
386 return nsnull;
388 nsACString::const_iterator fromBegin, fromEnd;
389 char* toBegin = result;
390 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
391 return result;
394 NS_COM
395 PRUnichar*
396 ToNewUnicode( const nsAString& aSource )
398 // no conversion needed, just allocate a buffer of the correct length and copy into it
400 PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
401 if (!result)
402 return nsnull;
404 nsAString::const_iterator fromBegin, fromEnd;
405 PRUnichar* toBegin = result;
406 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = PRUnichar(0);
407 return result;
410 NS_COM
411 PRUnichar*
412 ToNewUnicode( const nsACString& aSource )
414 PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
415 if (!result)
416 return nsnull;
418 nsACString::const_iterator fromBegin, fromEnd;
419 LossyConvertEncoding<char, PRUnichar> converter(result);
420 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
421 return result;
424 NS_COM
425 PRUnichar*
426 UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count )
428 nsACString::const_iterator start, end;
429 CalculateUTF8Length calculator;
430 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
431 calculator);
433 if (aUTF16Count)
434 *aUTF16Count = calculator.Length();
436 PRUnichar *result = static_cast<PRUnichar*>
437 (nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
438 if (!result)
439 return nsnull;
441 ConvertUTF8toUTF16 converter(result);
442 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
443 converter).write_terminator();
444 NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
446 return result;
449 NS_COM
450 PRUnichar*
451 CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength )
453 nsAString::const_iterator fromBegin, fromEnd;
454 PRUnichar* toBegin = aDest;
455 copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), toBegin);
456 return aDest;
459 NS_COM
460 void
461 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
462 const nsAString::const_iterator& aSrcEnd,
463 nsAString& aDest )
465 nsAString::iterator writer;
466 if (!SetLengthForWriting(aDest, Distance(aSrcStart, aSrcEnd)))
467 return;
469 aDest.BeginWriting(writer);
470 nsAString::const_iterator fromBegin(aSrcStart);
472 copy_string(fromBegin, aSrcEnd, writer);
475 NS_COM
476 void
477 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
478 const nsAString::const_iterator& aSrcEnd,
479 nsAString& aDest )
481 nsAString::iterator writer;
482 PRUint32 oldLength = aDest.Length();
483 if(!SetLengthForWriting(aDest, oldLength + Distance(aSrcStart, aSrcEnd)))
484 return;
486 aDest.BeginWriting(writer).advance(oldLength);
487 nsAString::const_iterator fromBegin(aSrcStart);
489 copy_string(fromBegin, aSrcEnd, writer);
492 NS_COM
493 PRBool
494 IsASCII( const nsAString& aString )
496 static const PRUnichar NOT_ASCII = PRUnichar(~0x007F);
499 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
501 nsAString::const_iterator iter, done_reading;
502 aString.BeginReading(iter);
503 aString.EndReading(done_reading);
505 const PRUnichar* c = iter.get();
506 const PRUnichar* end = done_reading.get();
508 while ( c < end )
510 if ( *c++ & NOT_ASCII )
511 return PR_FALSE;
514 return PR_TRUE;
517 NS_COM
518 PRBool
519 IsASCII( const nsACString& aString )
521 static const char NOT_ASCII = char(~0x7F);
524 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
526 nsACString::const_iterator iter, done_reading;
527 aString.BeginReading(iter);
528 aString.EndReading(done_reading);
530 const char* c = iter.get();
531 const char* end = done_reading.get();
533 while ( c < end )
535 if ( *c++ & NOT_ASCII )
536 return PR_FALSE;
539 return PR_TRUE;
542 NS_COM
543 PRBool
544 IsUTF8( const nsACString& aString )
546 nsReadingIterator<char> done_reading;
547 aString.EndReading(done_reading);
549 PRInt32 state = 0;
550 PRBool overlong = PR_FALSE;
551 PRBool surrogate = PR_FALSE;
552 PRBool nonchar = PR_FALSE;
553 PRUint16 olupper = 0; // overlong byte upper bound.
554 PRUint16 slower = 0; // surrogate byte lower bound.
556 nsReadingIterator<char> iter;
557 aString.BeginReading(iter);
559 const char* ptr = iter.get();
560 const char* end = done_reading.get();
561 while ( ptr < end )
563 PRUint8 c;
565 if (0 == state)
567 c = *ptr++;
569 if ( UTF8traits::isASCII(c) )
570 continue;
572 if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
573 return PR_FALSE;
574 else if ( UTF8traits::is2byte(c) )
575 state = 1;
576 else if ( UTF8traits::is3byte(c) )
578 state = 2;
579 if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
581 overlong = PR_TRUE;
582 olupper = 0x9F;
584 else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
586 surrogate = PR_TRUE;
587 slower = 0xA0;
589 else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
590 nonchar = PR_TRUE;
592 else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
594 state = 3;
595 nonchar = PR_TRUE;
596 if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
598 overlong = PR_TRUE;
599 olupper = 0x8F;
601 else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
603 // actually not surrogates but codepoints beyond 0x10FFFF
604 surrogate = PR_TRUE;
605 slower = 0x90;
608 else
609 return PR_FALSE; // Not UTF-8 string
612 while ( ptr < end && state )
614 c = *ptr++;
615 --state;
617 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
618 if ( nonchar && ( !state && c < 0xBE ||
619 state == 1 && c != 0xBF ||
620 state == 2 && 0x0F != (0x0F & c) ))
621 nonchar = PR_FALSE;
623 if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
624 surrogate && slower <= c || nonchar && !state )
625 return PR_FALSE; // Not UTF-8 string
626 overlong = surrogate = PR_FALSE;
629 return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
633 * A character sink for in-place case conversion.
635 class ConvertToUpperCase
637 public:
638 typedef char value_type;
640 PRUint32
641 write( const char* aSource, PRUint32 aSourceLength )
643 char* cp = const_cast<char*>(aSource);
644 const char* end = aSource + aSourceLength;
645 while (cp != end) {
646 char ch = *cp;
647 if ((ch >= 'a') && (ch <= 'z'))
648 *cp = ch - ('a' - 'A');
649 ++cp;
651 return aSourceLength;
655 NS_COM
656 void
657 ToUpperCase( nsCSubstring& aCString )
659 ConvertToUpperCase converter;
660 char* start;
661 converter.write(aCString.BeginWriting(start), aCString.Length());
665 * A character sink for copying with case conversion.
667 class CopyToUpperCase
669 public:
670 typedef char value_type;
672 CopyToUpperCase( nsACString::iterator& aDestIter )
673 : mIter(aDestIter)
677 PRUint32
678 write( const char* aSource, PRUint32 aSourceLength )
680 PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
681 char* cp = mIter.get();
682 const char* end = aSource + len;
683 while (aSource != end) {
684 char ch = *aSource;
685 if ((ch >= 'a') && (ch <= 'z'))
686 *cp = ch - ('a' - 'A');
687 else
688 *cp = ch;
689 ++aSource;
690 ++cp;
692 mIter.advance(len);
693 return len;
696 protected:
697 nsACString::iterator& mIter;
700 NS_COM
701 void
702 ToUpperCase( const nsACString& aSource, nsACString& aDest )
704 nsACString::const_iterator fromBegin, fromEnd;
705 nsACString::iterator toBegin;
706 if (!SetLengthForWritingC(aDest, aSource.Length()))
707 return;
709 CopyToUpperCase converter(aDest.BeginWriting(toBegin));
710 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
714 * A character sink for case conversion.
716 class ConvertToLowerCase
718 public:
719 typedef char value_type;
721 PRUint32
722 write( const char* aSource, PRUint32 aSourceLength )
724 char* cp = const_cast<char*>(aSource);
725 const char* end = aSource + aSourceLength;
726 while (cp != end) {
727 char ch = *cp;
728 if ((ch >= 'A') && (ch <= 'Z'))
729 *cp = ch + ('a' - 'A');
730 ++cp;
732 return aSourceLength;
736 NS_COM
737 void
738 ToLowerCase( nsCSubstring& aCString )
740 ConvertToLowerCase converter;
741 char* start;
742 converter.write(aCString.BeginWriting(start), aCString.Length());
746 * A character sink for copying with case conversion.
748 class CopyToLowerCase
750 public:
751 typedef char value_type;
753 CopyToLowerCase( nsACString::iterator& aDestIter )
754 : mIter(aDestIter)
758 PRUint32
759 write( const char* aSource, PRUint32 aSourceLength )
761 PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
762 char* cp = mIter.get();
763 const char* end = aSource + len;
764 while (aSource != end) {
765 char ch = *aSource;
766 if ((ch >= 'A') && (ch <= 'Z'))
767 *cp = ch + ('a' - 'A');
768 else
769 *cp = ch;
770 ++aSource;
771 ++cp;
773 mIter.advance(len);
774 return len;
777 protected:
778 nsACString::iterator& mIter;
781 NS_COM
782 void
783 ToLowerCase( const nsACString& aSource, nsACString& aDest )
785 nsACString::const_iterator fromBegin, fromEnd;
786 nsACString::iterator toBegin;
787 if (!SetLengthForWritingC(aDest, aSource.Length()))
788 return;
790 CopyToLowerCase converter(aDest.BeginWriting(toBegin));
791 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
794 template <class StringT, class IteratorT, class Comparator>
795 PRBool
796 FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
798 PRBool found_it = PR_FALSE;
800 // only bother searching at all if we're given a non-empty range to search
801 if ( aSearchStart != aSearchEnd )
803 IteratorT aPatternStart, aPatternEnd;
804 aPattern.BeginReading(aPatternStart);
805 aPattern.EndReading(aPatternEnd);
807 // outer loop keeps searching till we find it or run out of string to search
808 while ( !found_it )
810 // fast inner loop (that's what it's called, not what it is) looks for a potential match
811 while ( aSearchStart != aSearchEnd &&
812 compare(*aPatternStart, *aSearchStart) )
813 ++aSearchStart;
815 // if we broke out of the `fast' loop because we're out of string ... we're done: no match
816 if ( aSearchStart == aSearchEnd )
817 break;
819 // otherwise, we're at a potential match, let's see if we really hit one
820 IteratorT testPattern(aPatternStart);
821 IteratorT testSearch(aSearchStart);
823 // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
824 for(;;)
826 // we already compared the first character in the outer loop,
827 // so we'll advance before the next comparison
828 ++testPattern;
829 ++testSearch;
831 // if we verified all the way to the end of the pattern, then we found it!
832 if ( testPattern == aPatternEnd )
834 found_it = PR_TRUE;
835 aSearchEnd = testSearch; // return the exact found range through the parameters
836 break;
839 // if we got to end of the string we're searching before we hit the end of the
840 // pattern, we'll never find what we're looking for
841 if ( testSearch == aSearchEnd )
843 aSearchStart = aSearchEnd;
844 break;
847 // else if we mismatched ... it's time to advance to the next search position
848 // and get back into the `fast' loop
849 if ( compare(*testPattern, *testSearch) )
851 ++aSearchStart;
852 break;
858 return found_it;
862 * This searches the entire string from right to left, and returns the first match found, if any.
864 template <class StringT, class IteratorT, class Comparator>
865 PRBool
866 RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
868 IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
869 aPattern.BeginReading(patternStart);
870 aPattern.EndReading(patternEnd);
872 // Point to the last character in the pattern
873 --patternEnd;
874 // outer loop keeps searching till we run out of string to search
875 while ( aSearchStart != searchEnd )
877 // Point to the end position of the next possible match
878 --searchEnd;
880 // Check last character, if a match, explore further from here
881 if ( compare(*patternEnd, *searchEnd) == 0 )
883 // We're at a potential match, let's see if we really hit one
884 IteratorT testPattern(patternEnd);
885 IteratorT testSearch(searchEnd);
887 // inner loop verifies the potential match at the current position
890 // if we verified all the way to the end of the pattern, then we found it!
891 if ( testPattern == patternStart )
893 aSearchStart = testSearch; // point to start of match
894 aSearchEnd = ++searchEnd; // point to end of match
895 return PR_TRUE;
898 // if we got to end of the string we're searching before we hit the end of the
899 // pattern, we'll never find what we're looking for
900 if ( testSearch == aSearchStart )
902 aSearchStart = aSearchEnd;
903 return PR_FALSE;
906 // test previous character for a match
907 --testPattern;
908 --testSearch;
910 while ( compare(*testPattern, *testSearch) == 0 );
914 aSearchStart = aSearchEnd;
915 return PR_FALSE;
918 NS_COM
919 PRBool
920 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
922 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
925 NS_COM
926 PRBool
927 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
929 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
932 NS_COM
933 PRBool
934 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
936 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
939 NS_COM
940 PRBool
941 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
943 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
946 NS_COM
947 PRBool
948 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
950 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
953 NS_COM
954 PRBool
955 FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
957 PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
959 const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
960 if ( charFoundAt ) {
961 aSearchStart.advance( charFoundAt - aSearchStart.get() );
962 return PR_TRUE;
965 aSearchStart.advance(fragmentLength);
966 return PR_FALSE;
969 NS_COM
970 PRBool
971 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
973 PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
975 const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
976 if ( charFoundAt ) {
977 aSearchStart.advance( charFoundAt - aSearchStart.get() );
978 return PR_TRUE;
981 aSearchStart.advance(fragmentLength);
982 return PR_FALSE;
985 NS_COM
986 PRUint32
987 CountCharInReadable( const nsAString& aStr,
988 PRUnichar aChar )
990 PRUint32 count = 0;
991 nsAString::const_iterator begin, end;
993 aStr.BeginReading(begin);
994 aStr.EndReading(end);
996 while (begin != end) {
997 if (*begin == aChar) {
998 ++count;
1000 ++begin;
1003 return count;
1006 NS_COM
1007 PRUint32
1008 CountCharInReadable( const nsACString& aStr,
1009 char aChar )
1011 PRUint32 count = 0;
1012 nsACString::const_iterator begin, end;
1014 aStr.BeginReading(begin);
1015 aStr.EndReading(end);
1017 while (begin != end) {
1018 if (*begin == aChar) {
1019 ++count;
1021 ++begin;
1024 return count;
1027 NS_COM PRBool
1028 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
1029 const nsStringComparator& aComparator )
1031 nsAString::size_type src_len = aSource.Length(),
1032 sub_len = aSubstring.Length();
1033 if (sub_len > src_len)
1034 return PR_FALSE;
1035 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1038 NS_COM PRBool
1039 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
1040 const nsCStringComparator& aComparator )
1042 nsACString::size_type src_len = aSource.Length(),
1043 sub_len = aSubstring.Length();
1044 if (sub_len > src_len)
1045 return PR_FALSE;
1046 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1049 NS_COM PRBool
1050 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
1051 const nsStringComparator& aComparator )
1053 nsAString::size_type src_len = aSource.Length(),
1054 sub_len = aSubstring.Length();
1055 if (sub_len > src_len)
1056 return PR_FALSE;
1057 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1058 aComparator);
1061 NS_COM PRBool
1062 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
1063 const nsCStringComparator& aComparator )
1065 nsACString::size_type src_len = aSource.Length(),
1066 sub_len = aSubstring.Length();
1067 if (sub_len > src_len)
1068 return PR_FALSE;
1069 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1070 aComparator);
1075 static const PRUnichar empty_buffer[1] = { '\0' };
1077 NS_COM
1078 const nsAFlatString&
1079 EmptyString()
1081 static const nsDependentString sEmpty(empty_buffer);
1083 return sEmpty;
1086 NS_COM
1087 const nsAFlatCString&
1088 EmptyCString()
1090 static const nsDependentCString sEmpty((const char *)empty_buffer);
1092 return sEmpty;
1095 NS_COM PRInt32
1096 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1097 const nsASingleFragmentString& aUTF16String)
1099 static const PRUint32 NOT_ASCII = PRUint32(~0x7F);
1101 const char *u8, *u8end;
1102 aUTF8String.BeginReading(u8);
1103 aUTF8String.EndReading(u8end);
1105 const PRUnichar *u16, *u16end;
1106 aUTF16String.BeginReading(u16);
1107 aUTF16String.EndReading(u16end);
1109 while (u8 != u8end && u16 != u16end)
1111 // Cast away the signedness of *u8 to prevent signextension when
1112 // converting to PRUint32
1113 PRUint32 c8_32 = (PRUint8)*u8;
1115 if (c8_32 & NOT_ASCII)
1117 PRBool err;
1118 c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1119 if (err)
1120 return PR_INT32_MIN;
1122 PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1123 // The above UTF16CharEnumerator::NextChar() calls can
1124 // fail, but if it does for anything other than no data to
1125 // look at (which can't happen here), it returns the
1126 // Unicode replacement character 0xFFFD for the invalid
1127 // data they were fed. Ignore that error and treat invalid
1128 // UTF16 as 0xFFFD.
1130 // This matches what our UTF16 to UTF8 conversion code
1131 // does, and thus a UTF8 string that came from an invalid
1132 // UTF16 string will compare equal to the invalid UTF16
1133 // string it came from. Same is true for any other UTF16
1134 // string differs only in the invalid part of the string.
1136 if (c8_32 != c16_32)
1137 return c8_32 < c16_32 ? -1 : 1;
1139 else
1141 if (c8_32 != *u16)
1142 return c8_32 > *u16 ? 1 : -1;
1144 ++u8;
1145 ++u16;
1149 if (u8 != u8end)
1151 // We get to the end of the UTF16 string, but no to the end of
1152 // the UTF8 string. The UTF8 string is longer than the UTF16
1153 // string
1155 return 1;
1158 if (u16 != u16end)
1160 // We get to the end of the UTF8 string, but no to the end of
1161 // the UTF16 string. The UTF16 string is longer than the UTF8
1162 // string
1164 return -1;
1167 // The two strings match.
1169 return 0;
1172 NS_COM
1173 void
1174 AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest)
1176 NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1177 if (IS_IN_BMP(aSource))
1179 aDest.Append(PRUnichar(aSource));
1181 else
1183 aDest.Append(H_SURROGATE(aSource));
1184 aDest.Append(L_SURROGATE(aSource));