1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2000
20 * the Initial Developer. All Rights Reserved.
23 * Scott Collins <scc@mozilla.org> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsReadableUtils.h"
42 #include "nsUTF8Utils.h"
46 LossyCopyUTF16toASCII( const nsAString
& aSource
, nsACString
& aDest
)
49 LossyAppendUTF16toASCII(aSource
, aDest
);
54 CopyASCIItoUTF16( const nsACString
& aSource
, nsAString
& aDest
)
57 AppendASCIItoUTF16(aSource
, aDest
);
62 LossyCopyUTF16toASCII( const PRUnichar
* aSource
, nsACString
& aDest
)
66 LossyAppendUTF16toASCII(nsDependentString(aSource
), aDest
);
72 CopyASCIItoUTF16( const char* aSource
, nsAString
& aDest
)
76 AppendASCIItoUTF16(nsDependentCString(aSource
), aDest
);
82 CopyUTF16toUTF8( const nsAString
& aSource
, nsACString
& aDest
)
85 AppendUTF16toUTF8(aSource
, aDest
);
90 CopyUTF8toUTF16( const nsACString
& aSource
, nsAString
& aDest
)
93 AppendUTF8toUTF16(aSource
, aDest
);
98 CopyUTF16toUTF8( const PRUnichar
* aSource
, nsACString
& aDest
)
101 AppendUTF16toUTF8(aSource
, aDest
);
106 CopyUTF8toUTF16( const char* aSource
, nsAString
& aDest
)
109 AppendUTF8toUTF16(aSource
, aDest
);
112 // Like GetMutableData, but returns false if it can't
113 // allocate enough memory (e.g. due to OOM) rather than
114 // returning zero (which could have other meanings) and
115 // throws away the out-param pointer.
117 SetLengthForWriting(nsAString
& aDest
, PRUint32 aDesiredLength
)
120 PRUint32 len
= aDest
.GetMutableData(&dummy
, aDesiredLength
);
121 return (len
>= aDesiredLength
);
125 SetLengthForWritingC(nsACString
& aDest
, PRUint32 aDesiredLength
)
128 PRUint32 len
= aDest
.GetMutableData(&dummy
, aDesiredLength
);
129 return (len
>= aDesiredLength
);
135 LossyAppendUTF16toASCII( const nsAString
& aSource
, nsACString
& aDest
)
137 PRUint32 old_dest_length
= aDest
.Length();
138 if (!SetLengthForWritingC(aDest
, old_dest_length
+ aSource
.Length()))
141 nsAString::const_iterator fromBegin
, fromEnd
;
143 nsACString::iterator dest
;
144 aDest
.BeginWriting(dest
);
146 dest
.advance(old_dest_length
);
148 // right now, this won't work on multi-fragment destinations
149 LossyConvertEncoding
<PRUnichar
, char> converter(dest
.get());
151 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
);
156 AppendASCIItoUTF16( const nsACString
& aSource
, nsAString
& aDest
)
158 PRUint32 old_dest_length
= aDest
.Length();
159 if (!SetLengthForWriting(aDest
, old_dest_length
+ aSource
.Length()))
162 nsACString::const_iterator fromBegin
, fromEnd
;
164 nsAString::iterator dest
;
165 aDest
.BeginWriting(dest
);
167 dest
.advance(old_dest_length
);
169 // right now, this won't work on multi-fragment destinations
170 LossyConvertEncoding
<char, PRUnichar
> converter(dest
.get());
172 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
);
177 LossyAppendUTF16toASCII( const PRUnichar
* aSource
, nsACString
& aDest
)
180 LossyAppendUTF16toASCII(nsDependentString(aSource
), aDest
);
186 AppendASCIItoUTF16( const char* aSource
, nsAString
& aDest
)
189 AppendASCIItoUTF16(nsDependentCString(aSource
), aDest
);
195 AppendUTF16toUTF8( const nsAString
& aSource
, nsACString
& aDest
)
197 nsAString::const_iterator source_start
, source_end
;
198 CalculateUTF8Size calculator
;
199 copy_string(aSource
.BeginReading(source_start
),
200 aSource
.EndReading(source_end
), calculator
);
202 PRUint32 count
= calculator
.Size();
206 PRUint32 old_dest_length
= aDest
.Length();
208 // Grow the buffer if we need to.
209 if(!SetLengthForWritingC(aDest
, old_dest_length
+ count
))
212 nsACString::iterator dest
;
213 aDest
.BeginWriting(dest
);
215 dest
.advance(old_dest_length
);
217 if (count
<= (PRUint32
)dest
.size_forward())
219 // aDest has enough room in the fragment just past the end
220 // of its old data that it can hold what we're about to
221 // append. Append using copy_string().
223 // All ready? Time to convert
225 ConvertUTF16toUTF8
converter(dest
.get());
226 copy_string(aSource
.BeginReading(source_start
),
227 aSource
.EndReading(source_end
), converter
);
229 if (converter
.Size() != count
)
231 NS_ERROR("Input invalid or incorrect length was calculated");
233 aDest
.SetLength(old_dest_length
);
238 // This isn't the fastest way to do this, but it gets
239 // complicated to convert UTF16 into a fragmented UTF8
240 // string, so we'll take the easy way out here in this
243 aDest
.Replace(old_dest_length
, count
,
244 NS_ConvertUTF16toUTF8(aSource
));
251 AppendUTF8toUTF16( const nsACString
& aSource
, nsAString
& aDest
)
253 nsACString::const_iterator source_start
, source_end
;
254 CalculateUTF8Length calculator
;
255 copy_string(aSource
.BeginReading(source_start
),
256 aSource
.EndReading(source_end
), calculator
);
258 PRUint32 count
= calculator
.Length();
262 PRUint32 old_dest_length
= aDest
.Length();
264 // Grow the buffer if we need to.
265 if(!SetLengthForWriting(aDest
, old_dest_length
+ count
))
268 nsAString::iterator dest
;
269 aDest
.BeginWriting(dest
);
271 dest
.advance(old_dest_length
);
273 if (count
<= (PRUint32
)dest
.size_forward())
275 // aDest has enough room in the fragment just past the end
276 // of its old data that it can hold what we're about to
277 // append. Append using copy_string().
279 // All ready? Time to convert
281 ConvertUTF8toUTF16
converter(dest
.get());
282 copy_string(aSource
.BeginReading(source_start
),
283 aSource
.EndReading(source_end
), converter
);
285 if (converter
.Length() != count
)
287 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
288 aDest
.SetLength(old_dest_length
);
293 // This isn't the fastest way to do this, but it gets
294 // complicated to convert parts of a UTF8 string into a
295 // UTF16 string, so we'll take the easy way out here in
296 // this rare situation.
298 aDest
.Replace(old_dest_length
, count
,
299 NS_ConvertUTF8toUTF16(aSource
));
306 AppendUTF16toUTF8( const PRUnichar
* aSource
, nsACString
& aDest
)
309 AppendUTF16toUTF8(nsDependentString(aSource
), aDest
);
315 AppendUTF8toUTF16( const char* aSource
, nsAString
& aDest
)
318 AppendUTF8toUTF16(nsDependentCString(aSource
), aDest
);
324 * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
326 * @param aSource an string you will eventually be making a copy of
327 * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
330 template <class FromStringT
, class ToCharT
>
333 AllocateStringCopy( const FromStringT
& aSource
, ToCharT
* )
335 return static_cast<ToCharT
*>(nsMemory::Alloc((aSource
.Length()+1) * sizeof(ToCharT
)));
341 ToNewCString( const nsAString
& aSource
)
343 char* result
= AllocateStringCopy(aSource
, (char*)0);
347 nsAString::const_iterator fromBegin
, fromEnd
;
348 LossyConvertEncoding
<PRUnichar
, char> converter(result
);
349 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
).write_terminator();
355 ToNewUTF8String( const nsAString
& aSource
, PRUint32
*aUTF8Count
)
357 nsAString::const_iterator start
, end
;
358 CalculateUTF8Size calculator
;
359 copy_string(aSource
.BeginReading(start
), aSource
.EndReading(end
),
363 *aUTF8Count
= calculator
.Size();
365 char *result
= static_cast<char*>
366 (nsMemory::Alloc(calculator
.Size() + 1));
370 ConvertUTF16toUTF8
converter(result
);
371 copy_string(aSource
.BeginReading(start
), aSource
.EndReading(end
),
372 converter
).write_terminator();
373 NS_ASSERTION(calculator
.Size() == converter
.Size(), "length mismatch");
380 ToNewCString( const nsACString
& aSource
)
382 // no conversion needed, just allocate a buffer of the correct length and copy into it
384 char* result
= AllocateStringCopy(aSource
, (char*)0);
388 nsACString::const_iterator fromBegin
, fromEnd
;
389 char* toBegin
= result
;
390 *copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), toBegin
) = char(0);
396 ToNewUnicode( const nsAString
& aSource
)
398 // no conversion needed, just allocate a buffer of the correct length and copy into it
400 PRUnichar
* result
= AllocateStringCopy(aSource
, (PRUnichar
*)0);
404 nsAString::const_iterator fromBegin
, fromEnd
;
405 PRUnichar
* toBegin
= result
;
406 *copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), toBegin
) = PRUnichar(0);
412 ToNewUnicode( const nsACString
& aSource
)
414 PRUnichar
* result
= AllocateStringCopy(aSource
, (PRUnichar
*)0);
418 nsACString::const_iterator fromBegin
, fromEnd
;
419 LossyConvertEncoding
<char, PRUnichar
> converter(result
);
420 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
).write_terminator();
426 UTF8ToNewUnicode( const nsACString
& aSource
, PRUint32
*aUTF16Count
)
428 nsACString::const_iterator start
, end
;
429 CalculateUTF8Length calculator
;
430 copy_string(aSource
.BeginReading(start
), aSource
.EndReading(end
),
434 *aUTF16Count
= calculator
.Length();
436 PRUnichar
*result
= static_cast<PRUnichar
*>
437 (nsMemory::Alloc(sizeof(PRUnichar
) * (calculator
.Length() + 1)));
441 ConvertUTF8toUTF16
converter(result
);
442 copy_string(aSource
.BeginReading(start
), aSource
.EndReading(end
),
443 converter
).write_terminator();
444 NS_ASSERTION(calculator
.Length() == converter
.Length(), "length mismatch");
451 CopyUnicodeTo( const nsAString
& aSource
, PRUint32 aSrcOffset
, PRUnichar
* aDest
, PRUint32 aLength
)
453 nsAString::const_iterator fromBegin
, fromEnd
;
454 PRUnichar
* toBegin
= aDest
;
455 copy_string(aSource
.BeginReading(fromBegin
).advance( PRInt32(aSrcOffset
) ), aSource
.BeginReading(fromEnd
).advance( PRInt32(aSrcOffset
+aLength
) ), toBegin
);
461 CopyUnicodeTo( const nsAString::const_iterator
& aSrcStart
,
462 const nsAString::const_iterator
& aSrcEnd
,
465 nsAString::iterator writer
;
466 if (!SetLengthForWriting(aDest
, Distance(aSrcStart
, aSrcEnd
)))
469 aDest
.BeginWriting(writer
);
470 nsAString::const_iterator
fromBegin(aSrcStart
);
472 copy_string(fromBegin
, aSrcEnd
, writer
);
477 AppendUnicodeTo( const nsAString::const_iterator
& aSrcStart
,
478 const nsAString::const_iterator
& aSrcEnd
,
481 nsAString::iterator writer
;
482 PRUint32 oldLength
= aDest
.Length();
483 if(!SetLengthForWriting(aDest
, oldLength
+ Distance(aSrcStart
, aSrcEnd
)))
486 aDest
.BeginWriting(writer
).advance(oldLength
);
487 nsAString::const_iterator
fromBegin(aSrcStart
);
489 copy_string(fromBegin
, aSrcEnd
, writer
);
494 IsASCII( const nsAString
& aString
)
496 static const PRUnichar NOT_ASCII
= PRUnichar(~0x007F);
499 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
501 nsAString::const_iterator iter
, done_reading
;
502 aString
.BeginReading(iter
);
503 aString
.EndReading(done_reading
);
505 const PRUnichar
* c
= iter
.get();
506 const PRUnichar
* end
= done_reading
.get();
510 if ( *c
++ & NOT_ASCII
)
519 IsASCII( const nsACString
& aString
)
521 static const char NOT_ASCII
= char(~0x7F);
524 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
526 nsACString::const_iterator iter
, done_reading
;
527 aString
.BeginReading(iter
);
528 aString
.EndReading(done_reading
);
530 const char* c
= iter
.get();
531 const char* end
= done_reading
.get();
535 if ( *c
++ & NOT_ASCII
)
544 IsUTF8( const nsACString
& aString
)
546 nsReadingIterator
<char> done_reading
;
547 aString
.EndReading(done_reading
);
550 PRBool overlong
= PR_FALSE
;
551 PRBool surrogate
= PR_FALSE
;
552 PRBool nonchar
= PR_FALSE
;
553 PRUint16 olupper
= 0; // overlong byte upper bound.
554 PRUint16 slower
= 0; // surrogate byte lower bound.
556 nsReadingIterator
<char> iter
;
557 aString
.BeginReading(iter
);
559 const char* ptr
= iter
.get();
560 const char* end
= done_reading
.get();
569 if ( UTF8traits::isASCII(c
) )
572 if ( c
<= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
574 else if ( UTF8traits::is2byte(c
) )
576 else if ( UTF8traits::is3byte(c
) )
579 if ( c
== 0xE0 ) // to exclude E0[80-9F][80-BF]
584 else if ( c
== 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
589 else if ( c
== 0xEF ) // EF BF [BE-BF] : non-character
592 else if ( c
<= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
596 if ( c
== 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
601 else if ( c
== 0xF4 ) // to exclude F4[90-BF][80-BF]
603 // actually not surrogates but codepoints beyond 0x10FFFF
609 return PR_FALSE
; // Not UTF-8 string
612 while ( ptr
< end
&& state
)
617 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
618 if ( nonchar
&& ( !state
&& c
< 0xBE ||
619 state
== 1 && c
!= 0xBF ||
620 state
== 2 && 0x0F != (0x0F & c
) ))
623 if ( !UTF8traits::isInSeq(c
) || overlong
&& c
<= olupper
||
624 surrogate
&& slower
<= c
|| nonchar
&& !state
)
625 return PR_FALSE
; // Not UTF-8 string
626 overlong
= surrogate
= PR_FALSE
;
629 return !state
; // state != 0 at the end indicates an invalid UTF-8 seq.
633 * A character sink for in-place case conversion.
635 class ConvertToUpperCase
638 typedef char value_type
;
641 write( const char* aSource
, PRUint32 aSourceLength
)
643 char* cp
= const_cast<char*>(aSource
);
644 const char* end
= aSource
+ aSourceLength
;
647 if ((ch
>= 'a') && (ch
<= 'z'))
648 *cp
= ch
- ('a' - 'A');
651 return aSourceLength
;
657 ToUpperCase( nsCSubstring
& aCString
)
659 ConvertToUpperCase converter
;
661 converter
.write(aCString
.BeginWriting(start
), aCString
.Length());
665 * A character sink for copying with case conversion.
667 class CopyToUpperCase
670 typedef char value_type
;
672 CopyToUpperCase( nsACString::iterator
& aDestIter
)
678 write( const char* aSource
, PRUint32 aSourceLength
)
680 PRUint32 len
= PR_MIN(PRUint32(mIter
.size_forward()), aSourceLength
);
681 char* cp
= mIter
.get();
682 const char* end
= aSource
+ len
;
683 while (aSource
!= end
) {
685 if ((ch
>= 'a') && (ch
<= 'z'))
686 *cp
= ch
- ('a' - 'A');
697 nsACString::iterator
& mIter
;
702 ToUpperCase( const nsACString
& aSource
, nsACString
& aDest
)
704 nsACString::const_iterator fromBegin
, fromEnd
;
705 nsACString::iterator toBegin
;
706 if (!SetLengthForWritingC(aDest
, aSource
.Length()))
709 CopyToUpperCase
converter(aDest
.BeginWriting(toBegin
));
710 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
);
714 * A character sink for case conversion.
716 class ConvertToLowerCase
719 typedef char value_type
;
722 write( const char* aSource
, PRUint32 aSourceLength
)
724 char* cp
= const_cast<char*>(aSource
);
725 const char* end
= aSource
+ aSourceLength
;
728 if ((ch
>= 'A') && (ch
<= 'Z'))
729 *cp
= ch
+ ('a' - 'A');
732 return aSourceLength
;
738 ToLowerCase( nsCSubstring
& aCString
)
740 ConvertToLowerCase converter
;
742 converter
.write(aCString
.BeginWriting(start
), aCString
.Length());
746 * A character sink for copying with case conversion.
748 class CopyToLowerCase
751 typedef char value_type
;
753 CopyToLowerCase( nsACString::iterator
& aDestIter
)
759 write( const char* aSource
, PRUint32 aSourceLength
)
761 PRUint32 len
= PR_MIN(PRUint32(mIter
.size_forward()), aSourceLength
);
762 char* cp
= mIter
.get();
763 const char* end
= aSource
+ len
;
764 while (aSource
!= end
) {
766 if ((ch
>= 'A') && (ch
<= 'Z'))
767 *cp
= ch
+ ('a' - 'A');
778 nsACString::iterator
& mIter
;
783 ToLowerCase( const nsACString
& aSource
, nsACString
& aDest
)
785 nsACString::const_iterator fromBegin
, fromEnd
;
786 nsACString::iterator toBegin
;
787 if (!SetLengthForWritingC(aDest
, aSource
.Length()))
790 CopyToLowerCase
converter(aDest
.BeginWriting(toBegin
));
791 copy_string(aSource
.BeginReading(fromBegin
), aSource
.EndReading(fromEnd
), converter
);
794 template <class StringT
, class IteratorT
, class Comparator
>
796 FindInReadable_Impl( const StringT
& aPattern
, IteratorT
& aSearchStart
, IteratorT
& aSearchEnd
, const Comparator
& compare
)
798 PRBool found_it
= PR_FALSE
;
800 // only bother searching at all if we're given a non-empty range to search
801 if ( aSearchStart
!= aSearchEnd
)
803 IteratorT aPatternStart
, aPatternEnd
;
804 aPattern
.BeginReading(aPatternStart
);
805 aPattern
.EndReading(aPatternEnd
);
807 // outer loop keeps searching till we find it or run out of string to search
810 // fast inner loop (that's what it's called, not what it is) looks for a potential match
811 while ( aSearchStart
!= aSearchEnd
&&
812 compare(*aPatternStart
, *aSearchStart
) )
815 // if we broke out of the `fast' loop because we're out of string ... we're done: no match
816 if ( aSearchStart
== aSearchEnd
)
819 // otherwise, we're at a potential match, let's see if we really hit one
820 IteratorT
testPattern(aPatternStart
);
821 IteratorT
testSearch(aSearchStart
);
823 // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
826 // we already compared the first character in the outer loop,
827 // so we'll advance before the next comparison
831 // if we verified all the way to the end of the pattern, then we found it!
832 if ( testPattern
== aPatternEnd
)
835 aSearchEnd
= testSearch
; // return the exact found range through the parameters
839 // if we got to end of the string we're searching before we hit the end of the
840 // pattern, we'll never find what we're looking for
841 if ( testSearch
== aSearchEnd
)
843 aSearchStart
= aSearchEnd
;
847 // else if we mismatched ... it's time to advance to the next search position
848 // and get back into the `fast' loop
849 if ( compare(*testPattern
, *testSearch
) )
862 * This searches the entire string from right to left, and returns the first match found, if any.
864 template <class StringT
, class IteratorT
, class Comparator
>
866 RFindInReadable_Impl( const StringT
& aPattern
, IteratorT
& aSearchStart
, IteratorT
& aSearchEnd
, const Comparator
& compare
)
868 IteratorT patternStart
, patternEnd
, searchEnd
= aSearchEnd
;
869 aPattern
.BeginReading(patternStart
);
870 aPattern
.EndReading(patternEnd
);
872 // Point to the last character in the pattern
874 // outer loop keeps searching till we run out of string to search
875 while ( aSearchStart
!= searchEnd
)
877 // Point to the end position of the next possible match
880 // Check last character, if a match, explore further from here
881 if ( compare(*patternEnd
, *searchEnd
) == 0 )
883 // We're at a potential match, let's see if we really hit one
884 IteratorT
testPattern(patternEnd
);
885 IteratorT
testSearch(searchEnd
);
887 // inner loop verifies the potential match at the current position
890 // if we verified all the way to the end of the pattern, then we found it!
891 if ( testPattern
== patternStart
)
893 aSearchStart
= testSearch
; // point to start of match
894 aSearchEnd
= ++searchEnd
; // point to end of match
898 // if we got to end of the string we're searching before we hit the end of the
899 // pattern, we'll never find what we're looking for
900 if ( testSearch
== aSearchStart
)
902 aSearchStart
= aSearchEnd
;
906 // test previous character for a match
910 while ( compare(*testPattern
, *testSearch
) == 0 );
914 aSearchStart
= aSearchEnd
;
920 FindInReadable( const nsAString
& aPattern
, nsAString::const_iterator
& aSearchStart
, nsAString::const_iterator
& aSearchEnd
, const nsStringComparator
& aComparator
)
922 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
927 FindInReadable( const nsACString
& aPattern
, nsACString::const_iterator
& aSearchStart
, nsACString::const_iterator
& aSearchEnd
, const nsCStringComparator
& aComparator
)
929 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
934 CaseInsensitiveFindInReadable( const nsACString
& aPattern
, nsACString::const_iterator
& aSearchStart
, nsACString::const_iterator
& aSearchEnd
)
936 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, nsCaseInsensitiveCStringComparator());
941 RFindInReadable( const nsAString
& aPattern
, nsAString::const_iterator
& aSearchStart
, nsAString::const_iterator
& aSearchEnd
, const nsStringComparator
& aComparator
)
943 return RFindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
948 RFindInReadable( const nsACString
& aPattern
, nsACString::const_iterator
& aSearchStart
, nsACString::const_iterator
& aSearchEnd
, const nsCStringComparator
& aComparator
)
950 return RFindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
955 FindCharInReadable( PRUnichar aChar
, nsAString::const_iterator
& aSearchStart
, const nsAString::const_iterator
& aSearchEnd
)
957 PRInt32 fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
959 const PRUnichar
* charFoundAt
= nsCharTraits
<PRUnichar
>::find(aSearchStart
.get(), fragmentLength
, aChar
);
961 aSearchStart
.advance( charFoundAt
- aSearchStart
.get() );
965 aSearchStart
.advance(fragmentLength
);
971 FindCharInReadable( char aChar
, nsACString::const_iterator
& aSearchStart
, const nsACString::const_iterator
& aSearchEnd
)
973 PRInt32 fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
975 const char* charFoundAt
= nsCharTraits
<char>::find(aSearchStart
.get(), fragmentLength
, aChar
);
977 aSearchStart
.advance( charFoundAt
- aSearchStart
.get() );
981 aSearchStart
.advance(fragmentLength
);
987 CountCharInReadable( const nsAString
& aStr
,
991 nsAString::const_iterator begin
, end
;
993 aStr
.BeginReading(begin
);
994 aStr
.EndReading(end
);
996 while (begin
!= end
) {
997 if (*begin
== aChar
) {
1008 CountCharInReadable( const nsACString
& aStr
,
1012 nsACString::const_iterator begin
, end
;
1014 aStr
.BeginReading(begin
);
1015 aStr
.EndReading(end
);
1017 while (begin
!= end
) {
1018 if (*begin
== aChar
) {
1028 StringBeginsWith( const nsAString
& aSource
, const nsAString
& aSubstring
,
1029 const nsStringComparator
& aComparator
)
1031 nsAString::size_type src_len
= aSource
.Length(),
1032 sub_len
= aSubstring
.Length();
1033 if (sub_len
> src_len
)
1035 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
, aComparator
);
1039 StringBeginsWith( const nsACString
& aSource
, const nsACString
& aSubstring
,
1040 const nsCStringComparator
& aComparator
)
1042 nsACString::size_type src_len
= aSource
.Length(),
1043 sub_len
= aSubstring
.Length();
1044 if (sub_len
> src_len
)
1046 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
, aComparator
);
1050 StringEndsWith( const nsAString
& aSource
, const nsAString
& aSubstring
,
1051 const nsStringComparator
& aComparator
)
1053 nsAString::size_type src_len
= aSource
.Length(),
1054 sub_len
= aSubstring
.Length();
1055 if (sub_len
> src_len
)
1057 return Substring(aSource
, src_len
- sub_len
, sub_len
).Equals(aSubstring
,
1062 StringEndsWith( const nsACString
& aSource
, const nsACString
& aSubstring
,
1063 const nsCStringComparator
& aComparator
)
1065 nsACString::size_type src_len
= aSource
.Length(),
1066 sub_len
= aSubstring
.Length();
1067 if (sub_len
> src_len
)
1069 return Substring(aSource
, src_len
- sub_len
, sub_len
).Equals(aSubstring
,
1075 static const PRUnichar empty_buffer
[1] = { '\0' };
1078 const nsAFlatString
&
1081 static const nsDependentString
sEmpty(empty_buffer
);
1087 const nsAFlatCString
&
1090 static const nsDependentCString
sEmpty((const char *)empty_buffer
);
1096 CompareUTF8toUTF16(const nsASingleFragmentCString
& aUTF8String
,
1097 const nsASingleFragmentString
& aUTF16String
)
1099 static const PRUint32 NOT_ASCII
= PRUint32(~0x7F);
1101 const char *u8
, *u8end
;
1102 aUTF8String
.BeginReading(u8
);
1103 aUTF8String
.EndReading(u8end
);
1105 const PRUnichar
*u16
, *u16end
;
1106 aUTF16String
.BeginReading(u16
);
1107 aUTF16String
.EndReading(u16end
);
1109 while (u8
!= u8end
&& u16
!= u16end
)
1111 // Cast away the signedness of *u8 to prevent signextension when
1112 // converting to PRUint32
1113 PRUint32 c8_32
= (PRUint8
)*u8
;
1115 if (c8_32
& NOT_ASCII
)
1118 c8_32
= UTF8CharEnumerator::NextChar(&u8
, u8end
, &err
);
1120 return PR_INT32_MIN
;
1122 PRUint32 c16_32
= UTF16CharEnumerator::NextChar(&u16
, u16end
);
1123 // The above UTF16CharEnumerator::NextChar() calls can
1124 // fail, but if it does for anything other than no data to
1125 // look at (which can't happen here), it returns the
1126 // Unicode replacement character 0xFFFD for the invalid
1127 // data they were fed. Ignore that error and treat invalid
1130 // This matches what our UTF16 to UTF8 conversion code
1131 // does, and thus a UTF8 string that came from an invalid
1132 // UTF16 string will compare equal to the invalid UTF16
1133 // string it came from. Same is true for any other UTF16
1134 // string differs only in the invalid part of the string.
1136 if (c8_32
!= c16_32
)
1137 return c8_32
< c16_32
? -1 : 1;
1142 return c8_32
> *u16
? 1 : -1;
1151 // We get to the end of the UTF16 string, but no to the end of
1152 // the UTF8 string. The UTF8 string is longer than the UTF16
1160 // We get to the end of the UTF8 string, but no to the end of
1161 // the UTF16 string. The UTF16 string is longer than the UTF8
1167 // The two strings match.
1174 AppendUCS4ToUTF16(const PRUint32 aSource
, nsAString
& aDest
)
1176 NS_ASSERTION(IS_VALID_CHAR(aSource
), "Invalid UCS4 char");
1177 if (IS_IN_BMP(aSource
))
1179 aDest
.Append(PRUnichar(aSource
));
1183 aDest
.Append(H_SURROGATE(aSource
));
1184 aDest
.Append(L_SURROGATE(aSource
));