1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=2 sw=2 et tw=78: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
19 * Netscape Communications Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 1998
21 * the Initial Developer. All Rights Reserved.
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 //#define __INCREMENTAL 1
41 #include "nsScanner.h"
43 #include "nsIServiceManager.h"
44 #include "nsICharsetConverterManager.h"
45 #include "nsICharsetAlias.h"
46 #include "nsReadableUtils.h"
47 #include "nsIInputStream.h"
48 #include "nsILocalFile.h"
49 #include "nsNetUtil.h"
50 #include "nsUTF8Utils.h" // for LossyConvertEncoding
54 // We replace NUL characters with this character.
55 static PRUnichar sInvalid
= UCS2_REPLACEMENT_CHAR
;
57 nsReadEndCondition::nsReadEndCondition(const PRUnichar
* aTerminateChars
) :
58 mChars(aTerminateChars
), mFilter(PRUnichar(~0)) // All bits set
60 // Build filter that will be used to filter out characters with
61 // bits that none of the terminal chars have. This works very well
62 // because terminal chars often have only the last 4-6 bits set and
63 // normal ascii letters have bit 7 set. Other letters have even higher
67 const PRUnichar
*current
= aTerminateChars
;
68 PRUnichar terminalChar
= *current
;
69 while (terminalChar
) {
70 mFilter
&= ~terminalChar
;
72 terminalChar
= *current
;
79 const int kBufsize
=64;
83 * Use this constructor if you want i/o to be based on
84 * a single string you hand in during construction.
85 * This short cut was added for Javascript.
87 * @update gess 5/12/98
88 * @param aMode represents the parser mode (nav, other)
91 nsScanner::nsScanner(const nsAString
& anHTMLString
, const nsACString
& aCharset
,
95 MOZ_COUNT_CTOR(nsScanner
);
97 mSlidingBuffer
= nsnull
;
99 mFirstNonWhitespacePosition
= -1;
100 if (AppendToBuffer(anHTMLString
)) {
101 mSlidingBuffer
->BeginReading(mCurrentPosition
);
103 /* XXX see hack below, re: bug 182067 */
104 memset(&mCurrentPosition
, 0, sizeof(mCurrentPosition
));
105 mEndPosition
= mCurrentPosition
;
107 mMarkPosition
= mCurrentPosition
;
108 mIncremental
= PR_FALSE
;
110 mCharsetSource
= kCharsetUninitialized
;
114 * Use this constructor if you want i/o to be based on strings
115 * the scanner receives. If you pass a null filename, you
116 * can still provide data to the scanner via append.
118 * @update gess 5/12/98
119 * @param aFilename --
122 nsScanner::nsScanner(nsString
& aFilename
,PRBool aCreateStream
,
123 const nsACString
& aCharset
, PRInt32 aSource
)
124 : mFilename(aFilename
), mParser(nsnull
)
126 MOZ_COUNT_CTOR(nsScanner
);
127 NS_ASSERTION(!aCreateStream
, "This is always true.");
129 mSlidingBuffer
= nsnull
;
131 // XXX This is a big hack. We need to initialize the iterators to something.
132 // What matters is that mCurrentPosition == mEndPosition, so that our methods
133 // believe that we are at EOF (see bug 182067). We null out mCurrentPosition
134 // so that we have some hope of catching null pointer dereferences associated
135 // with this hack. --darin
136 memset(&mCurrentPosition
, 0, sizeof(mCurrentPosition
));
137 mMarkPosition
= mCurrentPosition
;
138 mEndPosition
= mCurrentPosition
;
140 mIncremental
= PR_TRUE
;
141 mFirstNonWhitespacePosition
= -1;
145 mCharsetSource
= kCharsetUninitialized
;
146 SetDocumentCharset(aCharset
, aSource
);
149 nsresult
nsScanner::SetDocumentCharset(const nsACString
& aCharset
, PRInt32 aSource
)
151 if (aSource
< mCharsetSource
) // priority is lower the the current one , just
154 nsICharsetAlias
* calias
= nsParser::GetCharsetAliasService();
155 NS_ASSERTION(calias
, "Must have the charset alias service!");
157 nsresult res
= NS_OK
;
158 if (!mCharset
.IsEmpty())
161 res
= calias
->Equals(aCharset
, mCharset
, &same
);
162 if(NS_SUCCEEDED(res
) && same
)
164 return NS_OK
; // no difference, don't change it
168 // different, need to change it
169 nsCString charsetName
;
170 res
= calias
->GetPreferred(aCharset
, charsetName
);
172 if(NS_FAILED(res
) && (mCharsetSource
== kCharsetUninitialized
))
174 // failed - unknown alias , fallback to ISO-8859-1
175 mCharset
.AssignLiteral("ISO-8859-1");
179 mCharset
.Assign(charsetName
);
182 mCharsetSource
= aSource
;
184 NS_ASSERTION(nsParser::GetCharsetConverterManager(),
185 "Must have the charset converter manager!");
187 return nsParser::GetCharsetConverterManager()->
188 GetUnicodeDecoderRaw(mCharset
.get(), getter_AddRefs(mUnicodeDecoder
));
195 * @update gess 3/25/98
199 nsScanner::~nsScanner() {
201 if (mSlidingBuffer
) {
202 delete mSlidingBuffer
;
205 MOZ_COUNT_DTOR(nsScanner
);
209 * Resets current offset position of input stream to marked position.
210 * This allows us to back up to this point if the need should arise,
211 * such as when tokenization gets interrupted.
212 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
214 * @update gess 5/12/98
218 void nsScanner::RewindToMark(void){
219 if (mSlidingBuffer
) {
220 mCountRemaining
+= (Distance(mMarkPosition
, mCurrentPosition
));
221 mCurrentPosition
= mMarkPosition
;
227 * Records current offset position in input stream. This allows us
228 * to back up to this point if the need should arise, such as when
229 * tokenization gets interrupted.
231 * @update gess 7/29/98
235 PRInt32
nsScanner::Mark() {
236 PRInt32 distance
= 0;
237 if (mSlidingBuffer
) {
238 nsScannerIterator oldStart
;
239 mSlidingBuffer
->BeginReading(oldStart
);
241 distance
= Distance(oldStart
, mCurrentPosition
);
243 mSlidingBuffer
->DiscardPrefix(mCurrentPosition
);
244 mSlidingBuffer
->BeginReading(mCurrentPosition
);
245 mMarkPosition
= mCurrentPosition
;
252 * Insert data to our underlying input buffer as
253 * if it were read from an input stream.
255 * @update harishd 01/12/99
258 PRBool
nsScanner::UngetReadable(const nsAString
& aBuffer
) {
259 if (!mSlidingBuffer
) {
263 mSlidingBuffer
->UngetReadable(aBuffer
,mCurrentPosition
);
264 mSlidingBuffer
->BeginReading(mCurrentPosition
); // Insertion invalidated our iterators
265 mSlidingBuffer
->EndReading(mEndPosition
);
267 PRUint32 length
= aBuffer
.Length();
268 mCountRemaining
+= length
; // Ref. bug 117441
273 * Append data to our underlying input buffer as
274 * if it were read from an input stream.
279 nsresult
nsScanner::Append(const nsAString
& aBuffer
) {
280 if (!AppendToBuffer(aBuffer
))
281 return NS_ERROR_OUT_OF_MEMORY
;
288 * @update gess 5/21/98
292 nsresult
nsScanner::Append(const char* aBuffer
, PRUint32 aLen
,
293 nsIRequest
*aRequest
)
296 PRUnichar
*unichars
, *start
;
297 if (mUnicodeDecoder
) {
298 PRInt32 unicharBufLen
= 0;
299 mUnicodeDecoder
->GetMaxLength(aBuffer
, aLen
, &unicharBufLen
);
300 nsScannerString::Buffer
* buffer
= nsScannerString::AllocBuffer(unicharBufLen
+ 1);
301 NS_ENSURE_TRUE(buffer
,NS_ERROR_OUT_OF_MEMORY
);
302 start
= unichars
= buffer
->DataStart();
304 PRInt32 totalChars
= 0;
305 PRInt32 unicharLength
= unicharBufLen
;
307 PRInt32 srcLength
= aLen
;
308 res
= mUnicodeDecoder
->Convert(aBuffer
, &srcLength
, unichars
, &unicharLength
);
310 totalChars
+= unicharLength
;
311 // Continuation of failure case
313 // if we failed, we consume one byte, replace it with U+FFFD
314 // and try the conversion again.
316 // This is only needed because some decoders don't follow the
317 // nsIUnicodeDecoder contract: they return a failure when *aDestLength
318 // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177
319 if ((unichars
+ unicharLength
) >= buffer
->DataEnd()) {
320 NS_ERROR("Unexpected end of destination buffer");
324 unichars
[unicharLength
++] = (PRUnichar
)0xFFFD;
325 unichars
= unichars
+ unicharLength
;
326 unicharLength
= unicharBufLen
- (++totalChars
);
328 mUnicodeDecoder
->Reset();
330 if(((PRUint32
) (srcLength
+ 1)) > aLen
) {
337 aBuffer
+= srcLength
;
340 } while (NS_FAILED(res
) && (aLen
> 0));
342 buffer
->SetDataLength(totalChars
);
343 // Don't propagate return code of unicode decoder
344 // since it doesn't reflect on our success or failure
347 if (!AppendToBuffer(buffer
, aRequest
))
348 res
= NS_ERROR_OUT_OF_MEMORY
;
351 NS_WARNING("No decoder found.");
352 res
= NS_ERROR_FAILURE
;
359 * retrieve next char from scanners internal input stream
361 * @update gess 3/25/98
363 * @return error code reflecting read status
365 nsresult
nsScanner::GetChar(PRUnichar
& aChar
) {
366 if (!mSlidingBuffer
|| mCurrentPosition
== mEndPosition
) {
371 aChar
= *mCurrentPosition
++;
379 * peek ahead to consume next char from scanner's internal
382 * @update gess 3/25/98
386 nsresult
nsScanner::Peek(PRUnichar
& aChar
, PRUint32 aOffset
) {
389 if (!mSlidingBuffer
|| mCurrentPosition
== mEndPosition
) {
394 if (mCountRemaining
<= aOffset
)
397 nsScannerIterator pos
= mCurrentPosition
;
398 pos
.advance(aOffset
);
402 aChar
=*mCurrentPosition
;
408 nsresult
nsScanner::Peek(nsAString
& aStr
, PRInt32 aNumChars
, PRInt32 aOffset
)
410 if (!mSlidingBuffer
|| mCurrentPosition
== mEndPosition
) {
414 nsScannerIterator start
, end
;
416 start
= mCurrentPosition
;
418 if ((PRInt32
)mCountRemaining
<= aOffset
) {
423 start
.advance(aOffset
);
426 if (mCountRemaining
< PRUint32(aNumChars
+ aOffset
)) {
431 end
.advance(aNumChars
);
434 CopyUnicodeTo(start
, end
, aStr
);
441 * Skip whitespace on scanner input stream
443 * @update gess 3/25/98
445 * @return error status
447 nsresult
nsScanner::SkipWhitespace(PRInt32
& aNewlinesSkipped
) {
449 if (!mSlidingBuffer
) {
453 PRUnichar theChar
= 0;
454 nsresult result
= Peek(theChar
);
456 if (NS_FAILED(result
)) {
460 nsScannerIterator current
= mCurrentPosition
;
461 PRBool done
= PR_FALSE
;
462 PRBool skipped
= PR_FALSE
;
464 while (!done
&& current
!= mEndPosition
) {
467 case '\r': ++aNewlinesSkipped
;
472 PRUnichar thePrevChar
= theChar
;
473 theChar
= (++current
!= mEndPosition
) ? *current
: '\0';
474 if ((thePrevChar
== '\r' && theChar
== '\n') ||
475 (thePrevChar
== '\n' && theChar
== '\r')) {
476 theChar
= (++current
!= mEndPosition
) ? *current
: '\0'; // CRLF == LFCR => LF
487 SetPosition(current
);
488 if (current
== mEndPosition
) {
497 * Skip over chars as long as they equal given char
499 * @update gess 3/25/98
503 nsresult
nsScanner::SkipOver(PRUnichar aSkipChar
){
505 if (!mSlidingBuffer
) {
510 nsresult result
=NS_OK
;
512 while(NS_OK
==result
) {
514 if(NS_OK
== result
) {
527 void DoErrTest(nsString
& aString
) {
528 PRInt32 pos
=aString
.FindChar(0);
530 if(aString
.Length()-1!=pos
) {
535 void DoErrTest(nsCString
& aString
) {
536 PRInt32 pos
=aString
.FindChar(0);
538 if(aString
.Length()-1!=pos
) {
545 * Consume characters until you run into space, a '<', a '>', or a '/'.
547 * @param aString - receives new data from stream
550 nsresult
nsScanner::ReadTagIdentifier(nsScannerSharedSubstring
& aString
) {
552 if (!mSlidingBuffer
) {
557 nsresult result
=Peek(theChar
);
558 nsScannerIterator current
, end
;
559 PRBool found
=PR_FALSE
;
561 current
= mCurrentPosition
;
564 // Loop until we find an illegal character. Everything is then appended
566 while(current
!= end
&& !found
) {
583 ReplaceCharacter(current
, sInvalid
);
595 // Don't bother appending nothing.
596 if (current
!= mCurrentPosition
) {
597 AppendUnicodeTo(mCurrentPosition
, current
, aString
);
600 SetPosition(current
);
601 if (current
== end
) {
605 //DoErrTest(aString);
611 * Consume characters until you run into a char that's not valid in an
614 * @param aString - receives new data from stream
617 nsresult
nsScanner::ReadEntityIdentifier(nsString
& aString
) {
619 if (!mSlidingBuffer
) {
624 nsresult result
=Peek(theChar
);
625 nsScannerIterator origin
, current
, end
;
626 PRBool found
=PR_FALSE
;
628 origin
= mCurrentPosition
;
629 current
= mCurrentPosition
;
632 while(current
!= end
) {
641 // Don't allow ':' in entity names. See bug 23791
645 found
= ('a'<=theChar
&& theChar
<='z') ||
646 ('A'<=theChar
&& theChar
<='Z') ||
647 ('0'<=theChar
&& theChar
<='9');
652 AppendUnicodeTo(mCurrentPosition
, current
, aString
);
659 SetPosition(current
);
660 if (current
== end
) {
661 AppendUnicodeTo(origin
, current
, aString
);
665 //DoErrTest(aString);
673 * @param aString - should contain digits
676 nsresult
nsScanner::ReadNumber(nsString
& aString
,PRInt32 aBase
) {
678 if (!mSlidingBuffer
) {
682 NS_ASSERTION(aBase
== 10 || aBase
== 16,"base value not supported");
685 nsresult result
=Peek(theChar
);
686 nsScannerIterator origin
, current
, end
;
688 origin
= mCurrentPosition
;
692 PRBool done
= PR_FALSE
;
693 while(current
!= end
) {
696 done
= (theChar
< '0' || theChar
> '9') &&
697 ((aBase
== 16)? (theChar
< 'A' || theChar
> 'F') &&
698 (theChar
< 'a' || theChar
> 'f')
701 AppendUnicodeTo(origin
, current
, aString
);
708 SetPosition(current
);
709 if (current
== end
) {
710 AppendUnicodeTo(origin
, current
, aString
);
714 //DoErrTest(aString);
720 * Consume characters until you find the terminal char
722 * @update gess 3/25/98
723 * @param aString receives new data from stream
724 * @param addTerminal tells us whether to append terminal to aString
727 nsresult
nsScanner::ReadWhitespace(nsScannerSharedSubstring
& aString
,
728 PRInt32
& aNewlinesSkipped
,
733 if (!mSlidingBuffer
) {
737 PRUnichar theChar
= 0;
738 nsresult result
= Peek(theChar
);
740 if (NS_FAILED(result
)) {
744 nsScannerIterator origin
, current
, end
;
745 PRBool done
= PR_FALSE
;
747 origin
= mCurrentPosition
;
751 PRBool haveCR
= PR_FALSE
;
753 while(!done
&& current
!= end
) {
759 PRUnichar thePrevChar
= theChar
;
760 theChar
= (++current
!= end
) ? *current
: '\0';
761 if ((thePrevChar
== '\r' && theChar
== '\n') ||
762 (thePrevChar
== '\n' && theChar
== '\r')) {
763 theChar
= (++current
!= end
) ? *current
: '\0'; // CRLF == LFCR => LF
765 } else if (thePrevChar
== '\r') {
766 // Lone CR becomes CRLF; callers should know to remove extra CRs
767 AppendUnicodeTo(origin
, current
, aString
);
768 aString
.writable().Append(PRUnichar('\n'));
776 theChar
= (++current
!= end
) ? *current
: '\0';
780 AppendUnicodeTo(origin
, current
, aString
);
785 SetPosition(current
);
786 if (current
== end
) {
787 AppendUnicodeTo(origin
, current
, aString
);
795 //XXXbz callers of this have to manage their lone '\r' themselves if they want
796 //it to work. Good thing they're all in view-source and it deals.
797 nsresult
nsScanner::ReadWhitespace(nsScannerIterator
& aStart
,
798 nsScannerIterator
& aEnd
,
799 PRInt32
& aNewlinesSkipped
) {
801 if (!mSlidingBuffer
) {
805 PRUnichar theChar
= 0;
806 nsresult result
= Peek(theChar
);
808 if (NS_FAILED(result
)) {
812 nsScannerIterator origin
, current
, end
;
813 PRBool done
= PR_FALSE
;
815 origin
= mCurrentPosition
;
819 while(!done
&& current
!= end
) {
822 case '\r': ++aNewlinesSkipped
;
826 PRUnichar thePrevChar
= theChar
;
827 theChar
= (++current
!= end
) ? *current
: '\0';
828 if ((thePrevChar
== '\r' && theChar
== '\n') ||
829 (thePrevChar
== '\n' && theChar
== '\r')) {
830 theChar
= (++current
!= end
) ? *current
: '\0'; // CRLF == LFCR => LF
842 SetPosition(current
);
843 if (current
== end
) {
853 * Consume characters until you encounter one contained in given
856 * @update gess 3/25/98
857 * @param aString will contain the result of this method
858 * @param aTerminalSet is an ordered string that contains
859 * the set of INVALID characters
862 nsresult
nsScanner::ReadUntil(nsAString
& aString
,
863 const nsReadEndCondition
& aEndCondition
,
866 if (!mSlidingBuffer
) {
870 nsScannerIterator origin
, current
;
871 const PRUnichar
* setstart
= aEndCondition
.mChars
;
872 const PRUnichar
* setcurrent
;
874 origin
= mCurrentPosition
;
878 nsresult result
=Peek(theChar
);
880 if (NS_FAILED(result
)) {
884 while (current
!= mEndPosition
) {
886 if (theChar
== '\0') {
887 ReplaceCharacter(current
, sInvalid
);
891 // Filter out completely wrong characters
892 // Check if all bits are in the required area
893 if(!(theChar
& aEndCondition
.mFilter
)) {
894 // They were. Do a thorough check.
896 setcurrent
= setstart
;
897 while (*setcurrent
) {
898 if (*setcurrent
== theChar
) {
901 AppendUnicodeTo(origin
, current
, aString
);
902 SetPosition(current
);
904 //DoErrTest(aString);
915 // If we are here, we didn't find any terminator in the string and
916 // current = mEndPosition
917 SetPosition(current
);
918 AppendUnicodeTo(origin
, current
, aString
);
922 nsresult
nsScanner::ReadUntil(nsScannerSharedSubstring
& aString
,
923 const nsReadEndCondition
& aEndCondition
,
926 if (!mSlidingBuffer
) {
930 nsScannerIterator origin
, current
;
931 const PRUnichar
* setstart
= aEndCondition
.mChars
;
932 const PRUnichar
* setcurrent
;
934 origin
= mCurrentPosition
;
938 nsresult result
=Peek(theChar
);
940 if (NS_FAILED(result
)) {
944 while (current
!= mEndPosition
) {
946 if (theChar
== '\0') {
947 ReplaceCharacter(current
, sInvalid
);
951 // Filter out completely wrong characters
952 // Check if all bits are in the required area
953 if(!(theChar
& aEndCondition
.mFilter
)) {
954 // They were. Do a thorough check.
956 setcurrent
= setstart
;
957 while (*setcurrent
) {
958 if (*setcurrent
== theChar
) {
961 AppendUnicodeTo(origin
, current
, aString
);
962 SetPosition(current
);
964 //DoErrTest(aString);
975 // If we are here, we didn't find any terminator in the string and
976 // current = mEndPosition
977 SetPosition(current
);
978 AppendUnicodeTo(origin
, current
, aString
);
982 nsresult
nsScanner::ReadUntil(nsScannerIterator
& aStart
,
983 nsScannerIterator
& aEnd
,
984 const nsReadEndCondition
&aEndCondition
,
987 if (!mSlidingBuffer
) {
991 nsScannerIterator origin
, current
;
992 const PRUnichar
* setstart
= aEndCondition
.mChars
;
993 const PRUnichar
* setcurrent
;
995 origin
= mCurrentPosition
;
999 nsresult result
=Peek(theChar
);
1001 if (NS_FAILED(result
)) {
1002 aStart
= aEnd
= current
;
1006 while (current
!= mEndPosition
) {
1007 if (theChar
== '\0') {
1008 ReplaceCharacter(current
, sInvalid
);
1012 // Filter out completely wrong characters
1013 // Check if all bits are in the required area
1014 if(!(theChar
& aEndCondition
.mFilter
)) {
1015 // They were. Do a thorough check.
1016 setcurrent
= setstart
;
1017 while (*setcurrent
) {
1018 if (*setcurrent
== theChar
) {
1023 SetPosition(current
);
1035 // If we are here, we didn't find any terminator in the string and
1036 // current = mEndPosition
1037 SetPosition(current
);
1044 * Consumes chars until you see the given terminalChar
1046 * @update gess 3/25/98
1048 * @return error code
1050 nsresult
nsScanner::ReadUntil(nsAString
& aString
,
1051 PRUnichar aTerminalChar
,
1054 if (!mSlidingBuffer
) {
1058 nsScannerIterator origin
, current
;
1060 origin
= mCurrentPosition
;
1064 nsresult result
= Peek(theChar
);
1066 if (NS_FAILED(result
)) {
1070 while (current
!= mEndPosition
) {
1071 if (theChar
== '\0') {
1072 ReplaceCharacter(current
, sInvalid
);
1076 if (aTerminalChar
== theChar
) {
1079 AppendUnicodeTo(origin
, current
, aString
);
1080 SetPosition(current
);
1087 // If we are here, we didn't find any terminator in the string and
1088 // current = mEndPosition
1089 AppendUnicodeTo(origin
, current
, aString
);
1090 SetPosition(current
);
1095 void nsScanner::BindSubstring(nsScannerSubstring
& aSubstring
, const nsScannerIterator
& aStart
, const nsScannerIterator
& aEnd
)
1097 aSubstring
.Rebind(*mSlidingBuffer
, aStart
, aEnd
);
1100 void nsScanner::CurrentPosition(nsScannerIterator
& aPosition
)
1102 aPosition
= mCurrentPosition
;
1105 void nsScanner::EndReading(nsScannerIterator
& aPosition
)
1107 aPosition
= mEndPosition
;
1110 void nsScanner::SetPosition(nsScannerIterator
& aPosition
, PRBool aTerminate
, PRBool aReverse
)
1112 if (mSlidingBuffer
) {
1114 PRUint32 origRemaining
= mCountRemaining
;
1118 mCountRemaining
+= (Distance(aPosition
, mCurrentPosition
));
1121 mCountRemaining
-= (Distance(mCurrentPosition
, aPosition
));
1124 NS_ASSERTION((mCountRemaining
>= origRemaining
&& aReverse
) ||
1125 (mCountRemaining
<= origRemaining
&& !aReverse
),
1126 "Improper use of nsScanner::SetPosition. Make sure to set the"
1127 " aReverse parameter correctly");
1129 mCurrentPosition
= aPosition
;
1130 if (aTerminate
&& (mCurrentPosition
== mEndPosition
)) {
1131 mMarkPosition
= mCurrentPosition
;
1132 mSlidingBuffer
->DiscardPrefix(mCurrentPosition
);
1137 void nsScanner::ReplaceCharacter(nsScannerIterator
& aPosition
,
1140 if (mSlidingBuffer
) {
1141 mSlidingBuffer
->ReplaceCharacter(aPosition
, aChar
);
1145 PRBool
nsScanner::AppendToBuffer(nsScannerString::Buffer
* aBuf
,
1146 nsIRequest
*aRequest
)
1148 if (nsParser::sParserDataListeners
&& mParser
&&
1149 NS_FAILED(mParser
->DataAdded(Substring(aBuf
->DataStart(),
1150 aBuf
->DataEnd()), aRequest
))) {
1151 // Don't actually append on failure.
1153 return mSlidingBuffer
!= nsnull
;
1156 if (!mSlidingBuffer
) {
1157 mSlidingBuffer
= new nsScannerString(aBuf
);
1158 if (!mSlidingBuffer
)
1160 mSlidingBuffer
->BeginReading(mCurrentPosition
);
1161 mMarkPosition
= mCurrentPosition
;
1162 mSlidingBuffer
->EndReading(mEndPosition
);
1163 mCountRemaining
= aBuf
->DataLength();
1166 mSlidingBuffer
->AppendBuffer(aBuf
);
1167 if (mCurrentPosition
== mEndPosition
) {
1168 mSlidingBuffer
->BeginReading(mCurrentPosition
);
1170 mSlidingBuffer
->EndReading(mEndPosition
);
1171 mCountRemaining
+= aBuf
->DataLength();
1174 if (mFirstNonWhitespacePosition
== -1) {
1175 nsScannerIterator
iter(mCurrentPosition
);
1176 nsScannerIterator
end(mEndPosition
);
1178 while (iter
!= end
) {
1179 if (!nsCRT::IsAsciiSpace(*iter
)) {
1180 mFirstNonWhitespacePosition
= Distance(mCurrentPosition
, iter
);
1192 * call this to copy bytes out of the scanner that have not yet been consumed
1193 * by the tokenization process.
1195 * @update gess 5/12/98
1196 * @param aCopyBuffer is where the scanner buffer will be copied to
1199 void nsScanner::CopyUnusedData(nsString
& aCopyBuffer
) {
1200 if (!mSlidingBuffer
) {
1201 aCopyBuffer
.Truncate();
1205 nsScannerIterator start
, end
;
1206 start
= mCurrentPosition
;
1209 CopyUnicodeTo(start
, end
, aCopyBuffer
);
1213 * Retrieve the name of the file that the scanner is reading from.
1214 * In some cases, it's just a given name, because the scanner isn't
1215 * really reading from a file.
1217 * @update gess 5/12/98
1220 nsString
& nsScanner::GetFilename(void) {
1225 * Conduct self test. Actually, selftesting for this class
1226 * occurs in the parser selftest.
1228 * @update gess 3/25/98
1233 void nsScanner::SelfTest(void) {