Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / parser / htmlparser / src / nsScanner.cpp
blobf874adef9e562520196ee2355fccae8ea2af2e1a
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=2 sw=2 et tw=78: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
19 * Netscape Communications Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 1998
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 //#define __INCREMENTAL 1
41 #include "nsScanner.h"
42 #include "nsDebug.h"
43 #include "nsIServiceManager.h"
44 #include "nsICharsetConverterManager.h"
45 #include "nsICharsetAlias.h"
46 #include "nsReadableUtils.h"
47 #include "nsIInputStream.h"
48 #include "nsILocalFile.h"
49 #include "nsNetUtil.h"
50 #include "nsUTF8Utils.h" // for LossyConvertEncoding
51 #include "nsCRT.h"
52 #include "nsParser.h"
54 // We replace NUL characters with this character.
55 static PRUnichar sInvalid = UCS2_REPLACEMENT_CHAR;
57 nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
58 mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set
60 // Build filter that will be used to filter out characters with
61 // bits that none of the terminal chars have. This works very well
62 // because terminal chars often have only the last 4-6 bits set and
63 // normal ascii letters have bit 7 set. Other letters have even higher
64 // bits set.
66 // Calculate filter
67 const PRUnichar *current = aTerminateChars;
68 PRUnichar terminalChar = *current;
69 while (terminalChar) {
70 mFilter &= ~terminalChar;
71 ++current;
72 terminalChar = *current;
// Buffer size constant: 1 when __INCREMENTAL is defined, 64 otherwise.
#ifdef __INCREMENTAL
const int kBufsize = 1;
#else
const int kBufsize = 64;
#endif
82 /**
83 * Use this constructor if you want i/o to be based on
84 * a single string you hand in during construction.
85 * This short cut was added for Javascript.
87 * @update gess 5/12/98
88 * @param aMode represents the parser mode (nav, other)
89 * @return
91 nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
92 PRInt32 aSource)
93 : mParser(nsnull)
95 MOZ_COUNT_CTOR(nsScanner);
97 mSlidingBuffer = nsnull;
98 mCountRemaining = 0;
99 mFirstNonWhitespacePosition = -1;
100 if (AppendToBuffer(anHTMLString)) {
101 mSlidingBuffer->BeginReading(mCurrentPosition);
102 } else {
103 /* XXX see hack below, re: bug 182067 */
104 memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
105 mEndPosition = mCurrentPosition;
107 mMarkPosition = mCurrentPosition;
108 mIncremental = PR_FALSE;
109 mUnicodeDecoder = 0;
110 mCharsetSource = kCharsetUninitialized;
114 * Use this constructor if you want i/o to be based on strings
115 * the scanner receives. If you pass a null filename, you
116 * can still provide data to the scanner via append.
118 * @update gess 5/12/98
119 * @param aFilename --
120 * @return
122 nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,
123 const nsACString& aCharset, PRInt32 aSource)
124 : mFilename(aFilename), mParser(nsnull)
126 MOZ_COUNT_CTOR(nsScanner);
127 NS_ASSERTION(!aCreateStream, "This is always true.");
129 mSlidingBuffer = nsnull;
131 // XXX This is a big hack. We need to initialize the iterators to something.
132 // What matters is that mCurrentPosition == mEndPosition, so that our methods
133 // believe that we are at EOF (see bug 182067). We null out mCurrentPosition
134 // so that we have some hope of catching null pointer dereferences associated
135 // with this hack. --darin
136 memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
137 mMarkPosition = mCurrentPosition;
138 mEndPosition = mCurrentPosition;
140 mIncremental = PR_TRUE;
141 mFirstNonWhitespacePosition = -1;
142 mCountRemaining = 0;
144 mUnicodeDecoder = 0;
145 mCharsetSource = kCharsetUninitialized;
146 SetDocumentCharset(aCharset, aSource);
149 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource)
151 if (aSource < mCharsetSource) // priority is lower the the current one , just
152 return NS_OK;
154 nsICharsetAlias* calias = nsParser::GetCharsetAliasService();
155 NS_ASSERTION(calias, "Must have the charset alias service!");
157 nsresult res = NS_OK;
158 if (!mCharset.IsEmpty())
160 PRBool same;
161 res = calias->Equals(aCharset, mCharset, &same);
162 if(NS_SUCCEEDED(res) && same)
164 return NS_OK; // no difference, don't change it
168 // different, need to change it
169 nsCString charsetName;
170 res = calias->GetPreferred(aCharset, charsetName);
172 if(NS_FAILED(res) && (mCharsetSource == kCharsetUninitialized))
174 // failed - unknown alias , fallback to ISO-8859-1
175 mCharset.AssignLiteral("ISO-8859-1");
177 else
179 mCharset.Assign(charsetName);
182 mCharsetSource = aSource;
184 NS_ASSERTION(nsParser::GetCharsetConverterManager(),
185 "Must have the charset converter manager!");
187 return nsParser::GetCharsetConverterManager()->
188 GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
193 * default destructor
195 * @update gess 3/25/98
196 * @param
197 * @return
199 nsScanner::~nsScanner() {
201 if (mSlidingBuffer) {
202 delete mSlidingBuffer;
205 MOZ_COUNT_DTOR(nsScanner);
209 * Resets current offset position of input stream to marked position.
210 * This allows us to back up to this point if the need should arise,
211 * such as when tokenization gets interrupted.
212 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
214 * @update gess 5/12/98
215 * @param
216 * @return
218 void nsScanner::RewindToMark(void){
219 if (mSlidingBuffer) {
220 mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
221 mCurrentPosition = mMarkPosition;
227 * Records current offset position in input stream. This allows us
228 * to back up to this point if the need should arise, such as when
229 * tokenization gets interrupted.
231 * @update gess 7/29/98
232 * @param
233 * @return
235 PRInt32 nsScanner::Mark() {
236 PRInt32 distance = 0;
237 if (mSlidingBuffer) {
238 nsScannerIterator oldStart;
239 mSlidingBuffer->BeginReading(oldStart);
241 distance = Distance(oldStart, mCurrentPosition);
243 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
244 mSlidingBuffer->BeginReading(mCurrentPosition);
245 mMarkPosition = mCurrentPosition;
248 return distance;
251 /**
252 * Insert data to our underlying input buffer as
253 * if it were read from an input stream.
255 * @update harishd 01/12/99
256 * @return error code
258 PRBool nsScanner::UngetReadable(const nsAString& aBuffer) {
259 if (!mSlidingBuffer) {
260 return PR_FALSE;
263 mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
264 mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
265 mSlidingBuffer->EndReading(mEndPosition);
267 PRUint32 length = aBuffer.Length();
268 mCountRemaining += length; // Ref. bug 117441
269 return PR_TRUE;
272 /**
273 * Append data to our underlying input buffer as
274 * if it were read from an input stream.
276 * @update gess4/3/98
277 * @return error code
279 nsresult nsScanner::Append(const nsAString& aBuffer) {
280 if (!AppendToBuffer(aBuffer))
281 return NS_ERROR_OUT_OF_MEMORY;
282 return NS_OK;
288 * @update gess 5/21/98
289 * @param
290 * @return
292 nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
293 nsIRequest *aRequest)
295 nsresult res=NS_OK;
296 PRUnichar *unichars, *start;
297 if (mUnicodeDecoder) {
298 PRInt32 unicharBufLen = 0;
299 mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
300 nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
301 NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
302 start = unichars = buffer->DataStart();
304 PRInt32 totalChars = 0;
305 PRInt32 unicharLength = unicharBufLen;
306 do {
307 PRInt32 srcLength = aLen;
308 res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
310 totalChars += unicharLength;
311 // Continuation of failure case
312 if(NS_FAILED(res)) {
313 // if we failed, we consume one byte, replace it with U+FFFD
314 // and try the conversion again.
316 // This is only needed because some decoders don't follow the
317 // nsIUnicodeDecoder contract: they return a failure when *aDestLength
318 // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177
319 if ((unichars + unicharLength) >= buffer->DataEnd()) {
320 NS_ERROR("Unexpected end of destination buffer");
321 break;
324 unichars[unicharLength++] = (PRUnichar)0xFFFD;
325 unichars = unichars + unicharLength;
326 unicharLength = unicharBufLen - (++totalChars);
328 mUnicodeDecoder->Reset();
330 if(((PRUint32) (srcLength + 1)) > aLen) {
331 srcLength = aLen;
333 else {
334 ++srcLength;
337 aBuffer += srcLength;
338 aLen -= srcLength;
340 } while (NS_FAILED(res) && (aLen > 0));
342 buffer->SetDataLength(totalChars);
343 // Don't propagate return code of unicode decoder
344 // since it doesn't reflect on our success or failure
345 // - Ref. bug 87110
346 res = NS_OK;
347 if (!AppendToBuffer(buffer, aRequest))
348 res = NS_ERROR_OUT_OF_MEMORY;
350 else {
351 NS_WARNING("No decoder found.");
352 res = NS_ERROR_FAILURE;
355 return res;
359 * retrieve next char from scanners internal input stream
361 * @update gess 3/25/98
362 * @param
363 * @return error code reflecting read status
365 nsresult nsScanner::GetChar(PRUnichar& aChar) {
366 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
367 aChar = 0;
368 return kEOF;
371 aChar = *mCurrentPosition++;
372 --mCountRemaining;
374 return NS_OK;
379 * peek ahead to consume next char from scanner's internal
380 * input buffer
382 * @update gess 3/25/98
383 * @param
384 * @return
386 nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) {
387 aChar = 0;
389 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
390 return kEOF;
393 if (aOffset > 0) {
394 if (mCountRemaining <= aOffset)
395 return kEOF;
397 nsScannerIterator pos = mCurrentPosition;
398 pos.advance(aOffset);
399 aChar=*pos;
401 else {
402 aChar=*mCurrentPosition;
405 return NS_OK;
408 nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset)
410 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
411 return kEOF;
414 nsScannerIterator start, end;
416 start = mCurrentPosition;
418 if ((PRInt32)mCountRemaining <= aOffset) {
419 return kEOF;
422 if (aOffset > 0) {
423 start.advance(aOffset);
426 if (mCountRemaining < PRUint32(aNumChars + aOffset)) {
427 end = mEndPosition;
429 else {
430 end = start;
431 end.advance(aNumChars);
434 CopyUnicodeTo(start, end, aStr);
436 return NS_OK;
441 * Skip whitespace on scanner input stream
443 * @update gess 3/25/98
444 * @param
445 * @return error status
447 nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) {
449 if (!mSlidingBuffer) {
450 return kEOF;
453 PRUnichar theChar = 0;
454 nsresult result = Peek(theChar);
456 if (NS_FAILED(result)) {
457 return result;
460 nsScannerIterator current = mCurrentPosition;
461 PRBool done = PR_FALSE;
462 PRBool skipped = PR_FALSE;
464 while (!done && current != mEndPosition) {
465 switch(theChar) {
466 case '\n':
467 case '\r': ++aNewlinesSkipped;
468 case ' ' :
469 case '\t':
471 skipped = PR_TRUE;
472 PRUnichar thePrevChar = theChar;
473 theChar = (++current != mEndPosition) ? *current : '\0';
474 if ((thePrevChar == '\r' && theChar == '\n') ||
475 (thePrevChar == '\n' && theChar == '\r')) {
476 theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
479 break;
480 default:
481 done = PR_TRUE;
482 break;
486 if (skipped) {
487 SetPosition(current);
488 if (current == mEndPosition) {
489 result = kEOF;
493 return result;
497 * Skip over chars as long as they equal given char
499 * @update gess 3/25/98
500 * @param
501 * @return error code
503 nsresult nsScanner::SkipOver(PRUnichar aSkipChar){
505 if (!mSlidingBuffer) {
506 return kEOF;
509 PRUnichar ch=0;
510 nsresult result=NS_OK;
512 while(NS_OK==result) {
513 result=Peek(ch);
514 if(NS_OK == result) {
515 if(ch!=aSkipChar) {
516 break;
518 GetChar(ch);
520 else break;
521 } //while
522 return result;
#if 0
// Compiled-out debugging helpers. Each looks for the first NUL in the
// string and tests whether it is somewhere other than the last position;
// the body of that test is empty.
void DoErrTest(nsString& aString) {
  PRInt32 pos = aString.FindChar(0);
  if (kNotFound < pos) {
    if (aString.Length() - 1 != pos) {
    }
  }
}

void DoErrTest(nsCString& aString) {
  PRInt32 pos = aString.FindChar(0);
  if (kNotFound < pos) {
    if (aString.Length() - 1 != pos) {
    }
  }
}
#endif
545 * Consume characters until you run into space, a '<', a '>', or a '/'.
547 * @param aString - receives new data from stream
548 * @return error code
550 nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {
552 if (!mSlidingBuffer) {
553 return kEOF;
556 PRUnichar theChar=0;
557 nsresult result=Peek(theChar);
558 nsScannerIterator current, end;
559 PRBool found=PR_FALSE;
561 current = mCurrentPosition;
562 end = mEndPosition;
564 // Loop until we find an illegal character. Everything is then appended
565 // later.
566 while(current != end && !found) {
567 theChar=*current;
569 switch(theChar) {
570 case '\n':
571 case '\r':
572 case ' ' :
573 case '\t':
574 case '\v':
575 case '\f':
576 case '<':
577 case '>':
578 case '/':
579 found = PR_TRUE;
580 break;
582 case '\0':
583 ReplaceCharacter(current, sInvalid);
584 break;
586 default:
587 break;
590 if (!found) {
591 ++current;
595 // Don't bother appending nothing.
596 if (current != mCurrentPosition) {
597 AppendUnicodeTo(mCurrentPosition, current, aString);
600 SetPosition(current);
601 if (current == end) {
602 result = kEOF;
605 //DoErrTest(aString);
607 return result;
611 * Consume characters until you run into a char that's not valid in an
612 * entity name
614 * @param aString - receives new data from stream
615 * @return error code
617 nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
619 if (!mSlidingBuffer) {
620 return kEOF;
623 PRUnichar theChar=0;
624 nsresult result=Peek(theChar);
625 nsScannerIterator origin, current, end;
626 PRBool found=PR_FALSE;
628 origin = mCurrentPosition;
629 current = mCurrentPosition;
630 end = mEndPosition;
632 while(current != end) {
634 theChar=*current;
635 if(theChar) {
636 found=PR_FALSE;
637 switch(theChar) {
638 case '_':
639 case '-':
640 case '.':
641 // Don't allow ':' in entity names. See bug 23791
642 found = PR_TRUE;
643 break;
644 default:
645 found = ('a'<=theChar && theChar<='z') ||
646 ('A'<=theChar && theChar<='Z') ||
647 ('0'<=theChar && theChar<='9');
648 break;
651 if(!found) {
652 AppendUnicodeTo(mCurrentPosition, current, aString);
653 break;
656 ++current;
659 SetPosition(current);
660 if (current == end) {
661 AppendUnicodeTo(origin, current, aString);
662 return kEOF;
665 //DoErrTest(aString);
667 return result;
671 * Consume digits
673 * @param aString - should contain digits
674 * @return error code
676 nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
678 if (!mSlidingBuffer) {
679 return kEOF;
682 NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
684 PRUnichar theChar=0;
685 nsresult result=Peek(theChar);
686 nsScannerIterator origin, current, end;
688 origin = mCurrentPosition;
689 current = origin;
690 end = mEndPosition;
692 PRBool done = PR_FALSE;
693 while(current != end) {
694 theChar=*current;
695 if(theChar) {
696 done = (theChar < '0' || theChar > '9') &&
697 ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
698 (theChar < 'a' || theChar > 'f')
699 :PR_TRUE);
700 if(done) {
701 AppendUnicodeTo(origin, current, aString);
702 break;
705 ++current;
708 SetPosition(current);
709 if (current == end) {
710 AppendUnicodeTo(origin, current, aString);
711 return kEOF;
714 //DoErrTest(aString);
716 return result;
720 * Consume characters until you find the terminal char
722 * @update gess 3/25/98
723 * @param aString receives new data from stream
724 * @param addTerminal tells us whether to append terminal to aString
725 * @return error code
727 nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
728 PRInt32& aNewlinesSkipped,
729 PRBool& aHaveCR) {
731 aHaveCR = PR_FALSE;
733 if (!mSlidingBuffer) {
734 return kEOF;
737 PRUnichar theChar = 0;
738 nsresult result = Peek(theChar);
740 if (NS_FAILED(result)) {
741 return result;
744 nsScannerIterator origin, current, end;
745 PRBool done = PR_FALSE;
747 origin = mCurrentPosition;
748 current = origin;
749 end = mEndPosition;
751 PRBool haveCR = PR_FALSE;
753 while(!done && current != end) {
754 switch(theChar) {
755 case '\n':
756 case '\r':
758 ++aNewlinesSkipped;
759 PRUnichar thePrevChar = theChar;
760 theChar = (++current != end) ? *current : '\0';
761 if ((thePrevChar == '\r' && theChar == '\n') ||
762 (thePrevChar == '\n' && theChar == '\r')) {
763 theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
764 haveCR = PR_TRUE;
765 } else if (thePrevChar == '\r') {
766 // Lone CR becomes CRLF; callers should know to remove extra CRs
767 AppendUnicodeTo(origin, current, aString);
768 aString.writable().Append(PRUnichar('\n'));
769 origin = current;
770 haveCR = PR_TRUE;
773 break;
774 case ' ' :
775 case '\t':
776 theChar = (++current != end) ? *current : '\0';
777 break;
778 default:
779 done = PR_TRUE;
780 AppendUnicodeTo(origin, current, aString);
781 break;
785 SetPosition(current);
786 if (current == end) {
787 AppendUnicodeTo(origin, current, aString);
788 result = kEOF;
791 aHaveCR = haveCR;
792 return result;
795 //XXXbz callers of this have to manage their lone '\r' themselves if they want
796 //it to work. Good thing they're all in view-source and it deals.
797 nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,
798 nsScannerIterator& aEnd,
799 PRInt32& aNewlinesSkipped) {
801 if (!mSlidingBuffer) {
802 return kEOF;
805 PRUnichar theChar = 0;
806 nsresult result = Peek(theChar);
808 if (NS_FAILED(result)) {
809 return result;
812 nsScannerIterator origin, current, end;
813 PRBool done = PR_FALSE;
815 origin = mCurrentPosition;
816 current = origin;
817 end = mEndPosition;
819 while(!done && current != end) {
820 switch(theChar) {
821 case '\n':
822 case '\r': ++aNewlinesSkipped;
823 case ' ' :
824 case '\t':
826 PRUnichar thePrevChar = theChar;
827 theChar = (++current != end) ? *current : '\0';
828 if ((thePrevChar == '\r' && theChar == '\n') ||
829 (thePrevChar == '\n' && theChar == '\r')) {
830 theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
833 break;
834 default:
835 done = PR_TRUE;
836 aStart = origin;
837 aEnd = current;
838 break;
842 SetPosition(current);
843 if (current == end) {
844 aStart = origin;
845 aEnd = current;
846 result = kEOF;
849 return result;
853 * Consume characters until you encounter one contained in given
854 * input set.
856 * @update gess 3/25/98
857 * @param aString will contain the result of this method
858 * @param aTerminalSet is an ordered string that contains
859 * the set of INVALID characters
860 * @return error code
862 nsresult nsScanner::ReadUntil(nsAString& aString,
863 const nsReadEndCondition& aEndCondition,
864 PRBool addTerminal)
866 if (!mSlidingBuffer) {
867 return kEOF;
870 nsScannerIterator origin, current;
871 const PRUnichar* setstart = aEndCondition.mChars;
872 const PRUnichar* setcurrent;
874 origin = mCurrentPosition;
875 current = origin;
877 PRUnichar theChar=0;
878 nsresult result=Peek(theChar);
880 if (NS_FAILED(result)) {
881 return result;
884 while (current != mEndPosition) {
885 theChar = *current;
886 if (theChar == '\0') {
887 ReplaceCharacter(current, sInvalid);
888 theChar = sInvalid;
891 // Filter out completely wrong characters
892 // Check if all bits are in the required area
893 if(!(theChar & aEndCondition.mFilter)) {
894 // They were. Do a thorough check.
896 setcurrent = setstart;
897 while (*setcurrent) {
898 if (*setcurrent == theChar) {
899 if(addTerminal)
900 ++current;
901 AppendUnicodeTo(origin, current, aString);
902 SetPosition(current);
904 //DoErrTest(aString);
906 return NS_OK;
908 ++setcurrent;
912 ++current;
915 // If we are here, we didn't find any terminator in the string and
916 // current = mEndPosition
917 SetPosition(current);
918 AppendUnicodeTo(origin, current, aString);
919 return kEOF;
922 nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
923 const nsReadEndCondition& aEndCondition,
924 PRBool addTerminal)
926 if (!mSlidingBuffer) {
927 return kEOF;
930 nsScannerIterator origin, current;
931 const PRUnichar* setstart = aEndCondition.mChars;
932 const PRUnichar* setcurrent;
934 origin = mCurrentPosition;
935 current = origin;
937 PRUnichar theChar=0;
938 nsresult result=Peek(theChar);
940 if (NS_FAILED(result)) {
941 return result;
944 while (current != mEndPosition) {
945 theChar = *current;
946 if (theChar == '\0') {
947 ReplaceCharacter(current, sInvalid);
948 theChar = sInvalid;
951 // Filter out completely wrong characters
952 // Check if all bits are in the required area
953 if(!(theChar & aEndCondition.mFilter)) {
954 // They were. Do a thorough check.
956 setcurrent = setstart;
957 while (*setcurrent) {
958 if (*setcurrent == theChar) {
959 if(addTerminal)
960 ++current;
961 AppendUnicodeTo(origin, current, aString);
962 SetPosition(current);
964 //DoErrTest(aString);
966 return NS_OK;
968 ++setcurrent;
972 ++current;
975 // If we are here, we didn't find any terminator in the string and
976 // current = mEndPosition
977 SetPosition(current);
978 AppendUnicodeTo(origin, current, aString);
979 return kEOF;
982 nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,
983 nsScannerIterator& aEnd,
984 const nsReadEndCondition &aEndCondition,
985 PRBool addTerminal)
987 if (!mSlidingBuffer) {
988 return kEOF;
991 nsScannerIterator origin, current;
992 const PRUnichar* setstart = aEndCondition.mChars;
993 const PRUnichar* setcurrent;
995 origin = mCurrentPosition;
996 current = origin;
998 PRUnichar theChar=0;
999 nsresult result=Peek(theChar);
1001 if (NS_FAILED(result)) {
1002 aStart = aEnd = current;
1003 return result;
1006 while (current != mEndPosition) {
1007 if (theChar == '\0') {
1008 ReplaceCharacter(current, sInvalid);
1009 theChar = sInvalid;
1012 // Filter out completely wrong characters
1013 // Check if all bits are in the required area
1014 if(!(theChar & aEndCondition.mFilter)) {
1015 // They were. Do a thorough check.
1016 setcurrent = setstart;
1017 while (*setcurrent) {
1018 if (*setcurrent == theChar) {
1019 if(addTerminal)
1020 ++current;
1021 aStart = origin;
1022 aEnd = current;
1023 SetPosition(current);
1025 return NS_OK;
1027 ++setcurrent;
1031 ++current;
1032 theChar = *current;
1035 // If we are here, we didn't find any terminator in the string and
1036 // current = mEndPosition
1037 SetPosition(current);
1038 aStart = origin;
1039 aEnd = current;
1040 return kEOF;
1044 * Consumes chars until you see the given terminalChar
1046 * @update gess 3/25/98
1047 * @param
1048 * @return error code
1050 nsresult nsScanner::ReadUntil(nsAString& aString,
1051 PRUnichar aTerminalChar,
1052 PRBool addTerminal)
1054 if (!mSlidingBuffer) {
1055 return kEOF;
1058 nsScannerIterator origin, current;
1060 origin = mCurrentPosition;
1061 current = origin;
1063 PRUnichar theChar;
1064 nsresult result = Peek(theChar);
1066 if (NS_FAILED(result)) {
1067 return result;
1070 while (current != mEndPosition) {
1071 if (theChar == '\0') {
1072 ReplaceCharacter(current, sInvalid);
1073 theChar = sInvalid;
1076 if (aTerminalChar == theChar) {
1077 if(addTerminal)
1078 ++current;
1079 AppendUnicodeTo(origin, current, aString);
1080 SetPosition(current);
1081 return NS_OK;
1083 ++current;
1084 theChar = *current;
1087 // If we are here, we didn't find any terminator in the string and
1088 // current = mEndPosition
1089 AppendUnicodeTo(origin, current, aString);
1090 SetPosition(current);
1091 return kEOF;
1095 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
1097 aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
1100 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
1102 aPosition = mCurrentPosition;
1105 void nsScanner::EndReading(nsScannerIterator& aPosition)
1107 aPosition = mEndPosition;
1110 void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse)
1112 if (mSlidingBuffer) {
1113 #ifdef DEBUG
1114 PRUint32 origRemaining = mCountRemaining;
1115 #endif
1117 if (aReverse) {
1118 mCountRemaining += (Distance(aPosition, mCurrentPosition));
1120 else {
1121 mCountRemaining -= (Distance(mCurrentPosition, aPosition));
1124 NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) ||
1125 (mCountRemaining <= origRemaining && !aReverse),
1126 "Improper use of nsScanner::SetPosition. Make sure to set the"
1127 " aReverse parameter correctly");
1129 mCurrentPosition = aPosition;
1130 if (aTerminate && (mCurrentPosition == mEndPosition)) {
1131 mMarkPosition = mCurrentPosition;
1132 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
1137 void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
1138 PRUnichar aChar)
1140 if (mSlidingBuffer) {
1141 mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
1145 PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
1146 nsIRequest *aRequest)
1148 if (nsParser::sParserDataListeners && mParser &&
1149 NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
1150 aBuf->DataEnd()), aRequest))) {
1151 // Don't actually append on failure.
1153 return mSlidingBuffer != nsnull;
1156 if (!mSlidingBuffer) {
1157 mSlidingBuffer = new nsScannerString(aBuf);
1158 if (!mSlidingBuffer)
1159 return PR_FALSE;
1160 mSlidingBuffer->BeginReading(mCurrentPosition);
1161 mMarkPosition = mCurrentPosition;
1162 mSlidingBuffer->EndReading(mEndPosition);
1163 mCountRemaining = aBuf->DataLength();
1165 else {
1166 mSlidingBuffer->AppendBuffer(aBuf);
1167 if (mCurrentPosition == mEndPosition) {
1168 mSlidingBuffer->BeginReading(mCurrentPosition);
1170 mSlidingBuffer->EndReading(mEndPosition);
1171 mCountRemaining += aBuf->DataLength();
1174 if (mFirstNonWhitespacePosition == -1) {
1175 nsScannerIterator iter(mCurrentPosition);
1176 nsScannerIterator end(mEndPosition);
1178 while (iter != end) {
1179 if (!nsCRT::IsAsciiSpace(*iter)) {
1180 mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
1182 break;
1185 ++iter;
1188 return PR_TRUE;
1192 * call this to copy bytes out of the scanner that have not yet been consumed
1193 * by the tokenization process.
1195 * @update gess 5/12/98
1196 * @param aCopyBuffer is where the scanner buffer will be copied to
1197 * @return nada
1199 void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
1200 if (!mSlidingBuffer) {
1201 aCopyBuffer.Truncate();
1202 return;
1205 nsScannerIterator start, end;
1206 start = mCurrentPosition;
1207 end = mEndPosition;
1209 CopyUnicodeTo(start, end, aCopyBuffer);
1213 * Retrieve the name of the file that the scanner is reading from.
1214 * In some cases, it's just a given name, because the scanner isn't
1215 * really reading from a file.
1217 * @update gess 5/12/98
1218 * @return
1220 nsString& nsScanner::GetFilename(void) {
1221 return mFilename;
1225 * Conduct self test. Actually, selftesting for this class
1226 * occurs in the parser selftest.
1228 * @update gess 3/25/98
1229 * @param
1230 * @return
1233 void nsScanner::SelfTest(void) {
1234 #ifdef _DEBUG
1235 #endif