1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
43 * The scanner is a low-level service class that knows
44 * how to consume characters out of an (internal) stream.
45 * This class also offers a series of utility methods
46 * that most tokenizers want, such as readUntil()
47 * and SkipWhitespace().
56 #include "nsIParser.h"
58 #include "nsIUnicodeDecoder.h"
59 #include "nsScannerString.h"
/**
 * nsReadEndCondition holds a pointer to PRUnichar data (mChars) and is
 * constructed explicitly from a string of terminating characters. Several
 * ReadUntil() overloads in this file take it by const reference as their
 * end condition.
 * NOTE(review): access specifiers, any further members and the closing of
 * the class are not visible in this excerpt.
 */
63 class nsReadEndCondition
{
// Character data for this condition; presumably the terminator set handed to
// the constructor -- confirm against the implementation.
65 const PRUnichar
*mChars
;
// explicit: a raw PRUnichar pointer is never converted implicitly.
67 explicit nsReadEndCondition(const PRUnichar
* aTerminateChars
);
// Declared only so that copying is not available (see the trailing note).
69 nsReadEndCondition(const nsReadEndCondition
& aOther
); // No copying
// Declared only so that assignment is not available (see the trailing note).
70 void operator=(const nsReadEndCondition
& aOther
); // No assigning
77 * Use this constructor if you want i/o to be based on
78 * a single string you hand in during construction.
79 * This short cut was added for Javascript.
81 * @update ftang 3/02/99
82 * @param aCharset charset
83 * @param aSource - where the charset info came from
84 * @param aMode represents the parser mode (nav, other) [stale: the constructor declared here takes no aMode parameter]
/**
 * Constructs a scanner whose input is a single string handed in up front.
 *
 * @param anHTMLString the string to scan
 * @param aCharset     charset name
 * @param aSource      where the charset information came from
 */
87 nsScanner(const nsAString
& anHTMLString
, const nsACString
& aCharset
, PRInt32 aSource
);
90 * Use this constructor if you want i/o to be based on
91 * a file (therefore a stream) or just data you provide via Append().
93 * @update ftang 3/02/99
94 * @param aCharset charset
95 * @param aSource - where the charset info came from
96 * @param aMode represents the parser mode (nav, other) [stale: the constructor declared here takes no aMode parameter]
/**
 * Constructs a scanner whose input comes from a file (a stream) or from data
 * supplied later through Append().
 *
 * @param aFilename     name associated with the input
 * @param aCreateStream NOTE(review): presumably requests that a stream be
 *                      opened for aFilename -- confirm in the implementation
 * @param aCharset      charset name
 * @param aSource       where the charset information came from
 */
99 nsScanner(nsString
& aFilename
,PRBool aCreateStream
, const nsACString
& aCharset
, PRInt32 aSource
);
104 * retrieve next char from internal input stream
106 * @update gess 3/25/98
107 * @param ch is the char to accept new value
108 * @return error code reflecting read status
/**
 * Retrieves the next char from the internal input stream.
 *
 * @param ch receives the char that was read
 * @return error code reflecting the read status
 */
110 nsresult
GetChar(PRUnichar
& ch
);
113 * peek ahead to consume next char from scanner's internal
116 * @update gess 3/25/98
117 * @param ch is the char to accept new value
118 * @return error code reflecting read status
/**
 * Peeks ahead at a char in the scanner's internal input.
 *
 * @param ch      receives the char
 * @param aOffset defaults to 0. NOTE(review): presumably the distance from
 *                the current read position -- confirm in the implementation
 * @return error code reflecting the read status
 */
120 nsresult
Peek(PRUnichar
& ch
, PRUint32 aOffset
=0);
/**
 * String-valued overload of Peek().
 * NOTE(review): no description survives in this excerpt. Presumably aStr
 * receives up to aNumChars characters starting aOffset positions ahead
 * (aOffset defaults to 0) -- confirm in the implementation.
 */
122 nsresult
Peek(nsAString
& aStr
, PRInt32 aNumChars
, PRInt32 aOffset
= 0);
125 * Skip over chars as long as they equal given char
127 * @update gess 3/25/98
128 * @param aSkipChar char to be skipped
/**
 * Skips over chars for as long as they equal the given char.
 *
 * @param aSkipChar the char to be skipped
 */
131 nsresult
SkipOver(PRUnichar aSkipChar
);
134 * Skip whitespace on scanner input stream
136 * @update gess 3/25/98
137 * @return error status
/**
 * Skips whitespace on the scanner input stream.
 *
 * @param aNewlinesSkipped output reference. NOTE(review): presumably the
 *                         number of newlines skipped -- confirm
 * @return error status
 */
139 nsresult
SkipWhitespace(PRInt32
& aNewlinesSkipped
);
142 * Consume characters until you run into space, a '<', a '>', or a '/'.
144 * @param aString - receives new data from stream
/**
 * Consumes characters until a space, a '<', a '>' or a '/' is reached.
 *
 * @param aString receives the new data from the stream
 */
147 nsresult
ReadTagIdentifier(nsScannerSharedSubstring
& aString
);
150 * Consume characters until you run into a char that's not valid in an
153 * @param aString - receives new data from stream
/**
 * Consumes characters until one is reached that is not valid in the
 * identifier being read. NOTE(review): the original sentence is cut off in
 * this excerpt; presumably an entity name -- confirm.
 *
 * @param aString receives the new data from the stream
 */
156 nsresult
ReadEntityIdentifier(nsString
& aString
);
/**
 * NOTE(review): no description survives in this excerpt. Presumably reads a
 * number written in base aBase from the input into aString -- confirm in the
 * implementation.
 */
157 nsresult
ReadNumber(nsString
& aString
,PRInt32 aBase
);
158 nsresult
ReadWhitespace(nsScannerSharedSubstring
& aString
,
159 PRInt32
& aNewlinesSkipped
,
/**
 * Iterator-based overload of ReadWhitespace().
 * NOTE(review): no description survives in this excerpt. Presumably aStart
 * and aEnd are set to delimit the whitespace that was read and
 * aNewlinesSkipped receives the number of newlines in it -- confirm.
 */
161 nsresult
ReadWhitespace(nsScannerIterator
& aStart
,
162 nsScannerIterator
& aEnd
,
163 PRInt32
& aNewlinesSkipped
);
166 * Consume characters until you find the terminal char
168 * @update gess 3/25/98
169 * @param aString receives new data from stream
170 * @param aTerminal contains terminating char
171 * @param addTerminal tells us whether to append terminal to aString
174 nsresult
ReadUntil(nsAString
& aString
,
179 * Consume characters until you find one contained in given
182 * @update gess 3/25/98
183 * @param aString receives new data from stream
184 * @param aTermSet contains set of terminating chars
185 * @param addTerminal tells us whether to append terminal to aString
188 nsresult
ReadUntil(nsAString
& aString
,
189 const nsReadEndCondition
& aEndCondition
,
192 nsresult
ReadUntil(nsScannerSharedSubstring
& aString
,
193 const nsReadEndCondition
& aEndCondition
,
196 nsresult
ReadUntil(nsScannerIterator
& aStart
,
197 nsScannerIterator
& aEnd
,
198 const nsReadEndCondition
& aEndCondition
,
202 * Records current offset position in input stream. This allows us
203 * to back up to this point if the need should arise, such as when
204 * tokenization gets interrupted.
206 * @update gess 5/12/98
213 * Resets current offset position of input stream to marked position.
214 * This allows us to back up to this point if the need should arise,
215 * such as when tokenization gets interrupted.
216 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
218 * @update gess 5/12/98
/**
 * Resets the current offset position of the input stream to the marked
 * position, so the scanner can back up, e.g. when tokenization gets
 * interrupted. A mark should have been recorded first.
 */
222 void RewindToMark(void);
228 * @update harishd 01/12/99
/**
 * NOTE(review): the description is missing from this excerpt. Presumably
 * pushes aBuffer back into the scanner's input and reports success through
 * the PRBool result -- confirm in the implementation.
 */
232 PRBool
UngetReadable(const nsAString
& aBuffer
);
237 * @update gess 5/13/98
/**
 * Supplies string data to the scanner.
 * NOTE(review): presumably the data is added after what is already buffered
 * -- confirm in the implementation.
 *
 * @param aBuffer the data to add
 */
241 nsresult
Append(const nsAString
& aBuffer
);
246 * @update gess 5/21/98
/**
 * Supplies raw char data to the scanner.
 *
 * @param aBuffer  the data to add
 * @param aLen     NOTE(review): presumably the length of aBuffer in bytes --
 *                 confirm
 * @param aRequest request associated with the data; how it is used is not
 *                 visible in this excerpt
 */
250 nsresult
Append(const char* aBuffer
, PRUint32 aLen
,
251 nsIRequest
*aRequest
);
254 * Call this to copy bytes out of the scanner that have not yet been consumed
255 * by the tokenization process.
257 * @update gess 5/12/98
258 * @param aCopyBuffer is where the scanner buffer will be copied to
/**
 * Copies out of the scanner the data that the tokenization process has not
 * yet consumed.
 *
 * @param aCopyBuffer where the scanner buffer will be copied to
 */
261 void CopyUnusedData(nsString
& aCopyBuffer
);
264 * Retrieve the name of the file that the scanner is reading from.
265 * In some cases, it's just a given name, because the scanner isn't
266 * really reading from a file.
268 * @update gess 5/12/98
/**
 * Returns, by reference, the name of the file the scanner is reading from.
 * In some cases this is just a given name, because the scanner is not
 * really reading from a file.
 */
271 nsString
& GetFilename(void);
// Static entry point taking no arguments and returning nothing.
// NOTE(review): what it exercises is not visible in this excerpt.
273 static void SelfTest();
276 * Use this setter to change the scanner's unicode decoder
278 * @update ftang 3/02/99
279 * @param aCharset a normalized (alias resolved) charset name
280 * @param aSource - where the charset info came from
/**
 * Changes the scanner's unicode decoder.
 *
 * @param aCharset a normalized (alias resolved) charset name
 * @param aSource  where the charset information came from
 */
283 nsresult
SetDocumentCharset(const nsACString
& aCharset
, PRInt32 aSource
);
// NOTE(review): undocumented in this excerpt. Presumably makes aSubstring
// refer to the scanner data between aStart and aEnd -- confirm in the
// implementation.
285 void BindSubstring(nsScannerSubstring
& aSubstring
, const nsScannerIterator
& aStart
, const nsScannerIterator
& aEnd
);
// NOTE(review): undocumented in this excerpt. Presumably stores the
// scanner's current read position in aPosition -- confirm.
286 void CurrentPosition(nsScannerIterator
& aPosition
);
// NOTE(review): undocumented in this excerpt. Presumably stores the end of
// the scanner buffer in aPosition -- confirm.
287 void EndReading(nsScannerIterator
& aPosition
);
// NOTE(review): undocumented in this excerpt. Presumably moves the scanner's
// read position to aPosition -- confirm. Both flags default to PR_FALSE;
// their exact effect (truncating, moving backwards) is not visible here.
288 void SetPosition(nsScannerIterator
& aPosition
,
289 PRBool aTruncate
= PR_FALSE
,
290 PRBool aReverse
= PR_FALSE
);
291 void ReplaceCharacter(nsScannerIterator
& aPosition
,
295 * Internal method used to cause the internal buffer to
296 * be filled with data.
// Returns the current value of the mIncremental flag.
300 PRBool
IsIncremental(void) {return mIncremental
;}
// Stores anIncrValue in the mIncremental flag.
301 void SetIncremental(PRBool anIncrValue
) {mIncremental
=anIncrValue
;}
304 * Return the position of the first non-whitespace
305 * character. This is only reliable before consumers start
306 * reading from this scanner.
308 PRInt32
FirstNonWhitespacePosition()
310 return mFirstNonWhitespacePosition
;
313 void SetParser(nsParser
*aParser
)
// Buffer-level append taking a buffer object plus the associated request.
// The nsAString overload in this file calls it with a null request.
// NOTE(review): the meaning of the PRBool result is not visible in this
// excerpt.
320 PRBool
AppendToBuffer(nsScannerString::Buffer
*, nsIRequest
*aRequest
);
321 PRBool
AppendToBuffer(const nsAString
& aStr
)
323 nsScannerString::Buffer
* buf
= nsScannerString::AllocBufferFromString(aStr
);
326 AppendToBuffer(buf
, nsnull
);
// Scanner state. NOTE(review): ownership and initialization of these members
// are not visible in this excerpt.

// The scanner's buffer object; presumably what the iterators below point
// into -- confirm.
330 nsScannerString
* mSlidingBuffer
;
331 nsScannerIterator mCurrentPosition
; // The position we will next read from in the scanner buffer
332 nsScannerIterator mMarkPosition
; // The position last marked (we may rewind to here)
333 nsScannerIterator mEndPosition
; // The current end of the scanner buffer
335 PRUint32 mCountRemaining
; // The number of bytes still to be read
336 // from the scanner buffer
// Flag read by IsIncremental() and written by SetIncremental().
337 PRPackedBool mIncremental
;
// Value returned by FirstNonWhitespacePosition().
338 PRInt32 mFirstNonWhitespacePosition
;
// Presumably records where the charset information came from (the aSource
// arguments) -- confirm.
339 PRInt32 mCharsetSource
;
// The scanner's unicode decoder (see SetDocumentCharset()).
341 nsCOMPtr
<nsIUnicodeDecoder
> mUnicodeDecoder
;
// Copy assignment is declared but deliberately left without an
// implementation, so scanners cannot be assigned to one another.
345 nsScanner
&operator =(const nsScanner
&); // Not implemented.