Bug 436663. Work around ATSUI crasher caused by long Hebrew sequence. r=roc, sr=vlad
[wine-gecko.git] / parser / htmlparser / src / nsScanner.h
blob767aee23f286a49170e349ca65a12679651e8d8c
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * The scanner is a low-level service class that knows
 * how to consume characters out of an (internal) stream.
 * This class also offers a series of utility methods
 * that most tokenizers want, such as ReadUntil()
 * and SkipWhitespace().
 */
51 #ifndef SCANNER
52 #define SCANNER
54 #include "nsCOMPtr.h"
55 #include "nsString.h"
56 #include "nsIParser.h"
57 #include "prtypes.h"
58 #include "nsIUnicodeDecoder.h"
59 #include "nsScannerString.h"
61 class nsParser;
63 class nsReadEndCondition {
64 public:
65 const PRUnichar *mChars;
66 PRUnichar mFilter;
67 explicit nsReadEndCondition(const PRUnichar* aTerminateChars);
68 private:
69 nsReadEndCondition(const nsReadEndCondition& aOther); // No copying
70 void operator=(const nsReadEndCondition& aOther); // No assigning
73 class nsScanner {
74 public:
76 /**
77 * Use this constructor if you want i/o to be based on
78 * a single string you hand in during construction.
79 * This short cut was added for Javascript.
81 * @update ftang 3/02/99
82 * @param aCharset charset
83 * @param aCharsetSource - where the charset info came from
84 * @param aMode represents the parser mode (nav, other)
85 * @return
87 nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, PRInt32 aSource);
89 /**
90 * Use this constructor if you want i/o to be based on
91 * a file (therefore a stream) or just data you provide via Append().
93 * @update ftang 3/02/99
94 * @param aCharset charset
95 * @param aCharsetSource - where the charset info came from
96 * @param aMode represents the parser mode (nav, other)
97 * @return
99 nsScanner(nsString& aFilename,PRBool aCreateStream, const nsACString& aCharset, PRInt32 aSource);
101 ~nsScanner();
104 * retrieve next char from internal input stream
106 * @update gess 3/25/98
107 * @param ch is the char to accept new value
108 * @return error code reflecting read status
110 nsresult GetChar(PRUnichar& ch);
113 * peek ahead to consume next char from scanner's internal
114 * input buffer
116 * @update gess 3/25/98
117 * @param ch is the char to accept new value
118 * @return error code reflecting read status
120 nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0);
122 nsresult Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset = 0);
125 * Skip over chars as long as they equal given char
127 * @update gess 3/25/98
128 * @param char to be skipped
129 * @return error code
131 nsresult SkipOver(PRUnichar aSkipChar);
134 * Skip whitespace on scanner input stream
136 * @update gess 3/25/98
137 * @return error status
139 nsresult SkipWhitespace(PRInt32& aNewlinesSkipped);
142 * Consume characters until you run into space, a '<', a '>', or a '/'.
144 * @param aString - receives new data from stream
145 * @return error code
147 nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
150 * Consume characters until you run into a char that's not valid in an
151 * entity name
153 * @param aString - receives new data from stream
154 * @return error code
156 nsresult ReadEntityIdentifier(nsString& aString);
157 nsresult ReadNumber(nsString& aString,PRInt32 aBase);
158 nsresult ReadWhitespace(nsScannerSharedSubstring& aString,
159 PRInt32& aNewlinesSkipped,
160 PRBool& aHaveCR);
161 nsresult ReadWhitespace(nsScannerIterator& aStart,
162 nsScannerIterator& aEnd,
163 PRInt32& aNewlinesSkipped);
166 * Consume characters until you find the terminal char
168 * @update gess 3/25/98
169 * @param aString receives new data from stream
170 * @param aTerminal contains terminating char
171 * @param addTerminal tells us whether to append terminal to aString
172 * @return error code
174 nsresult ReadUntil(nsAString& aString,
175 PRUnichar aTerminal,
176 PRBool addTerminal);
179 * Consume characters until you find one contained in given
180 * terminal set.
182 * @update gess 3/25/98
183 * @param aString receives new data from stream
184 * @param aTermSet contains set of terminating chars
185 * @param addTerminal tells us whether to append terminal to aString
186 * @return error code
188 nsresult ReadUntil(nsAString& aString,
189 const nsReadEndCondition& aEndCondition,
190 PRBool addTerminal);
192 nsresult ReadUntil(nsScannerSharedSubstring& aString,
193 const nsReadEndCondition& aEndCondition,
194 PRBool addTerminal);
196 nsresult ReadUntil(nsScannerIterator& aStart,
197 nsScannerIterator& aEnd,
198 const nsReadEndCondition& aEndCondition,
199 PRBool addTerminal);
202 * Records current offset position in input stream. This allows us
203 * to back up to this point if the need should arise, such as when
204 * tokenization gets interrupted.
206 * @update gess 5/12/98
207 * @param
208 * @return
210 void Mark(void);
213 * Resets current offset position of input stream to marked position.
214 * This allows us to back up to this point if the need should arise,
215 * such as when tokenization gets interrupted.
216 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
218 * @update gess 5/12/98
219 * @param
220 * @return
222 void RewindToMark(void);
228 * @update harishd 01/12/99
229 * @param
230 * @return
232 PRBool UngetReadable(const nsAString& aBuffer);
237 * @update gess 5/13/98
238 * @param
239 * @return
241 nsresult Append(const nsAString& aBuffer);
246 * @update gess 5/21/98
247 * @param
248 * @return
250 nsresult Append(const char* aBuffer, PRUint32 aLen,
251 nsIRequest *aRequest);
254 * Call this to copy bytes out of the scanner that have not yet been consumed
255 * by the tokenization process.
257 * @update gess 5/12/98
258 * @param aCopyBuffer is where the scanner buffer will be copied to
259 * @return nada
261 void CopyUnusedData(nsString& aCopyBuffer);
264 * Retrieve the name of the file that the scanner is reading from.
265 * In some cases, it's just a given name, because the scanner isn't
266 * really reading from a file.
268 * @update gess 5/12/98
269 * @return
271 nsString& GetFilename(void);
273 static void SelfTest();
276 * Use this setter to change the scanner's unicode decoder
278 * @update ftang 3/02/99
279 * @param aCharset a normalized (alias resolved) charset name
280 * @param aCharsetSource- where the charset info came from
281 * @return
283 nsresult SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
285 void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
286 void CurrentPosition(nsScannerIterator& aPosition);
287 void EndReading(nsScannerIterator& aPosition);
288 void SetPosition(nsScannerIterator& aPosition,
289 PRBool aTruncate = PR_FALSE,
290 PRBool aReverse = PR_FALSE);
291 void ReplaceCharacter(nsScannerIterator& aPosition,
292 PRUnichar aChar);
295 * Internal method used to cause the internal buffer to
296 * be filled with data.
298 * @update gess4/3/98
300 PRBool IsIncremental(void) {return mIncremental;}
301 void SetIncremental(PRBool anIncrValue) {mIncremental=anIncrValue;}
304 * Return the position of the first non-whitespace
305 * character. This is only reliable before consumers start
306 * reading from this scanner.
308 PRInt32 FirstNonWhitespacePosition()
310 return mFirstNonWhitespacePosition;
313 void SetParser(nsParser *aParser)
315 mParser = aParser;
318 protected:
320 PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest);
321 PRBool AppendToBuffer(const nsAString& aStr)
323 nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
324 if (!buf)
325 return PR_FALSE;
326 AppendToBuffer(buf, nsnull);
327 return PR_TRUE;
330 nsScannerString* mSlidingBuffer;
331 nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
332 nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
333 nsScannerIterator mEndPosition; // The current end of the scanner buffer
334 nsString mFilename;
335 PRUint32 mCountRemaining; // The number of bytes still to be read
336 // from the scanner buffer
337 PRPackedBool mIncremental;
338 PRInt32 mFirstNonWhitespacePosition;
339 PRInt32 mCharsetSource;
340 nsCString mCharset;
341 nsIUnicodeDecoder *mUnicodeDecoder;
342 nsParser *mParser;
345 #endif