Bug 436663. Work around ATSUI crasher caused by long Hebrew sequence. r=roc, sr=vlad
[wine-gecko.git] / parser / htmlparser / public / nsHTMLTokens.h
blob4dcefe582d53aeaa7e516a05490047ce194a4445
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * This file contains the declarations for all the HTML-specific token types that
 * our DTDs understand. In fact, the same set of token types is used for XML.
 * Currently we have tokens for text, comments, start and end tags, entities,
 * attributes, style, script and skipped content. Whitespace and newlines also
 * have their own token types, but don't count on them to stay forever.
 *
 * If you're looking for the HTML tags, they're in a file called nsHTMLTags.h/cpp.
 *
 * Most of the token types have a similar API. They have methods to get the type
 * of token (GetTokenType); those that represent HTML tags also have a method to
 * get the tag type (GetTypeID). In addition, most have a method that causes the
 * token to help in the parsing process, called Consume. We've also thrown in a
 * few standard debugging methods as well.
 */
57 #ifndef HTMLTOKENS_H
58 #define HTMLTOKENS_H
60 #include "nsToken.h"
61 #include "nsHTMLTags.h"
62 #include "nsString.h"
63 #include "nsScannerString.h"
65 class nsScanner;
/*******************************************************************
 * This enum defines the set of token types that we currently support.
 *
 * Values are assigned sequentially: eToken_unknown is 0, eToken_start is 1,
 * and every following enumerator is one greater than its predecessor.
 * eToken_last is a sentinel and must remain the final enumerator.
 *******************************************************************/
enum eHTMLTokenTypes {
  eToken_unknown=0,
  eToken_start=1,      eToken_end,          eToken_comment,      eToken_entity,
  eToken_whitespace,   eToken_newline,      eToken_text,         eToken_attribute,
  eToken_instruction,  eToken_cdatasection, eToken_doctypeDecl,  eToken_markupDecl,
  eToken_last //make sure this stays the last token...
};
79 nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
80 nsresult ConsumeAttributeText(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
81 const PRUnichar* GetTagName(PRInt32 aTag);
82 //PRInt32 FindEntityIndex(nsString& aString,PRInt32 aCount=-1);
86 /**
87 * This declares the basic token type used in the HTML DTD's.
88 * @update gess 3/25/98
90 class CHTMLToken : public CToken {
91 public:
92 virtual ~CHTMLToken();
93 CHTMLToken(eHTMLTags aTag);
95 virtual eContainerInfo GetContainerInfo(void) const {return eFormUnknown;}
96 virtual void SetContainerInfo(eContainerInfo aInfo) { }
98 protected:
102 * This declares start tokens, which always take the form <xxxx>.
103 * This class also knows how to consume related attributes.
105 * @update gess 3/25/98
107 class CStartToken: public CHTMLToken {
108 CTOKEN_IMPL_SIZEOF
110 public:
111 CStartToken(eHTMLTags aTag=eHTMLTag_unknown);
112 CStartToken(const nsAString& aString);
113 CStartToken(const nsAString& aName,eHTMLTags aTag);
115 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
116 virtual PRInt32 GetTypeID(void);
117 virtual PRInt32 GetTokenType(void);
119 virtual PRBool IsEmpty(void);
120 virtual void SetEmpty(PRBool aValue);
122 virtual const nsSubstring& GetStringValue();
123 virtual void GetSource(nsString& anOutputString);
124 virtual void AppendSourceTo(nsAString& anOutputString);
126 // the following info is used to set well-formedness state on start tags...
127 virtual eContainerInfo GetContainerInfo(void) const {return mContainerInfo;}
128 virtual void SetContainerInfo(eContainerInfo aContainerInfo) {
129 if (eFormUnknown==mContainerInfo) {
130 mContainerInfo=aContainerInfo;
133 virtual PRBool IsWellFormed(void) const {
134 return eWellFormed == mContainerInfo;
137 nsString mTextValue;
138 protected:
139 eContainerInfo mContainerInfo;
140 PRPackedBool mEmpty;
141 #ifdef DEBUG
142 PRPackedBool mAttributed;
143 #endif
148 * This declares end tokens, which always take the
149 * form </xxxx>. This class also knows how to consume
150 * related attributes.
152 * @update gess 3/25/98
154 class CEndToken: public CHTMLToken {
155 CTOKEN_IMPL_SIZEOF
157 public:
158 CEndToken(eHTMLTags aTag);
159 CEndToken(const nsAString& aString);
160 CEndToken(const nsAString& aName,eHTMLTags aTag);
161 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
162 virtual PRInt32 GetTypeID(void);
163 virtual PRInt32 GetTokenType(void);
165 virtual const nsSubstring& GetStringValue();
166 virtual void GetSource(nsString& anOutputString);
167 virtual void AppendSourceTo(nsAString& anOutputString);
169 protected:
170 nsString mTextValue;
175 * This declares comment tokens. Comments are usually
176 * thought of as tokens, but we treat them that way
177 * here so that the parser can have a consistent view
178 * of all tokens.
180 * @update gess 3/25/98
182 class CCommentToken: public CHTMLToken {
183 CTOKEN_IMPL_SIZEOF
185 public:
186 CCommentToken();
187 CCommentToken(const nsAString& aString);
188 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
189 virtual PRInt32 GetTokenType(void);
190 virtual const nsSubstring& GetStringValue(void);
191 virtual void AppendSourceTo(nsAString& anOutputString);
193 nsresult ConsumeStrictComment(nsScanner& aScanner);
194 nsresult ConsumeQuirksComment(nsScanner& aScanner);
196 protected:
197 nsScannerSubstring mComment; // does not include MDO & MDC
198 nsScannerSubstring mCommentDecl; // includes MDO & MDC
203 * This class declares entity tokens, which always take
204 * the form &xxxx;. This class also offers a few utility
205 * methods that allow you to easily reduce entities.
207 * @update gess 3/25/98
209 class CEntityToken : public CHTMLToken {
210 CTOKEN_IMPL_SIZEOF
212 public:
213 CEntityToken();
214 CEntityToken(const nsAString& aString);
215 virtual PRInt32 GetTokenType(void);
216 PRInt32 TranslateToUnicodeStr(nsString& aString);
217 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
218 static nsresult ConsumeEntity(PRUnichar aChar, nsString& aString,
219 nsScanner& aScanner);
220 static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
222 virtual const nsSubstring& GetStringValue(void);
223 virtual void GetSource(nsString& anOutputString);
224 virtual void AppendSourceTo(nsAString& anOutputString);
226 protected:
227 nsString mTextValue;
232 * Whitespace tokens are used where whitespace can be
233 * detected as distinct from text. This allows us to
234 * easily skip leading/trailing whitespace when desired.
236 * @update gess 3/25/98
238 class CWhitespaceToken: public CHTMLToken {
239 CTOKEN_IMPL_SIZEOF
241 public:
242 CWhitespaceToken();
243 CWhitespaceToken(const nsAString& aString);
244 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
245 virtual PRInt32 GetTokenType(void);
246 virtual const nsSubstring& GetStringValue(void);
248 protected:
249 nsScannerSharedSubstring mTextValue;
253 * Text tokens contain the normalized form of html text.
254 * These tokens are guaranteed not to contain entities,
255 * start or end tags, or newlines.
257 * @update gess 3/25/98
259 class CTextToken: public CHTMLToken {
260 CTOKEN_IMPL_SIZEOF
262 public:
263 CTextToken();
264 CTextToken(const nsAString& aString);
265 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
266 virtual PRInt32 GetTokenType(void);
267 virtual PRInt32 GetTextLength(void);
268 virtual void CopyTo(nsAString& aStr);
269 virtual const nsSubstring& GetStringValue(void);
270 virtual void Bind(nsScanner* aScanner, nsScannerIterator& aStart,
271 nsScannerIterator& aEnd);
272 virtual void Bind(const nsAString& aStr);
274 nsresult ConsumeCharacterData(PRBool aIgnoreComments,
275 nsScanner& aScanner,
276 const nsAString& aEndTagName,
277 PRInt32 aFlag,
278 PRBool& aFlushTokens);
280 nsresult ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
281 PRBool aConservativeConsume,
282 nsScanner& aScanner,
283 const nsAString& aEndTagName,
284 PRInt32 aFlag,
285 PRBool& aFound);
287 protected:
288 nsScannerSubstring mTextValue;
293 * CDATASection tokens contain raw unescaped text content delimited by
294 * a ![CDATA[ and ]].
295 * XXX Not really a HTML construct - maybe we need a separation
297 * @update vidur 11/12/98
299 class CCDATASectionToken : public CHTMLToken {
300 CTOKEN_IMPL_SIZEOF
302 public:
303 CCDATASectionToken(eHTMLTags aTag = eHTMLTag_unknown);
304 CCDATASectionToken(const nsAString& aString);
305 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
306 virtual PRInt32 GetTokenType(void);
307 virtual const nsSubstring& GetStringValue(void);
309 protected:
310 nsString mTextValue;
315 * Declaration tokens contain raw unescaped text content (not really, but
316 * right now we use this only for view source).
317 * XXX Not really a HTML construct - maybe we need a separation
320 class CMarkupDeclToken : public CHTMLToken {
321 CTOKEN_IMPL_SIZEOF
323 public:
324 CMarkupDeclToken();
325 CMarkupDeclToken(const nsAString& aString);
326 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
327 virtual PRInt32 GetTokenType(void);
328 virtual const nsSubstring& GetStringValue(void);
330 protected:
331 nsScannerSubstring mTextValue;
336 * Attribute tokens are used to contain attribute key/value
337 * pairs whereever they may occur. Typically, they should
338 * occur only in start tokens. However, we may expand that
339 * ability when XML tokens become commonplace.
341 * @update gess 3/25/98
343 class CAttributeToken: public CHTMLToken {
344 CTOKEN_IMPL_SIZEOF
346 public:
347 CAttributeToken();
348 CAttributeToken(const nsAString& aString);
349 CAttributeToken(const nsAString& aKey, const nsAString& aString);
350 ~CAttributeToken() {}
351 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
352 virtual PRInt32 GetTokenType(void);
353 const nsSubstring& GetKey(void) { return mTextKey.AsString(); }
354 virtual void SetKey(const nsAString& aKey);
355 virtual void BindKey(nsScanner* aScanner, nsScannerIterator& aStart,
356 nsScannerIterator& aEnd);
357 const nsSubstring& GetValue(void) {return mTextValue.str();}
358 virtual const nsSubstring& GetStringValue(void);
359 virtual void GetSource(nsString& anOutputString);
360 virtual void AppendSourceTo(nsAString& anOutputString);
362 PRPackedBool mHasEqualWithoutValue;
363 protected:
364 nsScannerSharedSubstring mTextValue;
365 nsScannerSubstring mTextKey;
370 * Newline tokens contain, you guessed it, newlines.
371 * They consume newline (CR/LF) either alone or in pairs.
373 * @update gess 3/25/98
375 class CNewlineToken: public CHTMLToken {
376 CTOKEN_IMPL_SIZEOF
378 public:
379 CNewlineToken();
380 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
381 virtual PRInt32 GetTokenType(void);
382 virtual const nsSubstring& GetStringValue(void);
384 static void AllocNewline();
385 static void FreeNewline();
390 * Whitespace tokens are used where whitespace can be
391 * detected as distinct from text. This allows us to
392 * easily skip leading/trailing whitespace when desired.
394 * @update gess 3/25/98
396 class CInstructionToken: public CHTMLToken {
397 CTOKEN_IMPL_SIZEOF
399 public:
400 CInstructionToken();
401 CInstructionToken(const nsAString& aString);
402 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
403 virtual PRInt32 GetTokenType(void);
404 virtual const nsSubstring& GetStringValue(void);
406 protected:
407 nsString mTextValue;
412 * This token is generated by the HTML and Expat tokenizers
413 * when they see the doctype declaration ("<!DOCTYPE ... >")
417 class CDoctypeDeclToken: public CHTMLToken {
418 CTOKEN_IMPL_SIZEOF
420 public:
421 CDoctypeDeclToken(eHTMLTags aTag=eHTMLTag_unknown);
422 CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag=eHTMLTag_unknown);
423 virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
424 virtual PRInt32 GetTokenType(void);
425 virtual const nsSubstring& GetStringValue(void);
426 virtual void SetStringValue(const nsAString& aStr);
428 protected:
429 nsString mTextValue;
432 #endif