Bug 436663. Work around ATSUI crasher caused by long Hebrew sequence. r=roc, sr=vlad
[wine-gecko.git] / parser / htmlparser / src / nsParser.h
blob2799fc79b081dce247fc8b9a570b5f0ed7c75465
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 /**
39 * MODULE NOTES:
41 * This class does two primary jobs:
42 * 1) It iterates the tokens provided during the
43 * tokenization process, identifying where elements
44 * begin and end (doing validation and normalization).
45 * 2) It controls and coordinates with an instance of
46 * the IContentSink interface, to coordinate the
47 * production of the content model.
49 * The basic operation of this class assumes that an HTML
50 * document is non-normalized. Therefore, we don't process
51 * the document in a normalized way. Don't bother to look
52 * for methods like: doHead() or doBody().
54 * Instead, in order to be backward compatible, we must
55 * scan the set of tokens and perform this basic set of
56 * operations:
57 * 1) Determine the token type (easy, since the tokens know)
58 * 2) Determine the appropriate section of the HTML document
59 * each token belongs in (HTML,HEAD,BODY,FRAMESET).
60 * 3) Insert content into our document (via the sink) into
61 * the correct section.
62 * 4) In the case of tags that belong in the BODY, we must
63 * ensure that our underlying document state reflects
64 * the appropriate context for our tag.
66 * For example, if we see a <TR>, we must ensure our
67 * document contains a table into which the row can
68 * be placed. This may result in "implicit containers"
69 * created to ensure a well-formed document.
73 #ifndef NS_PARSER__
74 #define NS_PARSER__
76 #include "nsIParser.h"
77 #include "nsDeque.h"
78 #include "nsParserNode.h"
79 #include "nsIURL.h"
80 #include "CParserContext.h"
81 #include "nsParserCIID.h"
82 #include "nsITokenizer.h"
83 #include "nsHTMLTags.h"
84 #include "nsDTDUtils.h"
85 #include "nsTimer.h"
86 #include "nsThreadUtils.h"
87 #include "nsIContentSink.h"
88 #include "nsIParserFilter.h"
89 #include "nsCOMArray.h"
90 #include "nsIUnicharStreamListener.h"
91 #include "nsCycleCollectionParticipant.h"
93 class nsICharsetConverterManager;
94 class nsICharsetAlias;
95 class nsIDTD;
96 class nsScanner;
97 class nsIProgressEventSink;
99 #ifdef _MSC_VER
100 #pragma warning( disable : 4275 )
101 #endif
104 class nsParser : public nsIParser,
105 public nsIStreamListener{
108 public:
109 friend class CTokenHandler;
111 * Called on module init
113 static nsresult Init();
116 * Called on module shutdown
118 static void Shutdown();
120 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
121 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
124 * default constructor
125 * @update gess5/11/98
127 nsParser();
130 * Destructor
131 * @update gess5/11/98
133 virtual ~nsParser();
136 * Select given content sink into parser for parser output
137 * @update gess5/11/98
138 * @param aSink is the new sink to be used by parser
139 * @return nada
141 NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
144 * Retrieve the sink set into the parser
145 * @update gess5/11/98
146 * @param none
147 * @return ptr to the content sink, or NULL
149 NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
152 * Call this method once you've created a parser, and want to instruct it
153 * about the command which caused the parser to be constructed. For example,
154 * this allows us to select a DTD which can do, say, view-source.
156 * @update gess 3/25/98
157 * @param aCommand -- ptrs to string that contains command
158 * @return nada
160 NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
161 NS_IMETHOD_(void) SetCommand(const char* aCommand);
162 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
165 * Call this method once you've created a parser, and want to instruct it
166 * about what charset to load
168 * @update ftang 4/23/99
169 * @param aCharset- the charset of a document
170 * @param aCharsetSource- the source of the charset
171 * @return nada
173 NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
175 NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, PRInt32& aSource)
177 aCharset = mCharset;
178 aSource = mCharsetSource;
182 NS_IMETHOD_(void) SetParserFilter(nsIParserFilter* aFilter);
185 * Retrieve the parse mode (nsDTDMode) of the parser
187 * @update gess 6/9/98
188 * @return the nsDTDMode
190 NS_IMETHOD_(nsDTDMode) GetParseMode(void);
193 * Cause parser to parse input from given URL
194 * @update gess5/11/98
195 * @param aURL is a descriptor for source document
196 * @param aListener is a listener to forward notifications to
197 * @return TRUE if all went well -- FALSE otherwise
199 NS_IMETHOD Parse(nsIURI* aURL,
200 nsIRequestObserver* aListener = nsnull,
201 void* aKey = 0,
202 nsDTDMode aMode = eDTDMode_autodetect);
205 * @update gess5/11/98
206 * @param aSourceBuffer contains a string-full of real HTML
207 * @param appendTokens tells us whether we should insert tokens inline, or append them.
208 * @return TRUE if all went well -- FALSE otherwise
210 NS_IMETHOD Parse(const nsAString& aSourceBuffer,
211 void* aKey,
212 const nsACString& aContentType,
213 PRBool aLastCall,
214 nsDTDMode aMode = eDTDMode_autodetect);
216 NS_IMETHOD_(void *) GetRootContextKey();
219 * This method needs documentation
221 NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
222 void* aKey,
223 nsTArray<nsString>& aTagStack,
224 PRBool aXMLMode,
225 const nsACString& aContentType,
226 nsDTDMode aMode = eDTDMode_autodetect);
230 * This method gets called when the tokens have been consumed, and it's time
231 * to build the model via the content sink.
232 * @update gess5/11/98
233 * @return YES if model building went well -- NO otherwise.
235 NS_IMETHOD BuildModel(void);
238 * Call this when you want to control whether or not the parser will parse
239 * and tokenize input (TRUE), or whether it just caches input to be
240 * parsed later (FALSE).
242 * @update gess 9/1/98
243 * @param aState determines whether we parse/tokenize or just cache.
244 * @return current state
246 NS_IMETHOD ContinueParsing();
247 NS_IMETHOD ContinueInterruptedParsing();
248 NS_IMETHOD_(void) BlockParser();
249 NS_IMETHOD_(void) UnblockParser();
250 NS_IMETHOD Terminate(void);
253 * Call this to query whether the parser is enabled or not.
255 * @update vidur 4/12/99
256 * @return current state
258 NS_IMETHOD_(PRBool) IsParserEnabled();
261 * Call this to query whether the parser thinks it's done with parsing.
263 * @update rickg 5/12/01
264 * @return complete state
266 NS_IMETHOD_(PRBool) IsComplete();
269 * This rather arcane method (hack) is used as a signal between the
270 * DTD and the parser. It allows the DTD to tell the parser that content
271 * that comes through (parser::parser(string)) but not consumed should
272 * propagate into the next string based parse call.
274 * @update gess 9/1/98
275 * @param aBuffer presumably the unused string content to propagate -- TODO confirm
276 * @return nada
278 void SetUnusedInput(nsString& aBuffer);
281 * This method gets called (automatically) during incremental parsing
282 * @update gess5/11/98
283 * @return TRUE if all went well, otherwise FALSE
285 virtual nsresult ResumeParse(PRBool allowIteration = PR_TRUE,
286 PRBool aIsFinalChunk = PR_FALSE,
287 PRBool aCanInterrupt = PR_TRUE);
289 //*********************************************
290 // These methods are callback methods used by
291 // net lib to let us know about our inputstream.
292 //*********************************************
293 // nsIRequestObserver methods:
294 NS_DECL_NSIREQUESTOBSERVER
296 // nsIStreamListener methods:
297 NS_DECL_NSISTREAMLISTENER
299 void PushContext(CParserContext& aContext);
300 CParserContext* PopContext();
301 CParserContext* PeekContext() {return mParserContext;}
303 /**
304 * Get the channel associated with this parser
305 * @update harishd,gagan 07/17/01
306 * @param aChannel out param that will contain the result
307 * @return NS_OK if successful
309 NS_IMETHOD GetChannel(nsIChannel** aChannel);
311 /**
312 * Get the DTD associated with this parser
313 * @update vidur 9/29/99
314 * @param aDTD out param that will contain the result
315 * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
317 NS_IMETHOD GetDTD(nsIDTD** aDTD);
319 /**
320 * Detects the existence of a META tag with charset information in
321 * the given buffer.
323 PRBool DetectMetaTag(const char* aBytes,
324 PRInt32 aLen,
325 nsCString& oCharset,
326 PRInt32& oCharsetSource);
328 void SetSinkCharset(nsACString& aCharset);
331 * Removes continue parsing events
332 * @update kmcclusk 5/18/98
335 NS_IMETHODIMP CancelParsingEvents();
337 /**
338 * Indicates whether the parser is in a state where it
339 * can be interrupted.
340 * @return PR_TRUE if parser can be interrupted, PR_FALSE if it can not be interrupted.
341 * @update kmcclusk 5/18/98
343 PRBool CanInterrupt(void);
345 /**
346 * Set the parser state to indicate whether parsing tokens can be interrupted
347 * @param aCanInterrupt PR_TRUE if parser can be interrupted, PR_FALSE if it can not be interrupted.
348 * @update kmcclusk 5/18/98
350 void SetCanInterrupt(PRBool aCanInterrupt);
353 * This is called when the final chunk has been
354 * passed to the parser and the content sink has
355 * interrupted token processing. It schedules
356 * a ParserContinue PL_Event which will ask the parser
357 * to HandleParserContinueEvent when it is handled.
358 * @update kmcclusk6/1/2001
360 nsresult PostContinueEvent();
363 * Fired when the continue parse event is triggered.
364 * @update kmcclusk 5/18/98
366 void HandleParserContinueEvent(class nsParserContinueEvent *);
369 * Called by top-level scanners when data from necko is added to
370 * the scanner.
372 nsresult DataAdded(const nsSubstring& aData, nsIRequest *aRequest);
374 static nsCOMArray<nsIUnicharStreamListener> *sParserDataListeners;
376 static nsICharsetAlias* GetCharsetAliasService() {
377 return sCharsetAliasService;
380 static nsICharsetConverterManager* GetCharsetConverterManager() {
381 return sCharsetConverterManager;
384 virtual void Reset() {
385 Cleanup();
386 Initialize();
389 protected:
391 void Initialize(PRBool aConstructor = PR_FALSE);
392 void Cleanup();
396 * @update gess5/18/98
397 * @param
398 * @return
400 nsresult WillBuildModel(nsString& aFilename);
404 * @update gess5/18/98
405 * @param
406 * @return
408 nsresult DidBuildModel(nsresult anErrorCode);
410 private:
412 /*******************************************
413 These are the tokenization methods...
414 *******************************************/
417 * Part of the code sandwich, this gets called right before
418 * the tokenization process begins. The main reason for
419 * this call is to allow the delegate to do initialization.
421 * @update gess 3/25/98
422 * @param
423 * @return TRUE if it's ok to proceed
425 PRBool WillTokenize(PRBool aIsFinalChunk = PR_FALSE);
429 * This is the primary control routine. It iteratively
430 * consumes tokens until an error occurs or you run out
431 * of data.
433 * @update gess 3/25/98
434 * @return error code
436 nsresult Tokenize(PRBool aIsFinalChunk = PR_FALSE);
439 * This is the tail-end of the code sandwich for the
440 * tokenization process. It gets called once tokenization
441 * has completed.
443 * @update gess 3/25/98
444 * @param
445 * @return TRUE if all went well
447 PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE);
450 protected:
451 //*********************************************
452 // And now, some data members...
453 //*********************************************
456 CParserContext* mParserContext;
457 nsCOMPtr<nsIRequestObserver> mObserver;
458 nsCOMPtr<nsIContentSink> mSink;
459 nsIRunnable* mContinueEvent; // weak ref
461 nsCOMPtr<nsIParserFilter> mParserFilter;
462 nsTokenAllocator mTokenAllocator;
464 eParserCommands mCommand;
465 nsresult mInternalState;
466 PRInt32 mStreamStatus;
467 PRInt32 mCharsetSource;
469 PRUint16 mFlags;
471 nsString mUnusedInput;
472 nsCString mCharset;
473 nsCString mCommandStr;
475 static nsICharsetAlias* sCharsetAliasService;
476 static nsICharsetConverterManager* sCharsetConverterManager;
478 public:
480 MOZ_TIMER_DECLARE(mParseTime)
481 MOZ_TIMER_DECLARE(mDTDTime)
482 MOZ_TIMER_DECLARE(mTokenizeTime)
485 #endif