Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / parser / htmlparser / src / nsParser.cpp
blob32db962f1e2128c6be5d98b1eae1e3773b7366bc
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=79: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
19 * Netscape Communications Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 1998
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Pierre Phaneuf <pp@ludusdesign.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nsIAtom.h"
41 #include "nsParser.h"
42 #include "nsString.h"
43 #include "nsCRT.h"
44 #include "nsScanner.h"
45 #include "plstr.h"
46 #include "nsIStringStream.h"
47 #include "nsIChannel.h"
48 #include "nsICachingChannel.h"
49 #include "nsICacheEntryDescriptor.h"
50 #include "nsICharsetAlias.h"
51 #include "nsICharsetConverterManager.h"
52 #include "nsIInputStream.h"
53 #include "CNavDTD.h"
54 #include "prenv.h"
55 #include "prlock.h"
56 #include "prcvar.h"
57 #include "nsAutoLock.h"
58 #include "nsParserCIID.h"
59 #include "nsReadableUtils.h"
60 #include "nsCOMPtr.h"
61 #include "nsExpatDriver.h"
62 #include "nsIServiceManager.h"
63 #include "nsICategoryManager.h"
64 #include "nsISupportsPrimitives.h"
65 #include "nsIFragmentContentSink.h"
66 #include "nsStreamUtils.h"
67 #include "nsHTMLTokenizer.h"
68 #include "nsIDocument.h"
69 #include "nsNetUtil.h"
70 #include "nsScriptLoader.h"
71 #include "nsDataHashtable.h"
72 #include "nsIThreadPool.h"
73 #include "nsXPCOMCIDInternal.h"
75 #ifdef MOZ_VIEW_SOURCE
76 #include "nsViewSourceHTML.h"
77 #endif
79 #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
80 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
81 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
82 #define NS_PARSER_FLAG_CAN_INTERRUPT 0x00000010
83 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
84 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
86 static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
87 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
88 static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
90 //-------------------------------------------------------------------
92 nsCOMArray<nsIUnicharStreamListener> *nsParser::sParserDataListeners;
94 //-------------- Begin ParseContinue Event Definition ------------------------
96 The parser can be explicitly interrupted by passing a return value of
97 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
98 the parser to stop processing and allow the application to return to the event
99 loop. The data which was left at the time of interruption will be processed
100 the next time OnDataAvailable is called. If the parser has received its final
101 chunk of data then OnDataAvailable will no longer be called by the networking
102 module, so the parser will schedule a nsParserContinueEvent which will call
103 the parser to process the remaining data after returning to the event loop.
104 If the parser is interrupted while processing the remaining data it will
105 schedule another ParseContinueEvent. The processing of data followed by
106 scheduling of the continue events will proceed until either:
108 1) All of the remaining data can be processed without interrupting
109 2) The parser has been cancelled.
112 This capability is currently used in CNavDTD and nsHTMLContentSink. The
113 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
114 processed and when each token is processed. The nsHTML content sink records
115 the time when the chunk has started processing and will return
116 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
117 threshold called max tokenizing processing time. This allows the content sink
118 to limit how much data is processed in a single chunk which in turn gates how
119 much time is spent away from the event loop. Processing smaller chunks of data
120 also reduces the time spent in subsequent reflows.
122 This capability is most apparent when loading large documents. If the maximum
123 token processing time is set small enough the application will remain
124 responsive during document load.
126 A side-effect of this capability is that document load is not complete when
127 the last chunk of data is passed to OnDataAvailable since the parser may have
128 been interrupted when the last chunk of data arrived. The document is complete
129 when all of the document has been tokenized and there aren't any pending
130 nsParserContinueEvents. This can cause problems if the application assumes
131 that it can monitor the load requests to determine when the document load has
132 been completed. This is what happens in Mozilla. The document is considered
133 completely loaded when all of the load requests have been satisfied. To delay
134 the document load until all of the parsing has been completed the
135 nsHTMLContentSink adds a dummy parser load request which is not removed until
136 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
137 DidBuildModel until the final chunk of data has been passed to the parser
138 through the OnDataAvailable and there aren't any pending
139 nsParserContinueEvents.
141 Currently the parser ignores requests to be interrupted during the
142 processing of script. This is because a document.write followed by JavaScript
143 calls to manipulate the DOM may fail if the parser was interrupted during the
144 document.write.
146 For more details @see bugzilla bug 76722
150 class nsParserContinueEvent : public nsRunnable
152 public:
153 nsRefPtr<nsParser> mParser;
155 nsParserContinueEvent(nsParser* aParser)
156 : mParser(aParser)
159 NS_IMETHOD Run()
161 mParser->HandleParserContinueEvent(this);
162 return NS_OK;
166 //-------------- End ParseContinue Event Definition ------------------------
/**
 * Small owning wrapper around a raw pointer whose cleanup routine is supplied
 * as a plain function pointer (the "reaper").
 *
 * The held pointer starts out null. Assigning a different pointer reaps the
 * previously held one; assigning the pointer that is already held does
 * nothing. The destructor reaps whatever is still held.
 *
 * Holders are deliberately not copyable: a copy would hold the same pointer
 * and both objects would reap it.
 */
template <class Type>
class Holder {
public:
  typedef void (*Reaper)(Type *);

  /** @param aReaper function called to dispose of a held pointer. */
  Holder(Reaper aReaper)
    : mHoldee(0), mReaper(aReaper)
  {
  }

  ~Holder() {
    if (mHoldee) {
      mReaper(mHoldee);
    }
  }

  /** @return the held pointer (may be null); ownership is not transferred. */
  Type *get() const {
    return mHoldee;
  }

  /**
   * Take ownership of aHoldee, reaping the previously held pointer unless it
   * is the very same pointer.
   */
  const Holder &operator =(Type *aHoldee) {
    if (mHoldee && aHoldee != mHoldee) {
      mReaper(mHoldee);
    }

    mHoldee = aHoldee;
    return *this;
  }

private:
  // Declared but never defined: copying would lead to a double reap.
  Holder(const Holder &);
  Holder &operator =(const Holder &);

  Type *mHoldee;
  Reaper mReaper;
};
200 class nsSpeculativeScriptThread : public nsIRunnable {
201 public:
202 nsSpeculativeScriptThread()
203 : mLock(nsAutoLock::DestroyLock),
204 mCVar(PR_DestroyCondVar),
205 mKeepParsing(0),
206 mCurrentlyParsing(0),
207 mNumURIs(0),
208 mNumConsumed(0),
209 mContext(nsnull),
210 mTerminated(PR_FALSE) {
213 ~nsSpeculativeScriptThread() {
214 NS_ASSERTION(NS_IsMainThread() || !mDocument,
215 "Destroying the document on the wrong thread");
218 NS_DECL_ISUPPORTS
219 NS_DECL_NSIRUNNABLE
221 nsresult StartParsing(nsParser *aParser);
222 void StopParsing(PRBool aFromDocWrite);
224 enum PrefetchType { SCRIPT, STYLESHEET, IMAGE };
225 struct PrefetchEntry {
226 PrefetchType type;
227 nsString uri;
228 nsString charset;
229 nsString elementType;
232 nsIDocument *GetDocument() {
233 NS_ASSERTION(NS_IsMainThread(), "Potential threadsafety hazard");
234 return mDocument;
237 PRBool Parsing() {
238 return mCurrentlyParsing;
241 CParserContext *Context() {
242 return mContext;
245 typedef nsDataHashtable<nsCStringHashKey, PRBool> PreloadedType;
246 PreloadedType& GetPreloadedURIs() {
247 return mPreloadedURIs;
250 void Terminate() {
251 mTerminated = PR_TRUE;
252 StopParsing(PR_FALSE);
254 PRBool Terminated() {
255 return mTerminated;
258 private:
260 void ProcessToken(CToken *aToken);
262 void AddToPrefetchList(const nsAString &src,
263 const nsAString &charset,
264 const nsAString &elementType,
265 PrefetchType type);
267 // These members are only accessed on the speculatively parsing thread.
268 nsTokenAllocator mTokenAllocator;
270 // The following members are shared across the main thread and the
271 // speculatively parsing thread.
272 Holder<PRLock> mLock;
273 Holder<PRCondVar> mCVar;
275 volatile PRUint32 mKeepParsing;
276 volatile PRUint32 mCurrentlyParsing;
277 nsRefPtr<nsHTMLTokenizer> mTokenizer;
278 nsAutoPtr<nsScanner> mScanner;
280 enum { kBatchPrefetchURIs = 5 };
281 nsAutoTArray<PrefetchEntry, kBatchPrefetchURIs> mURIs;
282 PRUint16 mNumURIs;
284 // Number of characters consumed by the last speculative parse.
285 PRUint32 mNumConsumed;
287 // These members are only accessed on the main thread.
288 nsCOMPtr<nsIDocument> mDocument;
289 CParserContext *mContext;
290 PreloadedType mPreloadedURIs;
291 PRBool mTerminated;
294 class nsPreloadURIs : public nsIRunnable {
295 public:
296 nsPreloadURIs(nsAutoTArray<nsSpeculativeScriptThread::PrefetchEntry, 5> &aURIs,
297 nsSpeculativeScriptThread *aScriptThread)
298 : mURIs(aURIs),
299 mScriptThread(aScriptThread) {
302 NS_DECL_ISUPPORTS
303 NS_DECL_NSIRUNNABLE
305 static void PreloadURIs(const nsAutoTArray<nsSpeculativeScriptThread::PrefetchEntry, 5> &aURIs,
306 nsSpeculativeScriptThread *aScriptThread);
308 private:
309 nsAutoTArray<nsSpeculativeScriptThread::PrefetchEntry, 5> mURIs;
310 nsRefPtr<nsSpeculativeScriptThread> mScriptThread;
313 NS_IMPL_THREADSAFE_ISUPPORTS1(nsPreloadURIs, nsIRunnable)
315 NS_IMETHODIMP
316 nsPreloadURIs::Run()
318 PreloadURIs(mURIs, mScriptThread);
319 return NS_OK;
322 void
323 nsPreloadURIs::PreloadURIs(const nsAutoTArray<nsSpeculativeScriptThread::PrefetchEntry, 5> &aURIs,
324 nsSpeculativeScriptThread *aScriptThread)
326 NS_ASSERTION(NS_IsMainThread(), "Touching non-threadsafe objects off thread");
328 if (aScriptThread->Terminated()) {
329 return;
332 nsIDocument *doc = aScriptThread->GetDocument();
333 NS_ASSERTION(doc, "We shouldn't have started preloading without a document");
335 // Note: Per the code in the HTML content sink, we should be keeping track
336 // of each <base href> as it comes. However, because we do our speculative
337 // parsing off the main thread, this is hard to emulate. For now, just load
338 // the URIs using the document's base URI at the potential cost of being
339 // wrong and having to re-load a given relative URI later.
340 nsIURI *base = doc->GetBaseURI();
341 const nsCString &charset = doc->GetDocumentCharacterSet();
342 nsSpeculativeScriptThread::PreloadedType &alreadyPreloaded =
343 aScriptThread->GetPreloadedURIs();
344 for (PRUint32 i = 0, e = aURIs.Length(); i < e; ++i) {
345 const nsSpeculativeScriptThread::PrefetchEntry &pe = aURIs[i];
346 if (pe.type != nsSpeculativeScriptThread::SCRIPT) {
347 continue;
350 nsCOMPtr<nsIURI> uri;
351 nsresult rv = NS_NewURI(getter_AddRefs(uri), pe.uri, charset.get(), base);
352 if (NS_FAILED(rv)) {
353 NS_WARNING("Failed to create a URI");
354 continue;
357 nsCAutoString spec;
358 uri->GetSpec(spec);
359 PRBool answer;
360 if (alreadyPreloaded.Get(spec, &answer)) {
361 // Already preloaded. Don't preload again.
362 continue;
365 alreadyPreloaded.Put(spec, PR_TRUE);
367 doc->ScriptLoader()->PreloadURI(uri, pe.charset, pe.elementType);
371 NS_IMPL_THREADSAFE_ISUPPORTS1(nsSpeculativeScriptThread, nsIRunnable)
373 NS_IMETHODIMP
374 nsSpeculativeScriptThread::Run()
376 NS_ASSERTION(!NS_IsMainThread(), "Speculative parsing on the main thread?");
378 mNumConsumed = 0;
380 mTokenizer->WillTokenize(PR_FALSE, &mTokenAllocator);
381 while (mKeepParsing) {
382 PRBool flushTokens = PR_FALSE;
383 nsresult rv = mTokenizer->ConsumeToken(*mScanner, flushTokens);
384 if (NS_FAILED(rv)) {
385 break;
388 mNumConsumed += mScanner->Mark();
390 // TODO Don't pop the tokens.
391 CToken *token;
392 while (mKeepParsing && (token = mTokenizer->PopToken())) {
393 ProcessToken(token);
396 mTokenizer->DidTokenize(PR_FALSE);
399 nsAutoLock al(mLock.get());
401 mCurrentlyParsing = 0;
402 PR_NotifyCondVar(mCVar.get());
404 return NS_OK;
407 nsresult
408 nsSpeculativeScriptThread::StartParsing(nsParser *aParser)
410 NS_ASSERTION(NS_IsMainThread(), "Called on the wrong thread");
411 NS_ASSERTION(!mCurrentlyParsing, "Bad race happening");
413 if (!aParser->ThreadPool()) {
414 return NS_OK;
417 nsIContentSink *sink = aParser->GetContentSink();
418 if (!sink) {
419 return NS_OK;
422 nsCOMPtr<nsIDocument> doc = do_QueryInterface(sink->GetTarget());
423 if (!doc) {
424 return NS_OK;
427 nsAutoString toScan;
428 CParserContext *context = aParser->PeekContext();
429 if (!mLock.get()) {
430 mLock = nsAutoLock::NewLock("nsSpeculativeScriptThread::mLock");
431 if (!mLock.get()) {
432 return NS_ERROR_OUT_OF_MEMORY;
435 mCVar = PR_NewCondVar(mLock.get());
436 if (!mCVar.get()) {
437 return NS_ERROR_OUT_OF_MEMORY;
440 if (!mPreloadedURIs.Init(15)) {
441 return NS_ERROR_OUT_OF_MEMORY;
444 mTokenizer = new nsHTMLTokenizer(context->mDTDMode, context->mDocType,
445 context->mParserCommand, 0);
446 if (!mTokenizer) {
447 return NS_ERROR_OUT_OF_MEMORY;
449 mTokenizer->CopyState(context->mTokenizer);
450 context->mScanner->CopyUnusedData(toScan);
451 if (toScan.IsEmpty()) {
452 return NS_OK;
454 } else if (context == mContext) {
455 // Don't parse the same part of the document twice.
456 nsScannerIterator end;
457 context->mScanner->EndReading(end);
459 nsScannerIterator start;
460 context->mScanner->CurrentPosition(start);
462 if (mNumConsumed > context->mNumConsumed) {
463 // We consumed more the last time we tried speculatively parsing than we
464 // did the last time we actually parsed.
465 PRUint32 distance = Distance(start, end);
466 start.advance(PR_MIN(mNumConsumed - context->mNumConsumed, distance));
469 if (start == end) {
470 // We're at the end of this context's buffer, nothing else to do.
471 return NS_OK;
474 CopyUnicodeTo(start, end, toScan);
475 } else {
476 // Grab all of the context.
477 context->mScanner->CopyUnusedData(toScan);
478 if (toScan.IsEmpty()) {
479 // Nothing to parse, don't do anything.
480 return NS_OK;
484 nsCAutoString charset;
485 PRInt32 source;
486 aParser->GetDocumentCharset(charset, source);
488 mScanner = new nsScanner(toScan, charset, source);
489 if (!mScanner) {
490 return NS_ERROR_OUT_OF_MEMORY;
493 mDocument.swap(doc);
494 mKeepParsing = 1;
495 mCurrentlyParsing = 1;
496 mContext = context;
497 return aParser->ThreadPool()->Dispatch(this, NS_DISPATCH_NORMAL);
500 void
501 nsSpeculativeScriptThread::StopParsing(PRBool /*aFromDocWrite*/)
503 NS_ASSERTION(NS_IsMainThread(), "Can't stop parsing from another thread");
505 if (!mLock.get()) {
506 // If we bailed early out of StartParsing, don't do anything.
507 return;
511 nsAutoLock al(mLock.get());
513 mKeepParsing = 0;
514 if (mCurrentlyParsing) {
515 PR_WaitCondVar(mCVar.get(), PR_INTERVAL_NO_TIMEOUT);
516 NS_ASSERTION(!mCurrentlyParsing, "Didn't actually stop parsing?");
520 // The thread is now idle.
521 if (mTerminated) {
522 // If we're terminated, then we need to ensure that we release our document
523 // and tokenizer here on the main thread so that our last reference to them
524 // isn't our alter-ego rescheduled on another thread.
525 mDocument = nsnull;
526 mTokenizer = nsnull;
527 mScanner = nsnull;
528 } else if (mNumURIs) {
529 // Note: Don't do this if we're terminated.
530 nsPreloadURIs::PreloadURIs(mURIs, this);
531 mNumURIs = 0;
532 mURIs.Clear();
535 // Note: Currently, we pop the tokens off (see the comment in Run) so this
536 // isn't a problem. If and when we actually use the tokens created
537 // off-thread, we'll need to use aFromDocWrite for real.
540 void
541 nsSpeculativeScriptThread::ProcessToken(CToken *aToken)
543 // Only called on the speculative script thread.
545 CHTMLToken *token = static_cast<CHTMLToken *>(aToken);
546 switch (static_cast<eHTMLTokenTypes>(token->GetTokenType())) {
547 case eToken_start: {
548 CStartToken *start = static_cast<CStartToken *>(aToken);
549 nsHTMLTag tag = static_cast<nsHTMLTag>(start->GetTypeID());
550 PRInt16 attrs = start->GetAttributeCount();
551 PRInt16 i = 0;
552 nsAutoString src;
553 nsAutoString elementType;
554 nsAutoString charset;
555 PrefetchType ptype;
557 switch (tag) {
558 #if 0 // TODO Support stylesheet and image preloading.
559 case eHTMLTag_link: {
560 // If this is a <link rel=stylesheet> find the src.
561 PRBool isRelStylesheet = PR_FALSE;
562 for (; i < attrs; ++i) {
563 CAttributeToken *attr = static_cast<CAttributeToken *>(mTokenizer->PopToken());
564 NS_ASSERTION(attr->GetTokenType() == eToken_attribute, "Weird token");
566 if (attr->GetKey().EqualsLiteral("rel")) {
567 if (!attr->GetValue().EqualsLiteral("stylesheet")) {
568 IF_FREE(attr, &mTokenAllocator);
569 break;
571 isRelStylesheet = PR_TRUE;
572 } else if (attr->GetKey().EqualsLiteral("src")) {
573 src.Assign(attr->GetValue());
574 if (isRelStylesheet) {
575 IF_FREE(attr, &mTokenAllocator);
576 break;
580 IF_FREE(attr, &mTokenAllocator);
583 if (isRelStylesheet && !src.IsEmpty()) {
584 AddToPrefetchList(src, STYLESHEET);
586 break;
589 case eHTMLTag_style:
590 ptype = STYLESHEET;
591 case eHTMLTag_img:
592 if (tag == eHTMLTag_img)
593 ptype = IMAGE;
594 #endif
595 case eHTMLTag_script:
596 if (tag == eHTMLTag_script)
597 ptype = SCRIPT;
599 for (; i < attrs; ++i) {
600 CAttributeToken *attr = static_cast<CAttributeToken *>(mTokenizer->PopToken());
601 NS_ASSERTION(attr->GetTokenType() == eToken_attribute, "Weird token");
603 if (attr->GetKey().EqualsLiteral("src")) {
604 src.Assign(attr->GetValue());
605 } else if (attr->GetKey().EqualsLiteral("charset")) {
606 charset.Assign(attr->GetValue());
607 } else if (attr->GetKey().EqualsLiteral("type")) {
608 elementType.Assign(attr->GetValue());
610 IF_FREE(attr, &mTokenAllocator);
613 if (!src.IsEmpty()) {
614 AddToPrefetchList(src, charset, elementType, ptype);
616 break;
618 default:
619 break;
622 for (; i < attrs; ++i) {
623 CToken *attr = mTokenizer->PopToken();
624 if (!attr) {
625 break;
627 NS_ASSERTION(attr->GetTokenType() == eToken_attribute, "Weird token");
628 IF_FREE(attr, &mTokenAllocator);
631 break;
634 default:
635 break;
638 IF_FREE(aToken, &mTokenAllocator);
641 void
642 nsSpeculativeScriptThread::AddToPrefetchList(const nsAString &src,
643 const nsAString &charset,
644 const nsAString &elementType,
645 PrefetchType type)
647 PrefetchEntry *pe = mURIs.InsertElementAt(mNumURIs++);
648 pe->type = type;
649 pe->uri = src;
650 pe->charset = charset;
651 pe->elementType = elementType;
653 if (mNumURIs == kBatchPrefetchURIs) {
654 nsCOMPtr<nsIRunnable> r = new nsPreloadURIs(mURIs, this);
656 mNumURIs = 0;
657 mURIs.Clear();
658 NS_DispatchToMainThread(r, NS_DISPATCH_NORMAL);
662 nsICharsetAlias* nsParser::sCharsetAliasService = nsnull;
663 nsICharsetConverterManager* nsParser::sCharsetConverterManager = nsnull;
664 nsIThreadPool* nsParser::sSpeculativeThreadPool = nsnull;
667 * This gets called when the htmlparser module is initialized.
669 // static
670 nsresult
671 nsParser::Init()
673 nsresult rv;
674 nsCOMPtr<nsICategoryManager> cm =
675 do_GetService(NS_CATEGORYMANAGER_CONTRACTID, &rv);
676 NS_ENSURE_SUCCESS(rv, rv);
678 nsCOMPtr<nsISimpleEnumerator> e;
679 rv = cm->EnumerateCategory("Parser data listener", getter_AddRefs(e));
680 NS_ENSURE_SUCCESS(rv, rv);
682 nsCAutoString categoryEntry;
683 nsXPIDLCString contractId;
684 nsCOMPtr<nsISupports> entry;
686 while (NS_SUCCEEDED(e->GetNext(getter_AddRefs(entry)))) {
687 nsCOMPtr<nsISupportsCString> category(do_QueryInterface(entry));
689 if (!category) {
690 NS_WARNING("Category entry not an nsISupportsCString!");
691 continue;
694 rv = category->GetData(categoryEntry);
695 NS_ENSURE_SUCCESS(rv, rv);
697 rv = cm->GetCategoryEntry("Parser data listener", categoryEntry.get(),
698 getter_Copies(contractId));
699 NS_ENSURE_SUCCESS(rv, rv);
701 nsCOMPtr<nsIUnicharStreamListener> listener =
702 do_CreateInstance(contractId.get());
704 if (listener) {
705 if (!sParserDataListeners) {
706 sParserDataListeners = new nsCOMArray<nsIUnicharStreamListener>();
708 if (!sParserDataListeners)
709 return NS_ERROR_OUT_OF_MEMORY;
712 sParserDataListeners->AppendObject(listener);
716 nsCOMPtr<nsICharsetAlias> charsetAlias =
717 do_GetService(NS_CHARSETALIAS_CONTRACTID, &rv);
718 NS_ENSURE_SUCCESS(rv, rv);
720 nsCOMPtr<nsICharsetConverterManager> charsetConverter =
721 do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
722 NS_ENSURE_SUCCESS(rv, rv);
724 charsetAlias.swap(sCharsetAliasService);
725 charsetConverter.swap(sCharsetConverterManager);
727 nsCOMPtr<nsIThreadPool> threadPool =
728 do_CreateInstance(NS_THREADPOOL_CONTRACTID, &rv);
729 NS_ENSURE_SUCCESS(rv, rv);
731 rv = threadPool->SetThreadLimit(kSpeculativeThreadLimit);
732 NS_ENSURE_SUCCESS(rv, rv);
734 rv = threadPool->SetIdleThreadLimit(kIdleThreadLimit);
735 NS_ENSURE_SUCCESS(rv, rv);
737 rv = threadPool->SetIdleThreadTimeout(kIdleThreadTimeout);
738 NS_ENSURE_SUCCESS(rv, rv);
740 threadPool.swap(sSpeculativeThreadPool);
742 return NS_OK;
747 * This gets called when the htmlparser module is shutdown.
749 // static
750 void nsParser::Shutdown()
752 delete sParserDataListeners;
753 sParserDataListeners = nsnull;
755 NS_IF_RELEASE(sCharsetAliasService);
756 NS_IF_RELEASE(sCharsetConverterManager);
757 if (sSpeculativeThreadPool) {
758 sSpeculativeThreadPool->Shutdown();
759 NS_RELEASE(sSpeculativeThreadPool);
#ifdef DEBUG
// Set from the PARSER_DUMP_CONTENT environment variable in
// nsParser::Initialize() and consulted in nsParser::Cleanup().
static PRBool gDumpContent = PR_FALSE;
#endif
768 * default constructor
770 nsParser::nsParser()
772 Initialize(PR_TRUE);
775 nsParser::~nsParser()
777 Cleanup();
780 void
781 nsParser::Initialize(PRBool aConstructor)
783 #ifdef NS_DEBUG
784 if (!gDumpContent) {
785 gDumpContent = PR_GetEnv("PARSER_DUMP_CONTENT") != nsnull;
787 #endif
789 if (aConstructor) {
790 // Raw pointer
791 mParserContext = 0;
793 else {
794 // nsCOMPtrs
795 mObserver = nsnull;
796 mParserFilter = nsnull;
797 mUnusedInput.Truncate();
800 mContinueEvent = nsnull;
801 mCharsetSource = kCharsetUninitialized;
802 mCharset.AssignLiteral("ISO-8859-1");
803 mInternalState = NS_OK;
804 mStreamStatus = 0;
805 mCommand = eViewNormal;
806 mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
807 NS_PARSER_FLAG_PARSER_ENABLED |
808 NS_PARSER_FLAG_CAN_TOKENIZE;
809 mScriptsExecuting = 0;
811 MOZ_TIMER_DEBUGLOG(("Reset: Parse Time: nsParser::nsParser(), this=%p\n", this));
812 MOZ_TIMER_RESET(mParseTime);
813 MOZ_TIMER_RESET(mDTDTime);
814 MOZ_TIMER_RESET(mTokenizeTime);
817 void
818 nsParser::Cleanup()
820 #ifdef NS_DEBUG
821 if (gDumpContent) {
822 if (mSink) {
823 // Sink (HTMLContentSink at this time) supports nsIDebugDumpContent
824 // interface. We can get to the content model through the sink.
825 nsresult result = NS_OK;
826 nsCOMPtr<nsIDebugDumpContent> trigger = do_QueryInterface(mSink, &result);
827 if (NS_SUCCEEDED(result)) {
828 trigger->DumpContentModel();
832 #endif
834 #ifdef DEBUG
835 if (mParserContext && mParserContext->mPrevContext) {
836 NS_WARNING("Extra parser contexts still on the parser stack");
838 #endif
840 while (mParserContext) {
841 CParserContext *pc = mParserContext->mPrevContext;
842 delete mParserContext;
843 mParserContext = pc;
846 // It should not be possible for this flag to be set when we are getting
847 // destroyed since this flag implies a pending nsParserContinueEvent, which
848 // has an owning reference to |this|.
849 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
850 if (mSpeculativeScriptThread) {
851 mSpeculativeScriptThread->Terminate();
852 mSpeculativeScriptThread = nsnull;
856 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
858 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
859 NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSink)
860 NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mObserver)
861 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
863 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
864 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSink)
865 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mObserver)
866 CParserContext *pc = tmp->mParserContext;
867 while (pc) {
868 cb.NoteXPCOMChild(pc->mDTD);
869 cb.NoteXPCOMChild(pc->mTokenizer);
870 pc = pc->mPrevContext;
872 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
874 NS_IMPL_CYCLE_COLLECTING_ADDREF_AMBIGUOUS(nsParser, nsIParser)
875 NS_IMPL_CYCLE_COLLECTING_RELEASE_AMBIGUOUS(nsParser, nsIParser)
876 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
877 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
878 NS_INTERFACE_MAP_ENTRY(nsIParser)
879 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
880 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
881 NS_INTERFACE_MAP_END
883 // The parser continue event is posted only if
884 // all of the data to parse has been passed to ::OnDataAvailable
885 // and the parser has been interrupted by the content sink
886 // because the processing of tokens took too long.
888 nsresult
889 nsParser::PostContinueEvent()
891 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
892 // If this flag isn't set, then there shouldn't be a live continue event!
893 NS_ASSERTION(!mContinueEvent, "bad");
895 // This creates a reference cycle between this and the event that is
896 // broken when the event fires.
897 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
898 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
899 NS_WARNING("failed to dispatch parser continuation event");
900 } else {
901 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
902 mContinueEvent = event;
905 return NS_OK;
908 NS_IMETHODIMP_(void)
909 nsParser::SetParserFilter(nsIParserFilter * aFilter)
911 mParserFilter = aFilter;
914 NS_IMETHODIMP_(void)
915 nsParser::GetCommand(nsCString& aCommand)
917 aCommand = mCommandStr;
921 * Call this method once you've created a parser, and want to instruct it
922 * about the command which caused the parser to be constructed. For example,
923 * this allows us to select a DTD which can do, say, view-source.
925 * @param aCommand the command string to set
927 NS_IMETHODIMP_(void)
928 nsParser::SetCommand(const char* aCommand)
930 mCommandStr.Assign(aCommand);
931 if (mCommandStr.Equals(kViewSourceCommand)) {
932 mCommand = eViewSource;
933 } else if (mCommandStr.Equals(kViewFragmentCommand)) {
934 mCommand = eViewFragment;
935 } else {
936 mCommand = eViewNormal;
941 * Call this method once you've created a parser, and want to instruct it
942 * about the command which caused the parser to be constructed. For example,
943 * this allows us to select a DTD which can do, say, view-source.
945 * @param aParserCommand the command to set
947 NS_IMETHODIMP_(void)
948 nsParser::SetCommand(eParserCommands aParserCommand)
950 mCommand = aParserCommand;
954 * Call this method once you've created a parser, and want to instruct it
955 * about what charset to load
957 * @param aCharset- the charset of a document
958 * @param aCharsetSource- the source of the charset
960 NS_IMETHODIMP_(void)
961 nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
963 mCharset = aCharset;
964 mCharsetSource = aCharsetSource;
965 if (mParserContext && mParserContext->mScanner) {
966 mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
970 void
971 nsParser::SetSinkCharset(nsACString& aCharset)
973 if (mSink) {
974 mSink->SetDocumentCharset(aCharset);
979 * This method gets called in order to set the content
980 * sink for this parser to dump nodes to.
982 * @param nsIContentSink interface for node receiver
984 NS_IMETHODIMP_(void)
985 nsParser::SetContentSink(nsIContentSink* aSink)
987 NS_PRECONDITION(aSink, "sink cannot be null!");
988 mSink = aSink;
990 if (mSink) {
991 mSink->SetParser(this);
996 * retrieve the sink set into the parser
997 * @return current sink
999 NS_IMETHODIMP_(nsIContentSink*)
1000 nsParser::GetContentSink()
1002 return mSink;
1006 * Retrieve parsemode from topmost parser context
1008 * @return parsemode
1010 NS_IMETHODIMP_(nsDTDMode)
1011 nsParser::GetParseMode()
1013 if (mParserContext) {
1014 return mParserContext->mDTDMode;
1016 NS_NOTREACHED("no parser context");
1017 return eDTDMode_unknown;
1021 * Determine what DTD mode (and thus what layout nsCompatibility mode)
1022 * to use for this document based on the first chunk of data received
1023 * from the network (each parsercontext can have its own mode). (No,
1024 * this is not an optimal solution -- we really don't need to know until
1025 * after we've received the DOCTYPE, and this could easily be part of
1026 * the regular parsing process if the parser were designed in a way that
1027 * made such modifications easy.)
1030 // Parse the PS production in the SGML spec (excluding the part dealing
1031 // with entity references) starting at theIndex into theBuffer, and
1032 // return the first index after the end of the production.
1033 static PRInt32
1034 ParsePS(const nsString& aBuffer, PRInt32 aIndex)
1036 for (;;) {
1037 PRUnichar ch = aBuffer.CharAt(aIndex);
1038 if ((ch == PRUnichar(' ')) || (ch == PRUnichar('\t')) ||
1039 (ch == PRUnichar('\n')) || (ch == PRUnichar('\r'))) {
1040 ++aIndex;
1041 } else if (ch == PRUnichar('-')) {
1042 PRInt32 tmpIndex;
1043 if (aBuffer.CharAt(aIndex+1) == PRUnichar('-') &&
1044 kNotFound != (tmpIndex=aBuffer.Find("--",PR_FALSE,aIndex+2,-1))) {
1045 aIndex = tmpIndex + 2;
1046 } else {
1047 return aIndex;
1049 } else {
1050 return aIndex;
// Bits reported through ParseDocTypeDecl's aResultFlags out-parameter.
#define PARSE_DTD_HAVE_DOCTYPE          (1<<0)
#define PARSE_DTD_HAVE_PUBLIC_ID        (1<<1)
#define PARSE_DTD_HAVE_SYSTEM_ID        (1<<2)
#define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1<<3)
1060 // return PR_TRUE on success (includes not present), PR_FALSE on failure
1061 static PRBool
1062 ParseDocTypeDecl(const nsString &aBuffer,
1063 PRInt32 *aResultFlags,
1064 nsString &aPublicID,
1065 nsString &aSystemID)
1067 PRBool haveDoctype = PR_FALSE;
1068 *aResultFlags = 0;
1070 // Skip through any comments and processing instructions
1071 // The PI-skipping is a bit of a hack.
1072 PRInt32 theIndex = 0;
1073 do {
1074 theIndex = aBuffer.FindChar('<', theIndex);
1075 if (theIndex == kNotFound) break;
1076 PRUnichar nextChar = aBuffer.CharAt(theIndex+1);
1077 if (nextChar == PRUnichar('!')) {
1078 PRInt32 tmpIndex = theIndex + 2;
1079 if (kNotFound !=
1080 (theIndex=aBuffer.Find("DOCTYPE", PR_TRUE, tmpIndex, 0))) {
1081 haveDoctype = PR_TRUE;
1082 theIndex += 7; // skip "DOCTYPE"
1083 break;
1085 theIndex = ParsePS(aBuffer, tmpIndex);
1086 theIndex = aBuffer.FindChar('>', theIndex);
1087 } else if (nextChar == PRUnichar('?')) {
1088 theIndex = aBuffer.FindChar('>', theIndex);
1089 } else {
1090 break;
1092 } while (theIndex != kNotFound);
1094 if (!haveDoctype)
1095 return PR_TRUE;
1096 *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
1098 theIndex = ParsePS(aBuffer, theIndex);
1099 theIndex = aBuffer.Find("HTML", PR_TRUE, theIndex, 0);
1100 if (kNotFound == theIndex)
1101 return PR_FALSE;
1102 theIndex = ParsePS(aBuffer, theIndex+4);
1103 PRInt32 tmpIndex = aBuffer.Find("PUBLIC", PR_TRUE, theIndex, 0);
1105 if (kNotFound != tmpIndex) {
1106 theIndex = ParsePS(aBuffer, tmpIndex+6);
1108 // We get here only if we've read <!DOCTYPE HTML PUBLIC
1109 // (not case sensitive) possibly with comments within.
1111 // Now find the beginning and end of the public identifier
1112 // and the system identifier (if present).
1114 PRUnichar lit = aBuffer.CharAt(theIndex);
1115 if ((lit != PRUnichar('\"')) && (lit != PRUnichar('\'')))
1116 return PR_FALSE;
1118 // Start is the first character, excluding the quote, and End is
1119 // the final quote, so there are (end-start) characters.
1121 PRInt32 PublicIDStart = theIndex + 1;
1122 PRInt32 PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
1123 if (kNotFound == PublicIDEnd)
1124 return PR_FALSE;
1125 theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
1126 PRUnichar next = aBuffer.CharAt(theIndex);
1127 if (next == PRUnichar('>')) {
1128 // There was a public identifier, but no system
1129 // identifier,
1130 // so do nothing.
1131 // This is needed to avoid the else at the end, and it's
1132 // also the most common case.
1133 } else if ((next == PRUnichar('\"')) ||
1134 (next == PRUnichar('\''))) {
1135 // We found a system identifier.
1136 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
1137 PRInt32 SystemIDStart = theIndex + 1;
1138 PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
1139 if (kNotFound == SystemIDEnd)
1140 return PR_FALSE;
1141 aSystemID =
1142 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
1143 } else if (next == PRUnichar('[')) {
1144 // We found an internal subset.
1145 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
1146 } else {
1147 // Something's wrong.
1148 return PR_FALSE;
1151 // Since a public ID is a minimum literal, we must trim
1152 // and collapse whitespace
1153 aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
1154 aPublicID.CompressWhitespace(PR_TRUE, PR_TRUE);
1155 *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
1156 } else {
1157 tmpIndex=aBuffer.Find("SYSTEM", PR_TRUE, theIndex, 0);
1158 if (kNotFound != tmpIndex) {
1159 // DOCTYPES with system ID but no Public ID
1160 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
1162 theIndex = ParsePS(aBuffer, tmpIndex+6);
1163 PRUnichar next = aBuffer.CharAt(theIndex);
1164 if (next != PRUnichar('\"') && next != PRUnichar('\''))
1165 return PR_FALSE;
1167 PRInt32 SystemIDStart = theIndex + 1;
1168 PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
1170 if (kNotFound == SystemIDEnd)
1171 return PR_FALSE;
1172 aSystemID =
1173 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
1174 theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
1177 PRUnichar nextChar = aBuffer.CharAt(theIndex);
1178 if (nextChar == PRUnichar('['))
1179 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
1180 else if (nextChar != PRUnichar('>'))
1181 return PR_FALSE;
1183 return PR_TRUE;
// One entry of the public-identifier table: a lower-cased public ID and the
// rendering mode to select depending on whether a system ID accompanied it.
struct PubIDInfo
{
  enum eMode {
    eQuirks,         /* always quirks mode, unless there's an internal subset */
    eAlmostStandards,/* eCompatibility_AlmostStandards */
    eFullStandards   /* eCompatibility_FullStandards */
      /*
       * public IDs that should trigger strict mode are not listed
       * since we want all future public IDs to trigger strict mode as
       * well
       */
  };

  const char* name;        // public identifier, lower case
  eMode mode_if_no_sysid;  // mode when the DOCTYPE has no system identifier
  eMode mode_if_sysid;     // mode when the DOCTYPE has a system identifier
};
// Number of elements in a true array (must not be used on a pointer).
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
1206 // These must be in nsCRT::strcmp order so binary-search can be used.
1207 // This is verified, |#ifdef DEBUG|, below.
1209 // Even though public identifiers should be case sensitive, we will do
1210 // all comparisons after converting to lower case in order to do
1211 // case-insensitive comparison since there are a number of existing web
1212 // sites that use the incorrect case. Therefore all of the public
1213 // identifiers below are in lower case (with the correct case following,
1214 // in comments). The case is verified, |#ifdef DEBUG|, below.
1215 static const PubIDInfo kPublicIDs[] = {
1216 {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1217 {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1218 {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1219 {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1220 {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1221 {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1222 {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1223 {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1224 {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1225 {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1226 {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1227 {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1228 {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1229 {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1230 {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1231 {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1232 {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1233 {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1234 {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1235 {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1236 {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1237 {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1238 {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1239 {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1240 {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1241 {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1242 {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1243 {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1244 {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1245 {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1246 {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1247 {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1248 {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1249 {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1250 {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1251 {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1252 {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1253 {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1254 {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1255 {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1256 {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1257 {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1258 {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1259 {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1260 {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1261 {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1262 {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1263 {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1264 {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1265 {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1266 {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1267 {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1268 {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1269 {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1270 {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1271 {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1272 {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1273 {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1274 {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1275 {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1276 {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1277 {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1278 {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
1279 {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
1280 {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1281 {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1282 {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1283 {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
1284 {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
1285 {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1286 {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1287 {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1288 {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1289 {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1290 {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
1291 {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
#ifdef DEBUG
// Debug-only, run-once sanity check of kPublicIDs: entries must be strictly
// ascending in nsCRT::strcmp order (the lookup is a binary search) and must
// already be lower case.
static void
VerifyPublicIDs()
{
  static PRBool gVerified = PR_FALSE;
  if (!gVerified) {
    gVerified = PR_TRUE;
    PRUint32 i;
    for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
      if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
        NS_NOTREACHED("doctypes out of order");
        printf("Doctypes %s and %s out of order.\n",
               kPublicIDs[i].name, kPublicIDs[i+1].name);
      }
    }
    for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
      nsCAutoString lcPubID(kPublicIDs[i].name);
      ToLowerCase(lcPubID);
      if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
        NS_NOTREACHED("doctype not lower case");
        printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
      }
    }
  }
}
#endif
1321 static void
1322 DetermineHTMLParseMode(const nsString& aBuffer,
1323 nsDTDMode& aParseMode,
1324 eParserDocType& aDocType)
1326 #ifdef DEBUG
1327 VerifyPublicIDs();
1328 #endif
1329 PRInt32 resultFlags;
1330 nsAutoString publicIDUCS2, sysIDUCS2;
1331 if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
1332 if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
1333 // no DOCTYPE
1334 aParseMode = eDTDMode_quirks;
1335 aDocType = eHTML_Quirks;
1336 } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
1337 !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
1338 // A doctype with an internal subset is always full_standards.
1339 // A doctype without a public ID is always full_standards.
1340 aDocType = eHTML_Strict;
1341 aParseMode = eDTDMode_full_standards;
1343 // Special hack for IBM's custom DOCTYPE.
1344 if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
1345 sysIDUCS2 == NS_LITERAL_STRING(
1346 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
1347 aParseMode = eDTDMode_quirks;
1348 aDocType = eHTML_Quirks;
1351 } else {
1352 // We have to check our list of public IDs to see what to do.
1353 // Yes, we want UCS2 to ASCII lossy conversion.
1354 nsCAutoString publicID;
1355 publicID.AssignWithConversion(publicIDUCS2);
1357 // See comment above definition of kPublicIDs about case
1358 // sensitivity.
1359 ToLowerCase(publicID);
1361 // Binary search to see if we can find the correct public ID
1362 // These must be signed since maximum can go below zero and we'll
1363 // crash if it's unsigned.
1364 PRInt32 minimum = 0;
1365 PRInt32 maximum = ELEMENTS_OF(kPublicIDs) - 1;
1366 PRInt32 index;
1367 for (;;) {
1368 index = (minimum + maximum) / 2;
1369 PRInt32 comparison =
1370 nsCRT::strcmp(publicID.get(), kPublicIDs[index].name);
1371 if (comparison == 0)
1372 break;
1373 if (comparison < 0)
1374 maximum = index - 1;
1375 else
1376 minimum = index + 1;
1378 if (maximum < minimum) {
1379 // The DOCTYPE is not in our list, so it must be full_standards.
1380 aParseMode = eDTDMode_full_standards;
1381 aDocType = eHTML_Strict;
1382 return;
1386 switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
1387 ? kPublicIDs[index].mode_if_sysid
1388 : kPublicIDs[index].mode_if_no_sysid)
1390 case PubIDInfo::eQuirks:
1391 aParseMode = eDTDMode_quirks;
1392 aDocType = eHTML_Quirks;
1393 break;
1394 case PubIDInfo::eAlmostStandards:
1395 aParseMode = eDTDMode_almost_standards;
1396 aDocType = eHTML_Strict;
1397 break;
1398 case PubIDInfo::eFullStandards:
1399 aParseMode = eDTDMode_full_standards;
1400 aDocType = eHTML_Strict;
1401 break;
1402 default:
1403 NS_NOTREACHED("no other cases!");
1406 } else {
1407 // badly formed DOCTYPE -> quirks
1408 aParseMode = eDTDMode_quirks;
1409 aDocType = eHTML_Quirks;
1413 static void
1414 DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
1415 eParserDocType& aDocType, const nsACString& aMimeType)
1417 if (aMimeType.EqualsLiteral(kHTMLTextContentType)) {
1418 DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
1419 } else if (aMimeType.EqualsLiteral(kPlainTextContentType) ||
1420 aMimeType.EqualsLiteral(kTextCSSContentType) ||
1421 aMimeType.EqualsLiteral(kApplicationJSContentType) ||
1422 aMimeType.EqualsLiteral(kApplicationXJSContentType) ||
1423 aMimeType.EqualsLiteral(kTextECMAScriptContentType) ||
1424 aMimeType.EqualsLiteral(kApplicationECMAScriptContentType) ||
1425 aMimeType.EqualsLiteral(kTextJSContentType)) {
1426 aDocType = ePlainText;
1427 aParseMode = eDTDMode_quirks;
1428 } else { // Some form of XML
1429 aDocType = eXML;
1430 aParseMode = eDTDMode_full_standards;
1434 static nsresult
1435 FindSuitableDTD(CParserContext& aParserContext)
1437 NS_ASSERTION(!aParserContext.mDTD, "Already found a DTD");
1439 // We always find a DTD.
1440 aParserContext.mAutoDetectStatus = ePrimaryDetect;
1442 #ifdef MOZ_VIEW_SOURCE
1443 // Quick check for view source.
1444 if (aParserContext.mParserCommand == eViewSource) {
1445 aParserContext.mDTD = new CViewSourceHTML();
1446 return aParserContext.mDTD ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1448 #endif
1450 // Now see if we're parsing HTML (which, as far as we're concerned, simply
1451 // means "not XML").
1452 if (aParserContext.mDocType != eXML) {
1453 aParserContext.mDTD = new CNavDTD();
1454 return aParserContext.mDTD ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1457 // If we're here, then we'd better be parsing XML.
1458 NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
1459 aParserContext.mDTD = new nsExpatDriver();
1460 return aParserContext.mDTD ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
1463 NS_IMETHODIMP
1464 nsParser::CancelParsingEvents()
1466 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
1467 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
1468 // Revoke the pending continue parsing event
1469 mContinueEvent = nsnull;
1470 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
1472 return NS_OK;
1475 ////////////////////////////////////////////////////////////////////////
1479 * This gets called just prior to the model actually
1480 * being constructed. It's important to make this the
1481 * last thing that happens right before parsing, so we
1482 * can delay until the last moment the resolution of
1483 * which DTD to use (unless of course we're assigned one).
1485 nsresult
1486 nsParser::WillBuildModel(nsString& aFilename)
1488 if (!mParserContext)
1489 return kInvalidParserContext;
1491 if (eUnknownDetect != mParserContext->mAutoDetectStatus)
1492 return NS_OK;
1494 if (eDTDMode_unknown == mParserContext->mDTDMode ||
1495 eDTDMode_autodetect == mParserContext->mDTDMode) {
1496 PRUnichar buf[1025];
1497 nsFixedString theBuffer(buf, 1024, 0);
1499 // Grab 1024 characters, starting at the first non-whitespace
1500 // character, to look for the doctype in.
1501 mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
1502 DetermineParseMode(theBuffer, mParserContext->mDTDMode,
1503 mParserContext->mDocType, mParserContext->mMimeType);
1506 nsresult rv = FindSuitableDTD(*mParserContext);
1507 NS_ENSURE_SUCCESS(rv, rv);
1509 nsITokenizer* tokenizer;
1510 rv = mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), mSink, tokenizer);
1511 NS_ENSURE_SUCCESS(rv, rv);
1513 return mParserContext->mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
1517 * This gets called when the parser is done with its input.
1518 * Note that the parser may have been called recursively, so we
1519 * have to check for a prev. context before closing out the DTD/sink.
1521 nsresult
1522 nsParser::DidBuildModel(nsresult anErrorCode)
1524 nsresult result = anErrorCode;
1526 if (IsComplete()) {
1527 if (mParserContext && !mParserContext->mPrevContext) {
1528 if (mParserContext->mDTD) {
1529 result = mParserContext->mDTD->DidBuildModel(anErrorCode,PR_TRUE,this,mSink);
1532 //Ref. to bug 61462.
1533 mParserContext->mRequest = 0;
1537 return result;
1540 void
1541 nsParser::SpeculativelyParse()
1543 if (mParserContext->mParserCommand == eViewNormal &&
1544 !mParserContext->mMimeType.EqualsLiteral("text/html")) {
1545 return;
1548 if (!mSpeculativeScriptThread) {
1549 mSpeculativeScriptThread = new nsSpeculativeScriptThread();
1550 if (!mSpeculativeScriptThread) {
1551 return;
1555 nsresult rv = mSpeculativeScriptThread->StartParsing(this);
1556 if (NS_FAILED(rv)) {
1557 mSpeculativeScriptThread = nsnull;
1562 * This method adds a new parser context to the list,
1563 * pushing the current one to the next position.
1565 * @param ptr to new context
1567 void
1568 nsParser::PushContext(CParserContext& aContext)
1570 aContext.mPrevContext = mParserContext;
1571 mParserContext = &aContext;
1575 * This method pops the topmost context off the stack,
1576 * returning it to the user. The next context (if any)
1577 * becomes the current context.
1578 * @update gess7/22/98
1579 * @return prev. context
1581 CParserContext*
1582 nsParser::PopContext()
1584 CParserContext* oldContext = mParserContext;
1585 if (oldContext) {
1586 mParserContext = oldContext->mPrevContext;
1587 if (mParserContext) {
1588 // If the old context was blocked, propagate the blocked state
1589 // back to the new one. Also, propagate the stream listener state
1590 // but don't override onStop state to guarantee the call to DidBuildModel().
1591 if (mParserContext->mStreamListenerState != eOnStop) {
1592 mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
1594 // Update the current context's tokenizer to any information gleaned
1595 // while parsing document.write() calls (such as "a plaintext tag was
1596 // found")
1597 if (mParserContext->mTokenizer) {
1598 mParserContext->mTokenizer->CopyState(oldContext->mTokenizer);
1602 return oldContext;
1606 * Call this when you want control whether or not the parser will parse
1607 * and tokenize input (TRUE), or whether it just caches input to be
1608 * parsed later (FALSE).
1610 * @param aState determines whether we parse/tokenize or just cache.
1611 * @return current state
1613 void
1614 nsParser::SetUnusedInput(nsString& aBuffer)
1616 mUnusedInput = aBuffer;
1619 NS_IMETHODIMP_(void *)
1620 nsParser::GetRootContextKey()
1622 CParserContext* pc = mParserContext;
1623 if (!pc) {
1624 return nsnull;
1627 while (pc->mPrevContext) {
1628 pc = pc->mPrevContext;
1631 return pc->mKey;
1635 * Call this when you want to *force* the parser to terminate the
1636 * parsing process altogether. This is binary -- so once you terminate
1637 * you can't resume without restarting altogether.
1639 NS_IMETHODIMP
1640 nsParser::Terminate(void)
1642 // We should only call DidBuildModel once, so don't do anything if this is
1643 // the second time that Terminate has been called.
1644 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
1645 return NS_OK;
1648 nsresult result = NS_OK;
1649 // XXX - [ until we figure out a way to break parser-sink circularity ]
1650 // Hack - Hold a reference until we are completely done...
1651 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1652 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
1654 // CancelParsingEvents must be called to avoid leaking the nsParser object
1655 // @see bug 108049
1656 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
1657 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
1658 // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
1659 CancelParsingEvents();
1660 if (mSpeculativeScriptThread) {
1661 mSpeculativeScriptThread->Terminate();
1662 mSpeculativeScriptThread = nsnull;
1665 // If we got interrupted in the middle of a document.write, then we might
1666 // have more than one parser context on our parsercontext stack. This has
1667 // the effect of making DidBuildModel a no-op, meaning that we never call
1668 // our sink's DidBuildModel and break the reference cycle, causing a leak.
1669 // Since we're getting terminated, we manually clean up our context stack.
1670 while (mParserContext && mParserContext->mPrevContext) {
1671 CParserContext *prev = mParserContext->mPrevContext;
1672 NS_ASSERTION(prev->mPrevContext || prev->mDTD, "How is there no root DTD?");
1674 delete mParserContext;
1675 mParserContext = prev;
1678 if (mParserContext && mParserContext->mDTD) {
1679 mParserContext->mDTD->Terminate();
1680 DidBuildModel(result);
1681 } else if (mSink) {
1682 // We have no parser context or no DTD yet (so we got terminated before we
1683 // got any data). Manually break the reference cycle with the sink.
1684 result = mSink->DidBuildModel();
1685 NS_ENSURE_SUCCESS(result, result);
1688 return NS_OK;
1691 NS_IMETHODIMP
1692 nsParser::ContinueParsing()
1694 if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
1695 NS_WARNING("Trying to continue parsing on a unblocked parser.");
1696 return NS_OK;
1699 mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
1701 return ContinueInterruptedParsing();
1704 NS_IMETHODIMP
1705 nsParser::ContinueInterruptedParsing()
1707 // If there are scripts executing, then the content sink is jumping the gun
1708 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
1709 // later, see bug 460706.
1710 if (mScriptsExecuting) {
1711 return NS_OK;
1714 // If the stream has already finished, there's a good chance
1715 // that we might start closing things down when the parser
1716 // is reenabled. To make sure that we're not deleted across
1717 // the reenabling process, hold a reference to ourselves.
1718 nsresult result=NS_OK;
1719 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1721 #ifdef DEBUG
1722 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
1723 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
1725 #endif
1727 if (mSpeculativeScriptThread) {
1728 mSpeculativeScriptThread->StopParsing(PR_FALSE);
1731 PRBool isFinalChunk = mParserContext &&
1732 mParserContext->mStreamListenerState == eOnStop;
1734 if (mSink) {
1735 mSink->WillParse();
1737 result = ResumeParse(PR_TRUE, isFinalChunk); // Ref. bug 57999
1739 if (result != NS_OK) {
1740 result=mInternalState;
1743 return result;
1747 * Stops parsing temporarily. That's it will prevent the
1748 * parser from building up content model.
1750 NS_IMETHODIMP_(void)
1751 nsParser::BlockParser()
1753 mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
1754 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::BlockParser(), this=%p\n", this));
1755 MOZ_TIMER_STOP(mParseTime);
1759 * Open up the parser for tokenization, building up content
1760 * model..etc. However, this method does not resume parsing
1761 * automatically. It's the callers' responsibility to restart
1762 * the parsing engine.
1764 NS_IMETHODIMP_(void)
1765 nsParser::UnblockParser()
1767 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
1768 mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
1769 MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::UnblockParser(), this=%p\n", this));
1770 MOZ_TIMER_START(mParseTime);
1771 } else {
1772 NS_WARNING("Trying to unblock an unblocked parser.");
1777 * Call this to query whether the parser is enabled or not.
1779 NS_IMETHODIMP_(PRBool)
1780 nsParser::IsParserEnabled()
1782 return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
1786 * Call this to query whether the parser thinks it's done with parsing.
1788 NS_IMETHODIMP_(PRBool)
1789 nsParser::IsComplete()
1791 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
1795 void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
1797 // Ignore any revoked continue events...
1798 if (mContinueEvent != ev)
1799 return;
1801 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
1802 mContinueEvent = nsnull;
1804 NS_ASSERTION(mScriptsExecuting == 0, "Interrupted in the middle of a script?");
1805 ContinueInterruptedParsing();
1808 void
1809 nsParser::ScriptExecuting()
1811 ++mScriptsExecuting;
1814 void
1815 nsParser::ScriptDidExecute()
1817 NS_ASSERTION(mScriptsExecuting > 0, "Too many calls to ScriptDidExecute");
1818 --mScriptsExecuting;
1821 nsresult
1822 nsParser::DataAdded(const nsSubstring& aData, nsIRequest *aRequest)
1824 NS_ASSERTION(sParserDataListeners,
1825 "Don't call this with no parser data listeners!");
1827 if (!mSink || !aRequest) {
1828 return NS_OK;
1831 nsISupports *ctx = mSink->GetTarget();
1832 PRInt32 count = sParserDataListeners->Count();
1833 nsresult rv = NS_OK;
1834 PRBool canceled = PR_FALSE;
1836 while (count--) {
1837 rv |= sParserDataListeners->ObjectAt(count)->
1838 OnUnicharDataAvailable(aRequest, ctx, aData);
1840 if (NS_FAILED(rv) && !canceled) {
1841 aRequest->Cancel(rv);
1843 canceled = PR_TRUE;
1847 return rv;
1850 PRBool
1851 nsParser::CanInterrupt()
1853 return (mFlags & NS_PARSER_FLAG_CAN_INTERRUPT) != 0;
1856 void
1857 nsParser::SetCanInterrupt(PRBool aCanInterrupt)
1859 if (aCanInterrupt) {
1860 mFlags |= NS_PARSER_FLAG_CAN_INTERRUPT;
1861 } else {
1862 mFlags &= ~NS_PARSER_FLAG_CAN_INTERRUPT;
1867 * This is the main controlling routine in the parsing process.
1868 * Note that it may get called multiple times for the same scanner,
1869 * since this is a pushed based system, and all the tokens may
1870 * not have been consumed by the scanner during a given invocation
1871 * of this method.
1873 NS_IMETHODIMP
1874 nsParser::Parse(nsIURI* aURL,
1875 nsIRequestObserver* aListener,
1876 void* aKey,
1877 nsDTDMode aMode)
1880 NS_PRECONDITION(aURL, "Error: Null URL given");
1881 NS_ASSERTION(!mSpeculativeScriptThread, "Can't reuse a parser like this");
1883 nsresult result=kBadURL;
1884 mObserver = aListener;
1886 if (aURL) {
1887 nsCAutoString spec;
1888 nsresult rv = aURL->GetSpec(spec);
1889 if (rv != NS_OK) {
1890 return rv;
1892 NS_ConvertUTF8toUTF16 theName(spec);
1894 nsScanner* theScanner = new nsScanner(theName, PR_FALSE, mCharset,
1895 mCharsetSource);
1896 CParserContext* pc = new CParserContext(theScanner, aKey, mCommand,
1897 aListener);
1898 if (pc && theScanner) {
1899 pc->mMultipart = PR_TRUE;
1900 pc->mContextType = CParserContext::eCTURL;
1901 pc->mDTDMode = aMode;
1902 PushContext(*pc);
1904 // Here, and only here, hand this parser off to the scanner. We
1905 // only want to do that here since the only reason the scanner
1906 // needs the parser is to call DataAdded() on it, and that's
1907 // only ever wanted when parsing from an URI.
1908 theScanner->SetParser(this);
1910 result = NS_OK;
1911 } else {
1912 result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
1915 return result;
1919 * Call this method if all you want to do is parse 1 string full of HTML text.
1920 * In particular, this method should be called by the DOM when it has an HTML
1921 * string to feed to the parser in real-time.
1923 * @param aSourceBuffer contains a string-full of real content
1924 * @param aMimeType tells us what type of content to expect in the given string
// Feeds one chunk of in-memory source text (aSourceBuffer) to the parser
// context identified by aKey.
//
// aSourceBuffer - text to append to the context's scanner
// aKey          - identifies the parser context; a new context is created
//                 and pushed when no context on the stack carries this key
// aMimeType     - stored on a newly created context via SetMimeType()
// aLastCall     - non-zero when no further chunks will follow for this key
// aMode         - DTD mode; eDTDMode_fragment also switches mCommand to
//                 eViewFragment
//
// Returns NS_OK without doing anything when parsing has been stopped or when
// an empty buffer is passed on a non-final call. For a newly created context
// the result of ResumeParse() is returned; for an existing context the
// ResumeParse() result is not propagated.
1926 NS_IMETHODIMP
1927 nsParser::Parse(const nsAString& aSourceBuffer,
1928 void* aKey,
1929 const nsACString& aMimeType,
1930 PRBool aLastCall,
1931 nsDTDMode aMode)
1933 nsresult result = NS_OK;
1935 // Don't bother if we're never going to parse this.
1936 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
1937 return result;
1940 if (!aLastCall && aSourceBuffer.IsEmpty()) {
1941 // Nothing is being passed to the parser so return
1942 // immediately. mUnusedInput will get processed when
1943 // some data is actually passed in.
1944 // But if this is the last call, make sure to finish up
1945 // stuff correctly.
1946 return result;
// A speculative script thread must not run while the main parser consumes
// new input.
1949 if (mSpeculativeScriptThread) {
1950 mSpeculativeScriptThread->StopParsing(PR_TRUE);
1953 // Hack to pass on to the dtd the caller's desire to
1954 // parse a fragment without worrying about containment rules
1955 if (aMode == eDTDMode_fragment)
1956 mCommand = eViewFragment;
1958 // Maintain a reference to ourselves so we don't go away
1959 // till we're completely done.
1960 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
// NOTE(review): because of the early return above (!aLastCall && empty
// buffer), the first two operands already guarantee this condition holds
// whenever control reaches this point.
1962 if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
1963 // Note: The following code will always find the parser context associated
1964 // with the given key, even if that context has been suspended (e.g., for
1965 // another document.write call). This doesn't appear to be exactly what IE
1966 // does in the case where this happens, but this makes more sense.
1967 CParserContext* pc = mParserContext;
1968 while (pc && pc->mKey != aKey) {
1969 pc = pc->mPrevContext;
1972 if (!pc) {
1973 // Only make a new context if we don't have one, OR if we do, but has a
1974 // different context key.
// The new scanner starts out holding whatever input is still unused.
1975 nsScanner* theScanner = new nsScanner(mUnusedInput, mCharset, mCharsetSource);
1976 NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
1978 nsIDTD *theDTD = nsnull;
1979 eAutoDetectResult theStatus = eUnknownDetect;
// Reuse the current context's DTD and detection status when the mime type
// matches.
1981 if (mParserContext && mParserContext->mMimeType == aMimeType) {
1982 // Ref. Bug 90379
1983 NS_ASSERTION(mParserContext->mDTD, "How come the DTD is null?");
// NOTE(review): mParserContext was already tested by the enclosing
// condition, so this inner test looks redundant.
1985 if (mParserContext) {
1986 // To fix bug 32263 we used to create a new instance of the DTD!.
1987 // All we need is a new tokenizer which now gets created with
1988 // a parser context.
1989 theDTD = mParserContext->mDTD;
1990 theStatus = mParserContext->mAutoDetectStatus;
1991 // Added this to fix bug 32022.
1995 pc = new CParserContext(theScanner, aKey, mCommand,
1996 0, theDTD, theStatus, aLastCall);
// NOTE(review): if this allocation fails, theScanner is not deleted before
// returning - looks like a leak; confirm who owns the scanner.
1997 NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
1999 PushContext(*pc);
// A context is multipart unless this is the only chunk, and it inherits
// multipart-ness from the context it was pushed on top of.
2001 pc->mMultipart = !aLastCall; // By default
2002 if (pc->mPrevContext) {
2003 pc->mMultipart |= pc->mPrevContext->mMultipart;
2006 // Start fix bug 40143
2007 if (pc->mMultipart) {
2008 pc->mStreamListenerState = eOnDataAvail;
2009 if (pc->mScanner) {
2010 pc->mScanner->SetIncremental(PR_TRUE);
2012 } else {
2013 pc->mStreamListenerState = eOnStop;
2014 if (pc->mScanner) {
2015 pc->mScanner->SetIncremental(PR_FALSE);
2018 // end fix for 40143
2020 pc->mContextType=CParserContext::eCTString;
2021 pc->SetMimeType(aMimeType);
2022 if (pc->mPrevContext && aMode == eDTDMode_autodetect) {
2023 // Preserve the DTD mode from the last context, bug 265814.
2024 pc->mDTDMode = pc->mPrevContext->mDTDMode;
2025 } else {
2026 pc->mDTDMode = aMode;
// The unused input was handed to the new scanner above, so drop our copy.
2029 mUnusedInput.Truncate();
2031 pc->mScanner->Append(aSourceBuffer);
2032 // Do not interrupt document.write() - bug 95487
2033 result = ResumeParse(PR_FALSE, PR_FALSE, PR_FALSE);
2034 } else {
// A context for aKey already exists: just queue the new text on it.
2035 pc->mScanner->Append(aSourceBuffer);
2036 if (!pc->mPrevContext) {
2037 // Set stream listener state to eOnStop, on the final context - Fix 68160,
2038 // to guarantee DidBuildModel() call - Fix 36148
2039 if (aLastCall) {
2040 pc->mStreamListenerState = eOnStop;
2041 pc->mScanner->SetIncremental(PR_FALSE);
2044 if (pc == mParserContext) {
2045 // If pc is not mParserContext, then this call to ResumeParse would
2046 // do the wrong thing and try to continue parsing using
2047 // mParserContext. We need to wait to actually resume parsing on pc.
// NOTE(review): the ResumeParse() result is discarded here, so |result|
// stays NS_OK on this path - confirm that is intended.
2048 ResumeParse(PR_FALSE, PR_FALSE, PR_FALSE);
2054 return result;
// Parses a fragment of markup (aSourceBuffer) inside the element context
// described by aTagStack.
//
// aSourceBuffer - the fragment text
// aKey          - not referenced in this function; the address of the local
//                 context string is used as the parser-context key instead
// aTagStack     - names of the enclosing elements; start tags are emitted
//                 from the last entry to the first, XML end tags from the
//                 first entry to the last
// aXMLMode      - non-zero for XML: the end tags of the context are parsed
//                 after the content
// aMimeType     - forwarded to Parse()
// aMode         - forwarded to Parse()
//
// Observers are disabled (NS_PARSER_FLAG_OBSERVERS_ENABLED cleared) for the
// duration of the call and re-enabled on every return path.
// Returns the first failing result from Parse()/BuildModel(), else the
// result of the final Parse() call.
2057 NS_IMETHODIMP
2058 nsParser::ParseFragment(const nsAString& aSourceBuffer,
2059 void* aKey,
2060 nsTArray<nsString>& aTagStack,
2061 PRBool aXMLMode,
2062 const nsACString& aMimeType,
2063 nsDTDMode aMode)
2065 nsresult result = NS_OK;
2066 nsAutoString theContext;
2067 PRUint32 theCount = aTagStack.Length();
2068 PRUint32 theIndex = 0;
2070 // Disable observers for fragments
2071 mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
2073 NS_ASSERTION(!mSpeculativeScriptThread, "Can't reuse a parser like this");
// Build "<tagN>...<tag1><tag0>": the stack is walked from its last entry to
// its first.
2075 for (theIndex = 0; theIndex < theCount; theIndex++) {
2076 theContext.AppendLiteral("<");
2077 theContext.Append(aTagStack[theCount - theIndex - 1]);
2078 theContext.AppendLiteral(">");
2081 if (theCount == 0) {
2082 // Ensure that the buffer is not empty. Because none of the DTDs care
2083 // about leading whitespace, this doesn't change the result.
2084 theContext.AssignLiteral(" ");
2087 // First, parse the context to build up the DTD's tag stack. Note that we
2088 // pass PR_FALSE for the aLastCall parameter.
// The address of theContext serves as the context key for every Parse() call
// in this function.
2089 result = Parse(theContext, (void*)&theContext, aMimeType, PR_FALSE, aMode);
2090 if (NS_FAILED(result)) {
2091 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
2092 return result;
// NOTE(review): fragSink is only asserted non-null; it is dereferenced
// unconditionally below, so a sink without nsIFragmentContentSink would
// crash in release builds.
2095 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
2096 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
2098 if (!aXMLMode && theCount) {
2099 // First, we have to flush any tags that don't belong in the head if there
2100 // was no <body> in the context.
2101 // XXX This is extremely ugly. Maybe CNavDTD should have FlushMisplaced()?
2102 NS_ASSERTION(mParserContext, "Parsing didn't create a parser context?");
// assumes the HTML path always uses CNavDTD as its DTD - the static_cast is
// unchecked; TODO confirm
2104 CNavDTD* dtd = static_cast<CNavDTD*>
2105 (static_cast<nsIDTD*>
2106 (mParserContext->mDTD));
2107 NS_ASSERTION(dtd, "How did we parse anything without a dtd?");
// Open a synthetic <body> container on the DTD.
2109 CStartToken bodyToken(NS_LITERAL_STRING("BODY"), eHTMLTag_body);
2110 nsCParserNode bodyNode(&bodyToken, 0);
2112 dtd->OpenContainer(&bodyNode, eHTMLTag_body);
2114 // Now parse the flushed out tags.
2115 result = BuildModel();
2116 if (NS_FAILED(result)) {
2117 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
2118 return result;
2121 // Now that we've flushed all of the tags out of the body, we have to make
2122 // sure that there aren't any context tags left in the scanner.
2123 NS_ASSERTION(mParserContext->mScanner, "Where'd the scanner go?");
2125 PRUnichar next;
2126 if (NS_SUCCEEDED(mParserContext->mScanner->Peek(next))) {
2127 // Uh, oh. This must mean that the context stack has a special tag on
2128 // it, such as <textarea> or <title> that requires its end tag before it
2129 // will be consumed. Tell the content sink that it will be coming.
2130 // Note: For now, we can assume that there is only one such tag.
2131 NS_ASSERTION(next == '<', "The tokenizer failed to consume a token");
2132 fragSink->IgnoreFirstContainer();
2136 fragSink->WillBuildContent();
2137 // Now, parse the actual content. Note that this is the last call
2138 // for HTML content, but for XML, we will want to build and parse
2139 // the end tags. However, if tagStack is empty, it's the last call
2140 // for XML as well.
2141 if (!aXMLMode || (theCount == 0)) {
2142 result = Parse(aSourceBuffer, &theContext, aMimeType,
2143 PR_TRUE, aMode);
2144 fragSink->DidBuildContent();
2145 } else {
2146 // Add an end tag chunk, so expat will read the whole source buffer,
2147 // and not worry about ']]' etc.
2148 result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
2149 &theContext, aMimeType, PR_FALSE, aMode);
// DidBuildContent() is issued before the context end tags are parsed below.
2150 fragSink->DidBuildContent();
2152 if (NS_SUCCEEDED(result)) {
2153 nsAutoString endContext;
2154 for (theIndex = 0; theIndex < theCount; theIndex++) {
2155 // we already added an end tag chunk above
2156 if (theIndex > 0) {
2157 endContext.AppendLiteral("</");
2160 nsString& thisTag = aTagStack[theIndex];
2161 // was there an xmlns=?
// Only the text before the first space is used as the end-tag name.
2162 PRInt32 endOfTag = thisTag.FindChar(PRUnichar(' '));
2163 if (endOfTag == -1) {
2164 endContext.Append(thisTag);
2165 } else {
2166 endContext.Append(Substring(thisTag,0,endOfTag));
2169 endContext.AppendLiteral(">");
2172 result = Parse(endContext, &theContext, aMimeType,
2173 PR_TRUE, aMode);
// Re-enable the observers that were disabled at the top of the function.
2177 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
2179 return result;
2183 * This routine is called to cause the parser to continue parsing its
2184 * underlying stream. This call allows the parse process to happen in
2185 * chunks, such as when the content is push based, and we need to parse in
2186 * pieces.
2188 * An interesting change in how the parser gets used has led us to add extra
2189 * processing to this method. The case occurs when the parser is blocked in
2190 * one context, and gets a parse(string) call in another context. In this
2191 * case, the parserContexts are linked. No problem.
2193 * The problem is that Parse(string) assumes that it can proceed unabated,
2194 * but if the parser is already blocked that assumption is false. So we
2195 * needed to add a mechanism here to allow the parser to continue to process
2196 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
2197 * out of contexts.
2200 * @param allowIteration : set to true if non-script resumption is requested
2201 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
2202 * @return error code -- 0 if ok, non-zero if error.
// Drives the tokenize/build-model loop for the current parser context.
//
// allowIteration - consulted only after a context is popped: iteration
//                  continues only if this is set and the popped-from context
//                  was string based
// aIsFinalChunk  - forwarded to Tokenize(); also decides whether an
//                  interrupted parse posts a continue event. It is
//                  recomputed from the new current context after a pop.
// aCanInterrupt  - whether BuildModel() may be interrupted during this call
//
// Does nothing (returns NS_OK) when the parser is disabled or has been told
// to stop parsing. NS_ERROR_HTMLPARSER_INTERRUPTED is never returned to the
// caller; it is mapped to NS_OK. Blocking and stop-parsing results also
// return NS_OK.
2204 nsresult
2205 nsParser::ResumeParse(PRBool allowIteration, PRBool aIsFinalChunk,
2206 PRBool aCanInterrupt)
2208 nsresult result = NS_OK;
2210 if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
2211 mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
2212 MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2213 MOZ_TIMER_START(mParseTime);
2215 NS_ASSERTION(!mSpeculativeScriptThread || !mSpeculativeScriptThread->Parsing(),
2216 "Bad races happening, expect to crash!");
// NOTE(review): mParserContext and its scanner are dereferenced without a
// null check here - callers presumably guarantee a context exists; confirm.
2218 result = WillBuildModel(mParserContext->mScanner->GetFilename());
2219 if (NS_FAILED(result)) {
// Tokenizing is switched off once WillBuildModel() has failed.
2220 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
2221 return result;
2224 if (mParserContext->mDTD) {
2225 mParserContext->mDTD->WillResumeParse(mSink);
2226 PRBool theIterationIsOk = PR_TRUE;
2228 while (result == NS_OK && theIterationIsOk) {
2229 if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
2230 // -- Ref: Bug# 22485 --
2231 // Insert the unused input into the source buffer
2232 // as if it was read from the input stream.
2233 // Adding UngetReadable() per vidur!!
2234 mParserContext->mScanner->UngetReadable(mUnusedInput);
2235 mUnusedInput.Truncate(0);
2238 // Only allow parsing to be interrupted in the subsequent call to
2239 // build model.
2240 SetCanInterrupt(aCanInterrupt);
// Tokenize() is skipped (treated as NS_OK) when tokenizing has been disabled.
2241 nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
2242 ? Tokenize(aIsFinalChunk)
2243 : NS_OK;
2244 result = BuildModel();
// An interrupted final chunk must be resumed later, so schedule a
// continuation.
2246 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
2247 PostContinueEvent();
2249 SetCanInterrupt(PR_FALSE);
// Keep looping only while the tokenizer still has data and the model
// builder was not interrupted.
2251 theIterationIsOk = theTokenizerResult != kEOF &&
2252 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
2254 // Make sure not to stop parsing too early. Therefore, before shutting
2255 // down the parser, it's important to check whether the input buffer
2256 // has been scanned to completion (theTokenizerResult should be kEOF).
2257 // kEOF -> End of buffer.
2259 // If we're told to block the parser, we disable all further parsing
2260 // (and cache any data coming in) until the parser is re-enabled.
2261 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
2262 if (mParserContext->mDTD) {
2263 mParserContext->mDTD->WillInterruptParse(mSink);
2266 if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
2267 // If we were blocked by a recursive invocation, don't re-block.
2268 BlockParser();
2269 SpeculativelyParse();
2271 return NS_OK;
2273 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
2274 // Note: Parser Terminate() calls DidBuildModel.
// DidBuildModel() is called only if the stop state was not already recorded.
2275 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
2276 DidBuildModel(mStreamStatus);
2277 mInternalState = result;
2280 return NS_OK;
// The buffer was fully consumed, or the model builder was interrupted:
// decide whether the current context is finished.
2282 if ((NS_OK == result && theTokenizerResult == kEOF) ||
2283 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
2284 PRBool theContextIsStringBased =
2285 CParserContext::eCTString == mParserContext->mContextType;
2287 if (mParserContext->mStreamListenerState == eOnStop ||
2288 !mParserContext->mMultipart || theContextIsStringBased) {
// No previous context means this is the bottom of the context stack.
2289 if (!mParserContext->mPrevContext) {
2290 if (mParserContext->mStreamListenerState == eOnStop) {
2291 DidBuildModel(mStreamStatus);
2293 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2294 MOZ_TIMER_STOP(mParseTime);
2296 MOZ_TIMER_LOG(("Parse Time (this=%p): ", this));
2297 MOZ_TIMER_PRINT(mParseTime);
2299 MOZ_TIMER_LOG(("DTD Time: "));
2300 MOZ_TIMER_PRINT(mDTDTime);
2302 MOZ_TIMER_LOG(("Tokenize Time: "));
2303 MOZ_TIMER_PRINT(mTokenizeTime);
2305 return NS_OK;
2307 } else {
// A nested context is finished: pop it, optionally keep its unread data in
// mUnusedInput, then free it.
2308 CParserContext* theContext = PopContext();
2309 if (theContext) {
2310 theIterationIsOk = allowIteration && theContextIsStringBased;
2311 if (theContext->mCopyUnused) {
2312 theContext->mScanner->CopyUnusedData(mUnusedInput);
2315 delete theContext;
// Continue with the parser's recorded state, and recompute the final-chunk
// flag from the context that is now current.
2318 result = mInternalState;
2319 aIsFinalChunk = mParserContext &&
2320 mParserContext->mStreamListenerState == eOnStop;
2321 // ...then intentionally fall through to WillInterruptParse()...
2326 if (theTokenizerResult == kEOF ||
2327 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
2328 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
// NOTE(review): after a PopContext() above, mParserContext is dereferenced
// here without a null check - confirm it cannot be null on this path.
2329 if (mParserContext->mDTD) {
2330 mParserContext->mDTD->WillInterruptParse(mSink);
2334 } else {
// The current context has no DTD: record and return the failure.
2335 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
2339 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2340 MOZ_TIMER_STOP(mParseTime);
2342 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
2346 * This is where we loop over the tokens created in the
2347 * tokenization phase, and try to make sense out of them.
2349 nsresult
2350 nsParser::BuildModel()
2352 CParserContext* theRootContext = mParserContext;
2353 nsITokenizer* theTokenizer = nsnull;
2355 nsresult result = NS_OK;
2356 if (mParserContext) {
2357 PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() :
2358 NS_IPARSER_FLAG_HTML;
2359 result = mParserContext->GetTokenizer(type, mSink, theTokenizer);
2362 if (NS_SUCCEEDED(result)) {
2363 // Get the root DTD for use in model building...
2364 while (theRootContext->mPrevContext) {
2365 theRootContext = theRootContext->mPrevContext;
2368 nsIDTD* theRootDTD = theRootContext->mDTD;
2369 if (theRootDTD) {
2370 MOZ_TIMER_START(mDTDTime);
2371 result = theRootDTD->BuildModel(this, theTokenizer, nsnull, mSink);
2372 MOZ_TIMER_STOP(mDTDTime);
2374 } else {
2375 mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
2377 return result;
2380 /*******************************************************************
2381 These methods are used to talk to the netlib system...
2382 *******************************************************************/
2384 nsresult
2385 nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
2387 NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
2388 "Parser's nsIStreamListener API was not setup "
2389 "correctly in constructor.");
2390 if (mObserver) {
2391 mObserver->OnStartRequest(request, aContext);
2393 mParserContext->mStreamListenerState = eOnStart;
2394 mParserContext->mAutoDetectStatus = eUnknownDetect;
2395 mParserContext->mDTD = nsnull;
2396 mParserContext->mRequest = request;
2398 nsresult rv;
2399 nsCAutoString contentType;
2400 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
2401 if (channel) {
2402 rv = channel->GetContentType(contentType);
2403 if (NS_SUCCEEDED(rv)) {
2404 mParserContext->SetMimeType(contentType);
2408 rv = NS_OK;
2410 if (sParserDataListeners && mSink) {
2411 nsISupports *ctx = mSink->GetTarget();
2412 PRInt32 count = sParserDataListeners->Count();
2414 while (count--) {
2415 rv |= sParserDataListeners->ObjectAt(count)->
2416 OnStartRequest(request, ctx);
2420 return rv;
// Charset names that DetectByteOrderMark() assigns to its output charset.
// The *_BOM names carry no explicit byte order in the name itself; the
// *_BE / *_LE names do, and the UCS4_2143 / UCS4_3412 names denote the
// unusual UCS-4 octet orders.
2424 #define UTF16_BOM "UTF-16"
2425 #define UTF16_BE "UTF-16BE"
2426 #define UTF16_LE "UTF-16LE"
2427 #define UCS4_BOM "UTF-32"
2428 #define UCS4_BE "UTF-32BE"
2429 #define UCS4_LE "UTF-32LE"
2430 #define UCS4_2143 "X-ISO-10646-UCS-4-2143"
2431 #define UCS4_3412 "X-ISO-10646-UCS-4-3412"
2432 #define UTF8 "UTF-8"
2434 static inline PRBool IsSecondMarker(unsigned char aChar)
2436 switch (aChar) {
2437 case '!':
2438 case '?':
2439 case 'h':
2440 case 'H':
2441 return PR_TRUE;
2442 default:
2443 return PR_FALSE;
2447 static PRBool
2448 DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen,
2449 nsCString& oCharset, PRInt32& oCharsetSource)
2451 oCharsetSource= kCharsetFromAutoDetection;
2452 oCharset.Truncate();
2453 // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
2454 // for details
2455 // Also, MS Win2K notepad now generate 3 bytes BOM in UTF8 as UTF8 signature
2456 // We need to check that
2457 // UCS2 BOM FEFF = UTF8 EF BB BF
2458 switch(aBytes[0])
2460 case 0x00:
2461 if(0x00==aBytes[1]) {
2462 // 00 00
2463 if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
2464 // 00 00 FE FF UCS-4, big-endian machine (1234 order)
2465 oCharset.Assign(UCS4_BOM);
2466 } else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
2467 // 00 00 00 3C UCS-4, big-endian machine (1234 order)
2468 oCharset.Assign(UCS4_BE);
2469 } else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
2470 // 00 00 FF FE UCS-4, unusual octet order (2143)
2471 oCharset.Assign(UCS4_2143);
2472 } else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
2473 // 00 00 3C 00 UCS-4, unusual octet order (2143)
2474 oCharset.Assign(UCS4_2143);
2476 oCharsetSource = kCharsetFromByteOrderMark;
2477 } else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
2478 // 00 3C 00
2479 if(IsSecondMarker(aBytes[3])) {
2480 // 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
2481 oCharset.Assign(UTF16_BE);
2482 } else if((0x00==aBytes[3])) {
2483 // 00 3C 00 00 UCS-4, unusual octet order (3412)
2484 oCharset.Assign(UCS4_3412);
2486 oCharsetSource = kCharsetFromByteOrderMark;
2488 break;
2489 case 0x3C:
2490 if(0x00==aBytes[1] && (0x00==aBytes[3])) {
2491 // 3C 00 XX 00
2492 if(IsSecondMarker(aBytes[2])) {
2493 // 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
2494 oCharset.Assign(UTF16_LE);
2495 } else if((0x00==aBytes[2])) {
2496 // 3C 00 00 00 UCS-4, little-endian machine (4321 order)
2497 oCharset.Assign(UCS4_LE);
2499 oCharsetSource = kCharsetFromByteOrderMark;
2500 // For html, meta tag detector is invoked before this so that we have
2501 // to deal only with XML here.
2502 } else if( (0x3F==aBytes[1]) &&
2503 (0x78==aBytes[2]) && (0x6D==aBytes[3]) &&
2504 (0 == PL_strncmp("<?xml", (char*)aBytes, 5 ))) {
2505 // 3C 3F 78 6D
2506 // ASCII characters are in their normal positions, so we can safely
2507 // deal with the XML declaration in the old C way
2508 // The shortest string so far (strlen==5):
2509 // <?xml
2510 PRInt32 i;
2511 PRBool versionFound = PR_FALSE, encodingFound = PR_FALSE;
2512 for (i=6; i < aLen && !encodingFound; ++i) {
2513 // end of XML declaration?
2514 if ((((char*)aBytes)[i] == '?') &&
2515 ((i+1) < aLen) &&
2516 (((char*)aBytes)[i+1] == '>')) {
2517 break;
2519 // Version is required.
2520 if (!versionFound) {
2521 // Want to avoid string comparisons, hence looking for 'n'
2522 // and only if found check the string leading to it. Not
2523 // foolproof, but fast.
2524 // The shortest string allowed before this is (strlen==13):
2525 // <?xml version
2526 if ((((char*)aBytes)[i] == 'n') &&
2527 (i >= 12) &&
2528 (0 == PL_strncmp("versio", (char*)(aBytes+i-6), 6 ))) {
2529 // Fast forward through version
2530 char q = 0;
2531 for (++i; i < aLen; ++i) {
2532 char qi = ((char*)aBytes)[i];
2533 if (qi == '\'' || qi == '"') {
2534 if (q && q == qi) {
2535 // ending quote
2536 versionFound = PR_TRUE;
2537 break;
2538 } else {
2539 // Starting quote
2540 q = qi;
2545 } else {
2546 // encoding must follow version
2547 // Want to avoid string comparisons, hence looking for 'g'
2548 // and only if found check the string leading to it. Not
2549 // foolproof, but fast.
2550 // The shortest allowed string before this (strlen==26):
2551 // <?xml version="1" encoding
2552 if ((((char*)aBytes)[i] == 'g') &&
2553 (i >= 25) &&
2554 (0 == PL_strncmp("encodin", (char*)(aBytes+i-7), 7 ))) {
2555 PRInt32 encStart = 0;
2556 char q = 0;
2557 for (++i; i < aLen; ++i) {
2558 char qi = ((char*)aBytes)[i];
2559 if (qi == '\'' || qi == '"') {
2560 if (q && q == qi) {
2561 PRInt32 count = i - encStart;
2562 // encoding value is invalid if it is UTF-16
2563 if (count > 0 &&
2564 (0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {
2565 oCharset.Assign((char*)(aBytes+encStart),count);
2566 oCharsetSource = kCharsetFromMetaTag;
2568 encodingFound = PR_TRUE;
2569 break;
2570 } else {
2571 encStart = i+1;
2572 q = qi;
2577 } // if (!versionFound)
2578 } // for
2580 break;
2581 case 0xEF:
2582 if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
2583 // EF BB BF
2584 // Win2K UTF-8 BOM
2585 oCharset.Assign(UTF8);
2586 oCharsetSource= kCharsetFromByteOrderMark;
2588 break;
2589 case 0xFE:
2590 if(0xFF==aBytes[1]) {
2591 if(0x00==aBytes[2] && 0x00==aBytes[3]) {
2592 // FE FF 00 00 UCS-4, unusual octet order (3412)
2593 oCharset.Assign(UCS4_3412);
2594 } else {
2595 // FE FF UTF-16, big-endian
2596 oCharset.Assign(UTF16_BOM);
2598 oCharsetSource= kCharsetFromByteOrderMark;
2600 break;
2601 case 0xFF:
2602 if(0xFE==aBytes[1]) {
2603 if(0x00==aBytes[2] && 0x00==aBytes[3])
2604 // FF FE 00 00 UTF-32, little-endian
2605 oCharset.Assign(UCS4_BOM);
2606 else
2607 // FF FE
2608 // UTF-16, little-endian
2609 oCharset.Assign(UTF16_BOM);
2610 oCharsetSource= kCharsetFromByteOrderMark;
2612 break;
2613 // case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
2614 // We do not care EBCIDIC here....
2615 // }
2616 // break;
2617 } // switch
2618 return !oCharset.IsEmpty();
2621 inline const char
2622 GetNextChar(nsACString::const_iterator& aStart,
2623 nsACString::const_iterator& aEnd)
2625 NS_ASSERTION(aStart != aEnd, "end of buffer");
2626 return (++aStart != aEnd) ? *aStart : '\0';
// Looks for a charset declared in a <meta ...charset=...> tag near the start
// of the raw stream data.
//
// aBytes         - start of the raw data
// aLen           - number of bytes at aBytes (at most the first 2048 are
//                  examined)
// aCharset       - receives the charset value; emptied on entry
// aCharsetSource - always set to kCharsetFromMetaTag
//
// Only runs for documents whose mime type is kHTMLTextContentType.
// Returns PR_TRUE when a non-empty charset value was found, PR_FALSE
// otherwise (including when a tag or comment is incomplete in this buffer).
2629 PRBool
2630 nsParser::DetectMetaTag(const char* aBytes,
2631 PRInt32 aLen,
2632 nsCString& aCharset,
2633 PRInt32& aCharsetSource)
2635 aCharsetSource= kCharsetFromMetaTag;
2636 aCharset.SetLength(0);
2638 // XXX Only look inside HTML documents for now. For XML
2639 // documents we should be looking inside the XMLDecl.
2640 if (!mParserContext->mMimeType.EqualsLiteral(kHTMLTextContentType)) {
2641 return PR_FALSE;
2644 // Fast and loose parsing to determine if we have a complete
2645 // META tag in this block, looking up to 2k into it.
2646 const nsASingleFragmentCString& str =
2647 Substring(aBytes, aBytes + PR_MIN(aLen, 2048));
2648 // XXXldb Should be const_char_iterator when FindInReadable supports it.
2649 nsACString::const_iterator begin, end;
2651 str.BeginReading(begin);
2652 str.EndReading(end);
// currPos: scan position; tokEnd: end of the token being examined;
// tagEnd: the '>' closing the tag being examined.
2653 nsACString::const_iterator currPos(begin);
2654 nsACString::const_iterator tokEnd;
2655 nsACString::const_iterator tagEnd(begin);
2657 while (currPos != end) {
2658 if (!FindCharInReadable('<', currPos, end))
2659 break; // no tag found in this buffer
// Each GetNextChar() call advances currPos, so the order of the tests below
// matters.
2661 if (GetNextChar(currPos, end) == '!') {
2662 if (GetNextChar(currPos, end) != '-' ||
2663 GetNextChar(currPos, end) != '-') {
2664 // If we only see a <! not followed by --, just skip to the next >.
2665 if (!FindCharInReadable('>', currPos, end)) {
2666 return PR_FALSE; // No more tags to follow.
2669 // Continue searching for a meta tag following this "comment".
2670 ++currPos;
2671 continue;
2674 // Found MDO ( <!-- ). Now search for MDC ( --[*s]> )
2675 PRBool foundMDC = PR_FALSE;
2676 PRBool foundMatch = PR_FALSE;
2677 while (!foundMDC) {
2678 if (GetNextChar(currPos, end) == '-' &&
2679 GetNextChar(currPos, end) == '-') {
2680 foundMatch = !foundMatch; // toggle until we've matching "--"
2681 } else if (currPos == end) {
2682 return PR_FALSE; // Couldn't find --[*s]> in this buffer
2683 } else if (foundMatch && *currPos == '>') {
2684 foundMDC = PR_TRUE; // found comment end delimiter.
2685 ++currPos;
2688 continue; // continue searching for META tag.
2691 // Find the end of the tag, break if incomplete
2692 tagEnd = currPos;
2693 if (!FindCharInReadable('>', tagEnd, end))
2694 break;
2696 // If this is not a META tag, continue to next loop
// NOTE(review): the pre-increments below presumably stay inside the buffer
// because the '>' at tagEnd fails every comparison first - confirm.
2697 if ( (*currPos != 'm' && *currPos != 'M') ||
2698 (*(++currPos) != 'e' && *currPos != 'E') ||
2699 (*(++currPos) != 't' && *currPos != 'T') ||
2700 (*(++currPos) != 'a' && *currPos != 'A') ||
2701 !nsCRT::IsAsciiSpace(*(++currPos))) {
2702 currPos = tagEnd;
2703 continue;
2706 // If could not find "charset" in this tag, skip this tag and try next
2707 tokEnd = tagEnd;
2708 if (!CaseInsensitiveFindInReadable(NS_LITERAL_CSTRING("CHARSET"),
2709 currPos, tokEnd)) {
2710 currPos = tagEnd;
2711 continue;
// Continue scanning from the end of the matched "charset" text.
2713 currPos = tokEnd;
2715 // skip spaces before '='
2716 while (*currPos == kSpace || *currPos == kNewLine ||
2717 *currPos == kCR || *currPos == kTab) {
2718 ++currPos;
2720 // skip '='
2721 if (*currPos != '=') {
2722 currPos = tagEnd;
2723 continue;
2725 ++currPos;
2726 // skip spaces after '='
2727 while (*currPos == kSpace || *currPos == kNewLine ||
2728 *currPos == kCR || *currPos == kTab) {
2729 ++currPos;
2732 // skip open quote
2733 if (*currPos == '\'' || *currPos == '\"')
2734 ++currPos;
2736 // find the end of charset string
// The value ends at the next quote character or at the end of the tag.
2737 tokEnd = currPos;
2738 while (*tokEnd != '\'' && *tokEnd != '\"' && tokEnd != tagEnd)
2739 ++tokEnd;
2741 // return true if we successfully got something for charset
2742 if (currPos != tokEnd) {
2743 aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
2744 return PR_TRUE;
2747 // Nothing specified as charset, continue next loop
2748 currPos = tagEnd;
2751 return PR_FALSE;
2754 typedef struct {
2755 PRBool mNeedCharsetCheck;
2756 nsParser* mParser;
2757 nsIParserFilter* mParserFilter;
2758 nsScanner* mScanner;
2759 nsIRequest* mRequest;
2760 } ParserWriteStruct;
2763 * This function is invoked as a result of a call to a stream's
2764 * ReadSegments() method. It is called for each contiguous buffer
2765 * of data in the underlying stream or pipe. Using ReadSegments
2766 * allows us to avoid copying data to read out of the stream.
2768 static NS_METHOD
2769 ParserWriteFunc(nsIInputStream* in,
2770 void* closure,
2771 const char* fromRawSegment,
2772 PRUint32 toOffset,
2773 PRUint32 count,
2774 PRUint32 *writeCount)
2776 nsresult result;
2777 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
2778 const char* buf = fromRawSegment;
2779 PRUint32 theNumRead = count;
2781 if (!pws) {
2782 return NS_ERROR_FAILURE;
2785 if (pws->mNeedCharsetCheck) {
2786 PRInt32 guessSource;
2787 nsCAutoString guess;
2788 nsCAutoString preferred;
2790 pws->mNeedCharsetCheck = PR_FALSE;
2791 if (pws->mParser->DetectMetaTag(buf, theNumRead, guess, guessSource) ||
2792 ((count >= 4) &&
2793 DetectByteOrderMark((const unsigned char*)buf,
2794 theNumRead, guess, guessSource))) {
2795 nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID));
2796 result = alias->GetPreferred(guess, preferred);
2797 // Only continue if it's a recognized charset and not
2798 // one of a designated set that we ignore.
2799 if (NS_SUCCEEDED(result) &&
2800 ((kCharsetFromByteOrderMark == guessSource) ||
2801 (!preferred.EqualsLiteral("UTF-16") &&
2802 !preferred.EqualsLiteral("UTF-16BE") &&
2803 !preferred.EqualsLiteral("UTF-16LE") &&
2804 !preferred.EqualsLiteral("UTF-32") &&
2805 !preferred.EqualsLiteral("UTF-32BE") &&
2806 !preferred.EqualsLiteral("UTF-32LE")))) {
2807 guess = preferred;
2808 pws->mParser->SetDocumentCharset(guess, guessSource);
2809 pws->mParser->SetSinkCharset(preferred);
2810 nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
2811 if (channel) {
2812 nsCOMPtr<nsISupports> cacheToken;
2813 channel->GetCacheToken(getter_AddRefs(cacheToken));
2814 if (cacheToken) {
2815 nsCOMPtr<nsICacheEntryDescriptor> cacheDescriptor(do_QueryInterface(cacheToken));
2816 if (cacheDescriptor) {
2817 #ifdef DEBUG
2818 nsresult rv =
2819 #endif
2820 cacheDescriptor->SetMetaDataElement("charset",
2821 guess.get());
2822 NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
2830 if (pws->mParserFilter)
2831 pws->mParserFilter->RawBuffer(buf, &theNumRead);
2833 result = pws->mScanner->Append(buf, theNumRead, pws->mRequest);
2834 if (NS_SUCCEEDED(result)) {
2835 *writeCount = count;
2838 return result;
2841 nsresult
2842 nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
2843 nsIInputStream *pIStream, PRUint32 sourceOffset,
2844 PRUint32 aLength)
2846 NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
2847 eOnDataAvail == mParserContext->mStreamListenerState),
2848 "Error: OnStartRequest() must be called before OnDataAvailable()");
2849 NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
2850 "Must have a buffered input stream");
2852 nsresult rv = NS_OK;
2854 CParserContext *theContext = mParserContext;
2856 while (theContext && theContext->mRequest != request) {
2857 theContext = theContext->mPrevContext;
2860 if (theContext) {
2861 theContext->mStreamListenerState = eOnDataAvail;
2863 if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
2864 mSpeculativeScriptThread) {
2865 mSpeculativeScriptThread->StopParsing(PR_FALSE);
2868 if (eInvalidDetect == theContext->mAutoDetectStatus) {
2869 if (theContext->mScanner) {
2870 nsScannerIterator iter;
2871 theContext->mScanner->EndReading(iter);
2872 theContext->mScanner->SetPosition(iter, PR_TRUE);
2876 PRUint32 totalRead;
2877 ParserWriteStruct pws;
2878 pws.mNeedCharsetCheck =
2879 (0 == sourceOffset) && (mCharsetSource < kCharsetFromMetaTag);
2880 pws.mParser = this;
2881 pws.mParserFilter = mParserFilter;
2882 pws.mScanner = theContext->mScanner;
2883 pws.mRequest = request;
2885 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
2886 if (NS_FAILED(rv)) {
2887 return rv;
2890 // Don't bother to start parsing until we've seen some
2891 // non-whitespace data
2892 if (mScriptsExecuting == 0 &&
2893 theContext->mScanner->FirstNonWhitespacePosition() >= 0) {
2894 if (mSink) {
2895 mSink->WillParse();
2897 rv = ResumeParse();
2899 } else {
2900 rv = NS_ERROR_UNEXPECTED;
2903 return rv;
2907 * This is called by the networking library once the last block of data
2908 * has been collected from the net.
2910 nsresult
2911 nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
2912 nsresult status)
2914 nsresult rv = NS_OK;
2916 if (mSpeculativeScriptThread) {
2917 mSpeculativeScriptThread->StopParsing(PR_FALSE);
2920 CParserContext *pc = mParserContext;
2921 while (pc) {
2922 if (pc->mRequest == request) {
2923 pc->mStreamListenerState = eOnStop;
2924 pc->mScanner->SetIncremental(PR_FALSE);
2925 break;
2928 pc = pc->mPrevContext;
2931 mStreamStatus = status;
2933 if (mParserFilter)
2934 mParserFilter->Finish();
2936 if (mScriptsExecuting == 0 && NS_SUCCEEDED(rv)) {
2937 if (mSink) {
2938 mSink->WillParse();
2940 rv = ResumeParse(PR_TRUE, PR_TRUE);
2943 // If the parser isn't enabled, we don't finish parsing till
2944 // it is reenabled.
2947 // XXX Should we wait to notify our observers as well if the
2948 // parser isn't yet enabled?
2949 if (mObserver) {
2950 mObserver->OnStopRequest(request, aContext, status);
2953 if (sParserDataListeners && mSink) {
2954 nsISupports *ctx = mSink->GetTarget();
2955 PRInt32 count = sParserDataListeners->Count();
2957 while (count--) {
2958 rv |= sParserDataListeners->ObjectAt(count)->OnStopRequest(request, ctx,
2959 status);
2963 return rv;
2967 /*******************************************************************
2968 Here come the tokenization methods...
2969 *******************************************************************/
2973 * Part of the code sandwich, this gets called right before
2974 * the tokenization process begins. The main reason for
2975 * this call is to allow the delegate to do initialization.
2977 PRBool
2978 nsParser::WillTokenize(PRBool aIsFinalChunk)
2980 if (!mParserContext) {
2981 return PR_TRUE;
2984 nsITokenizer* theTokenizer;
2985 PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() :
2986 NS_IPARSER_FLAG_HTML;
2987 nsresult result = mParserContext->GetTokenizer(type, mSink, theTokenizer);
2988 NS_ENSURE_SUCCESS(result, PR_FALSE);
2989 return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk,
2990 &mTokenAllocator));
2995 * This is the primary control routine to consume tokens.
2996 * It iteratively consumes tokens until an error occurs or
2997 * you run out of data.
2999 nsresult nsParser::Tokenize(PRBool aIsFinalChunk)
3001 nsITokenizer* theTokenizer;
3003 nsresult result = NS_ERROR_NOT_AVAILABLE;
3004 if (mParserContext) {
3005 PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType()
3006 : NS_IPARSER_FLAG_HTML;
3007 result = mParserContext->GetTokenizer(type, mSink, theTokenizer);
3010 if (NS_SUCCEEDED(result)) {
3011 if (mFlags & NS_PARSER_FLAG_FLUSH_TOKENS) {
3012 // For some reason tokens didn't get flushed (probably
3013 // the parser got blocked before all the tokens in the
3014 // stack got handled). Flush 'em now. Ref. bug 104856
3015 if (theTokenizer->GetCount() != 0) {
3016 return result;
3019 // Reset since the tokens have been flushed.
3020 mFlags &= ~NS_PARSER_FLAG_FLUSH_TOKENS;
3023 PRBool flushTokens = PR_FALSE;
3025 MOZ_TIMER_START(mTokenizeTime);
3027 mParserContext->mNumConsumed = 0;
3029 PRBool killSink = PR_FALSE;
3031 WillTokenize(aIsFinalChunk);
3032 while (NS_SUCCEEDED(result)) {
3033 mParserContext->mNumConsumed += mParserContext->mScanner->Mark();
3034 result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
3035 flushTokens);
3036 if (NS_FAILED(result)) {
3037 mParserContext->mScanner->RewindToMark();
3038 if (kEOF == result){
3039 break;
3041 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
3042 killSink = PR_TRUE;
3043 result = Terminate();
3044 break;
3046 } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
3047 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
3048 // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
3049 // Also remember to update the marked position.
3050 mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
3051 mParserContext->mNumConsumed += mParserContext->mScanner->Mark();
3052 break;
3055 DidTokenize(aIsFinalChunk);
3057 MOZ_TIMER_STOP(mTokenizeTime);
3059 if (killSink) {
3060 mSink = nsnull;
3062 } else {
3063 result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
3066 return result;
3070 * This is the tail-end of the code sandwich for the
3071 * tokenization process. It gets called once tokenziation
3072 * has completed for each phase.
3074 PRBool
3075 nsParser::DidTokenize(PRBool aIsFinalChunk)
3077 if (!mParserContext) {
3078 return PR_TRUE;
3081 nsITokenizer* theTokenizer;
3082 PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() :
3083 NS_IPARSER_FLAG_HTML;
3084 nsresult rv = mParserContext->GetTokenizer(type, mSink, theTokenizer);
3085 NS_ENSURE_SUCCESS(rv, PR_FALSE);
3087 rv = theTokenizer->DidTokenize(aIsFinalChunk);
3088 return NS_SUCCEEDED(rv);
3092 * Get the channel associated with this parser
3094 * @param aChannel out param that will contain the result
3095 * @return NS_OK if successful
3097 NS_IMETHODIMP
3098 nsParser::GetChannel(nsIChannel** aChannel)
3100 nsresult result = NS_ERROR_NOT_AVAILABLE;
3101 if (mParserContext && mParserContext->mRequest) {
3102 result = CallQueryInterface(mParserContext->mRequest, aChannel);
3104 return result;
3108 * Get the DTD associated with this parser
3110 NS_IMETHODIMP
3111 nsParser::GetDTD(nsIDTD** aDTD)
3113 if (mParserContext) {
3114 NS_IF_ADDREF(*aDTD = mParserContext->mDTD);
3117 return NS_OK;