1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=79: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
19 * Netscape Communications Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 1998
21 * the Initial Developer. All Rights Reserved.
24 * Pierre Phaneuf <pp@ludusdesign.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
44 #include "nsScanner.h"
46 #include "nsIStringStream.h"
47 #include "nsIChannel.h"
48 #include "nsICachingChannel.h"
49 #include "nsICacheEntryDescriptor.h"
50 #include "nsICharsetAlias.h"
51 #include "nsICharsetConverterManager.h"
52 #include "nsIInputStream.h"
57 #include "nsAutoLock.h"
58 #include "nsParserCIID.h"
59 #include "nsReadableUtils.h"
61 #include "nsExpatDriver.h"
62 #include "nsIServiceManager.h"
63 #include "nsICategoryManager.h"
64 #include "nsISupportsPrimitives.h"
65 #include "nsIFragmentContentSink.h"
66 #include "nsStreamUtils.h"
67 #include "nsHTMLTokenizer.h"
68 #include "nsIDocument.h"
69 #include "nsNetUtil.h"
70 #include "nsScriptLoader.h"
71 #include "nsDataHashtable.h"
72 #include "nsIThreadPool.h"
73 #include "nsXPCOMCIDInternal.h"
75 #ifdef MOZ_VIEW_SOURCE
76 #include "nsViewSourceHTML.h"
// Bit flags stored in nsParser::mFlags. Each value occupies its own bit so the
// flags can be OR-ed together and tested independently. In this file,
// nsParser::Initialize() starts a parser with OBSERVERS_ENABLED,
// PARSER_ENABLED and CAN_TOKENIZE set, and PostContinueEvent() sets
// PENDING_CONTINUE_EVENT once a continue event has been dispatched.
79 #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
80 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
81 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
82 #define NS_PARSER_FLAG_CAN_INTERRUPT 0x00000010
83 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
84 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
86 static NS_DEFINE_IID(kISupportsIID
, NS_ISUPPORTS_IID
);
87 static NS_DEFINE_CID(kCParserCID
, NS_PARSER_CID
);
88 static NS_DEFINE_IID(kIParserIID
, NS_IPARSER_IID
);
90 //-------------------------------------------------------------------
92 nsCOMArray
<nsIUnicharStreamListener
> *nsParser::sParserDataListeners
;
94 //-------------- Begin ParseContinue Event Definition ------------------------
96 The parser can be explicitly interrupted by passing a return value of
97 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
98 the parser to stop processing and allow the application to return to the event
99 loop. The data which was left at the time of interruption will be processed
100 the next time OnDataAvailable is called. If the parser has received its final
101 chunk of data then OnDataAvailable will no longer be called by the networking
102 module, so the parser will schedule a nsParserContinueEvent which will call
103 the parser to process the remaining data after returning to the event loop.
104 If the parser is interrupted while processing the remaining data it will
105 schedule another nsParserContinueEvent. The processing of data followed by
106 scheduling of the continue events will proceed until either:
108 1) All of the remaining data can be processed without interrupting
109 2) The parser has been cancelled.
112 This capability is currently used in CNavDTD and nsHTMLContentSink. The
113 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
114 processed and when each token is processed. The nsHTMLContentSink records
115 the time when the chunk has started processing and will return
116 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
117 threshold called max tokenizing processing time. This allows the content sink
118 to limit how much data is processed in a single chunk which in turn gates how
119 much time is spent away from the event loop. Processing smaller chunks of data
120 also reduces the time spent in subsequent reflows.
122 This capability is most apparent when loading large documents. If the maximum
123 token processing time is set small enough the application will remain
124 responsive during document load.
126 A side-effect of this capability is that document load is not complete when
127 the last chunk of data is passed to OnDataAvailable since the parser may have
128 been interrupted when the last chunk of data arrived. The document is complete
129 when all of the document has been tokenized and there aren't any pending
130 nsParserContinueEvents. This can cause problems if the application assumes
131 that it can monitor the load requests to determine when the document load has
132 been completed. This is what happens in Mozilla. The document is considered
133 completely loaded when all of the load requests have been satisfied. To delay
134 the document load until all of the parsing has been completed the
135 nsHTMLContentSink adds a dummy parser load request which is not removed until
136 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
137 DidBuildModel until the final chunk of data has been passed to the parser
138 through the OnDataAvailable and there aren't any pending
139 nsParserContinueEvents.
141 Currently the parser ignores requests to be interrupted during the
142 processing of script. This is because a document.write followed by JavaScript
143 calls to manipulate the DOM may fail if the parser was interrupted during the
146 For more details @see bugzilla bug 76722
150 class nsParserContinueEvent
: public nsRunnable
153 nsRefPtr
<nsParser
> mParser
;
155 nsParserContinueEvent(nsParser
* aParser
)
161 mParser
->HandleParserContinueEvent(this);
166 //-------------- End ParseContinue Event Definition ------------------------
168 template <class Type
>
171 typedef void (*Reaper
)(Type
*);
173 Holder(Reaper aReaper
)
174 : mHoldee(nsnull
), mReaper(aReaper
)
187 const Holder
&operator =(Type
*aHoldee
) {
188 if (mHoldee
&& aHoldee
!= mHoldee
) {
200 class nsSpeculativeScriptThread
: public nsIRunnable
{
202 nsSpeculativeScriptThread()
203 : mLock(nsAutoLock::DestroyLock
),
204 mCVar(PR_DestroyCondVar
),
206 mCurrentlyParsing(0),
210 mTerminated(PR_FALSE
) {
213 ~nsSpeculativeScriptThread() {
214 NS_ASSERTION(NS_IsMainThread() || !mDocument
,
215 "Destroying the document on the wrong thread");
221 nsresult
StartParsing(nsParser
*aParser
);
222 void StopParsing(PRBool aFromDocWrite
);
224 enum PrefetchType
{ SCRIPT
, STYLESHEET
, IMAGE
};
225 struct PrefetchEntry
{
229 nsString elementType
;
232 nsIDocument
*GetDocument() {
233 NS_ASSERTION(NS_IsMainThread(), "Potential threadsafety hazard");
238 return mCurrentlyParsing
;
241 CParserContext
*Context() {
245 typedef nsDataHashtable
<nsCStringHashKey
, PRBool
> PreloadedType
;
246 PreloadedType
& GetPreloadedURIs() {
247 return mPreloadedURIs
;
251 mTerminated
= PR_TRUE
;
252 StopParsing(PR_FALSE
);
254 PRBool
Terminated() {
260 void ProcessToken(CToken
*aToken
);
262 void AddToPrefetchList(const nsAString
&src
,
263 const nsAString
&charset
,
264 const nsAString
&elementType
,
267 // These members are only accessed on the speculatively parsing thread.
268 nsTokenAllocator mTokenAllocator
;
270 // The following members are shared across the main thread and the
271 // speculatively parsing thread.
272 Holder
<PRLock
> mLock
;
273 Holder
<PRCondVar
> mCVar
;
275 volatile PRUint32 mKeepParsing
;
276 volatile PRUint32 mCurrentlyParsing
;
277 nsRefPtr
<nsHTMLTokenizer
> mTokenizer
;
278 nsAutoPtr
<nsScanner
> mScanner
;
280 enum { kBatchPrefetchURIs
= 5 };
281 nsAutoTArray
<PrefetchEntry
, kBatchPrefetchURIs
> mURIs
;
284 // Number of characters consumed by the last speculative parse.
285 PRUint32 mNumConsumed
;
287 // These members are only accessed on the main thread.
288 nsCOMPtr
<nsIDocument
> mDocument
;
289 CParserContext
*mContext
;
290 PreloadedType mPreloadedURIs
;
294 class nsPreloadURIs
: public nsIRunnable
{
296 nsPreloadURIs(nsAutoTArray
<nsSpeculativeScriptThread::PrefetchEntry
, 5> &aURIs
,
297 nsSpeculativeScriptThread
*aScriptThread
)
299 mScriptThread(aScriptThread
) {
305 static void PreloadURIs(const nsAutoTArray
<nsSpeculativeScriptThread::PrefetchEntry
, 5> &aURIs
,
306 nsSpeculativeScriptThread
*aScriptThread
);
309 nsAutoTArray
<nsSpeculativeScriptThread::PrefetchEntry
, 5> mURIs
;
310 nsRefPtr
<nsSpeculativeScriptThread
> mScriptThread
;
313 NS_IMPL_THREADSAFE_ISUPPORTS1(nsPreloadURIs
, nsIRunnable
)
318 PreloadURIs(mURIs
, mScriptThread
);
323 nsPreloadURIs::PreloadURIs(const nsAutoTArray
<nsSpeculativeScriptThread::PrefetchEntry
, 5> &aURIs
,
324 nsSpeculativeScriptThread
*aScriptThread
)
326 NS_ASSERTION(NS_IsMainThread(), "Touching non-threadsafe objects off thread");
328 if (aScriptThread
->Terminated()) {
332 nsIDocument
*doc
= aScriptThread
->GetDocument();
333 NS_ASSERTION(doc
, "We shouldn't have started preloading without a document");
335 // Note: Per the code in the HTML content sink, we should be keeping track
336 // of each <base href> as it comes. However, because we do our speculative
337 // parsing off the main thread, this is hard to emulate. For now, just load
338 // the URIs using the document's base URI at the potential cost of being
339 // wrong and having to re-load a given relative URI later.
340 nsIURI
*base
= doc
->GetBaseURI();
341 const nsCString
&charset
= doc
->GetDocumentCharacterSet();
342 nsSpeculativeScriptThread::PreloadedType
&alreadyPreloaded
=
343 aScriptThread
->GetPreloadedURIs();
344 for (PRUint32 i
= 0, e
= aURIs
.Length(); i
< e
; ++i
) {
345 const nsSpeculativeScriptThread::PrefetchEntry
&pe
= aURIs
[i
];
346 if (pe
.type
!= nsSpeculativeScriptThread::SCRIPT
) {
350 nsCOMPtr
<nsIURI
> uri
;
351 nsresult rv
= NS_NewURI(getter_AddRefs(uri
), pe
.uri
, charset
.get(), base
);
353 NS_WARNING("Failed to create a URI");
360 if (alreadyPreloaded
.Get(spec
, &answer
)) {
361 // Already preloaded. Don't preload again.
365 alreadyPreloaded
.Put(spec
, PR_TRUE
);
367 doc
->ScriptLoader()->PreloadURI(uri
, pe
.charset
, pe
.elementType
);
371 NS_IMPL_THREADSAFE_ISUPPORTS1(nsSpeculativeScriptThread
, nsIRunnable
)
374 nsSpeculativeScriptThread::Run()
376 NS_ASSERTION(!NS_IsMainThread(), "Speculative parsing on the main thread?");
380 mTokenizer
->WillTokenize(PR_FALSE
, &mTokenAllocator
);
381 while (mKeepParsing
) {
382 PRBool flushTokens
= PR_FALSE
;
383 nsresult rv
= mTokenizer
->ConsumeToken(*mScanner
, flushTokens
);
388 mNumConsumed
+= mScanner
->Mark();
390 // TODO Don't pop the tokens.
392 while (mKeepParsing
&& (token
= mTokenizer
->PopToken())) {
396 mTokenizer
->DidTokenize(PR_FALSE
);
399 nsAutoLock
al(mLock
.get());
401 mCurrentlyParsing
= 0;
402 PR_NotifyCondVar(mCVar
.get());
408 nsSpeculativeScriptThread::StartParsing(nsParser
*aParser
)
410 NS_ASSERTION(NS_IsMainThread(), "Called on the wrong thread");
411 NS_ASSERTION(!mCurrentlyParsing
, "Bad race happening");
413 if (!aParser
->ThreadPool()) {
417 nsIContentSink
*sink
= aParser
->GetContentSink();
422 nsCOMPtr
<nsIDocument
> doc
= do_QueryInterface(sink
->GetTarget());
428 CParserContext
*context
= aParser
->PeekContext();
430 mLock
= nsAutoLock::NewLock("nsSpeculativeScriptThread::mLock");
432 return NS_ERROR_OUT_OF_MEMORY
;
435 mCVar
= PR_NewCondVar(mLock
.get());
437 return NS_ERROR_OUT_OF_MEMORY
;
440 if (!mPreloadedURIs
.Init(15)) {
441 return NS_ERROR_OUT_OF_MEMORY
;
444 mTokenizer
= new nsHTMLTokenizer(context
->mDTDMode
, context
->mDocType
,
445 context
->mParserCommand
, 0);
447 return NS_ERROR_OUT_OF_MEMORY
;
449 mTokenizer
->CopyState(context
->mTokenizer
);
450 context
->mScanner
->CopyUnusedData(toScan
);
451 if (toScan
.IsEmpty()) {
454 } else if (context
== mContext
) {
455 // Don't parse the same part of the document twice.
456 nsScannerIterator end
;
457 context
->mScanner
->EndReading(end
);
459 nsScannerIterator start
;
460 context
->mScanner
->CurrentPosition(start
);
462 if (mNumConsumed
> context
->mNumConsumed
) {
463 // We consumed more the last time we tried speculatively parsing than we
464 // did the last time we actually parsed.
465 PRUint32 distance
= Distance(start
, end
);
466 start
.advance(PR_MIN(mNumConsumed
- context
->mNumConsumed
, distance
));
470 // We're at the end of this context's buffer, nothing else to do.
474 CopyUnicodeTo(start
, end
, toScan
);
476 // Grab all of the context.
477 context
->mScanner
->CopyUnusedData(toScan
);
478 if (toScan
.IsEmpty()) {
479 // Nothing to parse, don't do anything.
484 nsCAutoString charset
;
486 aParser
->GetDocumentCharset(charset
, source
);
488 mScanner
= new nsScanner(toScan
, charset
, source
);
490 return NS_ERROR_OUT_OF_MEMORY
;
495 mCurrentlyParsing
= 1;
497 return aParser
->ThreadPool()->Dispatch(this, NS_DISPATCH_NORMAL
);
501 nsSpeculativeScriptThread::StopParsing(PRBool
/*aFromDocWrite*/)
503 NS_ASSERTION(NS_IsMainThread(), "Can't stop parsing from another thread");
506 // If we bailed early out of StartParsing, don't do anything.
511 nsAutoLock
al(mLock
.get());
514 if (mCurrentlyParsing
) {
515 PR_WaitCondVar(mCVar
.get(), PR_INTERVAL_NO_TIMEOUT
);
516 NS_ASSERTION(!mCurrentlyParsing
, "Didn't actually stop parsing?");
520 // The thread is now idle.
522 // If we're terminated, then we need to ensure that we release our document
523 // and tokenizer here on the main thread so that our last reference to them
524 // isn't our alter-ego rescheduled on another thread.
528 } else if (mNumURIs
) {
529 // Note: Don't do this if we're terminated.
530 nsPreloadURIs::PreloadURIs(mURIs
, this);
535 // Note: Currently, we pop the tokens off (see the comment in Run) so this
536 // isn't a problem. If and when we actually use the tokens created
537 // off-thread, we'll need to use aFromDocWrite for real.
541 nsSpeculativeScriptThread::ProcessToken(CToken
*aToken
)
543 // Only called on the speculative script thread.
545 CHTMLToken
*token
= static_cast<CHTMLToken
*>(aToken
);
546 switch (static_cast<eHTMLTokenTypes
>(token
->GetTokenType())) {
548 CStartToken
*start
= static_cast<CStartToken
*>(aToken
);
549 nsHTMLTag tag
= static_cast<nsHTMLTag
>(start
->GetTypeID());
550 PRInt16 attrs
= start
->GetAttributeCount();
553 nsAutoString elementType
;
554 nsAutoString charset
;
558 #if 0 // TODO Support stylesheet and image preloading.
559 case eHTMLTag_link
: {
560 // If this is a <link rel=stylesheet> find the src.
561 PRBool isRelStylesheet
= PR_FALSE
;
562 for (; i
< attrs
; ++i
) {
563 CAttributeToken
*attr
= static_cast<CAttributeToken
*>(mTokenizer
->PopToken());
564 NS_ASSERTION(attr
->GetTokenType() == eToken_attribute
, "Weird token");
566 if (attr
->GetKey().EqualsLiteral("rel")) {
567 if (!attr
->GetValue().EqualsLiteral("stylesheet")) {
568 IF_FREE(attr
, &mTokenAllocator
);
571 isRelStylesheet
= PR_TRUE
;
572 } else if (attr
->GetKey().EqualsLiteral("src")) {
573 src
.Assign(attr
->GetValue());
574 if (isRelStylesheet
) {
575 IF_FREE(attr
, &mTokenAllocator
);
580 IF_FREE(attr
, &mTokenAllocator
);
583 if (isRelStylesheet
&& !src
.IsEmpty()) {
584 AddToPrefetchList(src
, STYLESHEET
);
592 if (tag
== eHTMLTag_img
)
595 case eHTMLTag_script
:
596 if (tag
== eHTMLTag_script
)
599 for (; i
< attrs
; ++i
) {
600 CAttributeToken
*attr
= static_cast<CAttributeToken
*>(mTokenizer
->PopToken());
601 NS_ASSERTION(attr
->GetTokenType() == eToken_attribute
, "Weird token");
603 if (attr
->GetKey().EqualsLiteral("src")) {
604 src
.Assign(attr
->GetValue());
605 } else if (attr
->GetKey().EqualsLiteral("charset")) {
606 charset
.Assign(attr
->GetValue());
607 } else if (attr
->GetKey().EqualsLiteral("type")) {
608 elementType
.Assign(attr
->GetValue());
610 IF_FREE(attr
, &mTokenAllocator
);
613 if (!src
.IsEmpty()) {
614 AddToPrefetchList(src
, charset
, elementType
, ptype
);
622 for (; i
< attrs
; ++i
) {
623 CToken
*attr
= mTokenizer
->PopToken();
627 NS_ASSERTION(attr
->GetTokenType() == eToken_attribute
, "Weird token");
628 IF_FREE(attr
, &mTokenAllocator
);
638 IF_FREE(aToken
, &mTokenAllocator
);
642 nsSpeculativeScriptThread::AddToPrefetchList(const nsAString
&src
,
643 const nsAString
&charset
,
644 const nsAString
&elementType
,
647 PrefetchEntry
*pe
= mURIs
.InsertElementAt(mNumURIs
++);
650 pe
->charset
= charset
;
651 pe
->elementType
= elementType
;
653 if (mNumURIs
== kBatchPrefetchURIs
) {
654 nsCOMPtr
<nsIRunnable
> r
= new nsPreloadURIs(mURIs
, this);
658 NS_DispatchToMainThread(r
, NS_DISPATCH_NORMAL
);
// Static service pointers shared by every nsParser instance. They start out
// null; the module-initialization code in this file swaps the acquired
// services into them, and nsParser::Shutdown() releases whichever ones are
// set (shutting the speculative thread pool down before releasing it).
662 nsICharsetAlias
* nsParser::sCharsetAliasService
= nsnull
;
663 nsICharsetConverterManager
* nsParser::sCharsetConverterManager
= nsnull
;
664 nsIThreadPool
* nsParser::sSpeculativeThreadPool
= nsnull
;
667 * This gets called when the htmlparser module is initialized.
674 nsCOMPtr
<nsICategoryManager
> cm
=
675 do_GetService(NS_CATEGORYMANAGER_CONTRACTID
, &rv
);
676 NS_ENSURE_SUCCESS(rv
, rv
);
678 nsCOMPtr
<nsISimpleEnumerator
> e
;
679 rv
= cm
->EnumerateCategory("Parser data listener", getter_AddRefs(e
));
680 NS_ENSURE_SUCCESS(rv
, rv
);
682 nsCAutoString categoryEntry
;
683 nsXPIDLCString contractId
;
684 nsCOMPtr
<nsISupports
> entry
;
686 while (NS_SUCCEEDED(e
->GetNext(getter_AddRefs(entry
)))) {
687 nsCOMPtr
<nsISupportsCString
> category(do_QueryInterface(entry
));
690 NS_WARNING("Category entry not an nsISupportsCString!");
694 rv
= category
->GetData(categoryEntry
);
695 NS_ENSURE_SUCCESS(rv
, rv
);
697 rv
= cm
->GetCategoryEntry("Parser data listener", categoryEntry
.get(),
698 getter_Copies(contractId
));
699 NS_ENSURE_SUCCESS(rv
, rv
);
701 nsCOMPtr
<nsIUnicharStreamListener
> listener
=
702 do_CreateInstance(contractId
.get());
705 if (!sParserDataListeners
) {
706 sParserDataListeners
= new nsCOMArray
<nsIUnicharStreamListener
>();
708 if (!sParserDataListeners
)
709 return NS_ERROR_OUT_OF_MEMORY
;
712 sParserDataListeners
->AppendObject(listener
);
716 nsCOMPtr
<nsICharsetAlias
> charsetAlias
=
717 do_GetService(NS_CHARSETALIAS_CONTRACTID
, &rv
);
718 NS_ENSURE_SUCCESS(rv
, rv
);
720 nsCOMPtr
<nsICharsetConverterManager
> charsetConverter
=
721 do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID
, &rv
);
722 NS_ENSURE_SUCCESS(rv
, rv
);
724 charsetAlias
.swap(sCharsetAliasService
);
725 charsetConverter
.swap(sCharsetConverterManager
);
727 nsCOMPtr
<nsIThreadPool
> threadPool
=
728 do_CreateInstance(NS_THREADPOOL_CONTRACTID
, &rv
);
729 NS_ENSURE_SUCCESS(rv
, rv
);
731 rv
= threadPool
->SetThreadLimit(kSpeculativeThreadLimit
);
732 NS_ENSURE_SUCCESS(rv
, rv
);
734 rv
= threadPool
->SetIdleThreadLimit(kIdleThreadLimit
);
735 NS_ENSURE_SUCCESS(rv
, rv
);
737 rv
= threadPool
->SetIdleThreadTimeout(kIdleThreadTimeout
);
738 NS_ENSURE_SUCCESS(rv
, rv
);
740 threadPool
.swap(sSpeculativeThreadPool
);
747 * This gets called when the htmlparser module is shut down.
750 void nsParser::Shutdown()
752 delete sParserDataListeners
;
753 sParserDataListeners
= nsnull
;
755 NS_IF_RELEASE(sCharsetAliasService
);
756 NS_IF_RELEASE(sCharsetConverterManager
);
757 if (sSpeculativeThreadPool
) {
758 sSpeculativeThreadPool
->Shutdown();
759 NS_RELEASE(sSpeculativeThreadPool
);
764 static PRBool gDumpContent
=PR_FALSE
;
768 * default constructor
775 nsParser::~nsParser()
781 nsParser::Initialize(PRBool aConstructor
)
785 gDumpContent
= PR_GetEnv("PARSER_DUMP_CONTENT") != nsnull
;
796 mParserFilter
= nsnull
;
797 mUnusedInput
.Truncate();
800 mContinueEvent
= nsnull
;
801 mCharsetSource
= kCharsetUninitialized
;
802 mCharset
.AssignLiteral("ISO-8859-1");
803 mInternalState
= NS_OK
;
805 mCommand
= eViewNormal
;
806 mFlags
= NS_PARSER_FLAG_OBSERVERS_ENABLED
|
807 NS_PARSER_FLAG_PARSER_ENABLED
|
808 NS_PARSER_FLAG_CAN_TOKENIZE
;
809 mScriptsExecuting
= 0;
811 MOZ_TIMER_DEBUGLOG(("Reset: Parse Time: nsParser::nsParser(), this=%p\n", this));
812 MOZ_TIMER_RESET(mParseTime
);
813 MOZ_TIMER_RESET(mDTDTime
);
814 MOZ_TIMER_RESET(mTokenizeTime
);
823 // Sink (HTMLContentSink at this time) supports nsIDebugDumpContent
824 // interface. We can get to the content model through the sink.
825 nsresult result
= NS_OK
;
826 nsCOMPtr
<nsIDebugDumpContent
> trigger
= do_QueryInterface(mSink
, &result
);
827 if (NS_SUCCEEDED(result
)) {
828 trigger
->DumpContentModel();
835 if (mParserContext
&& mParserContext
->mPrevContext
) {
836 NS_WARNING("Extra parser contexts still on the parser stack");
840 while (mParserContext
) {
841 CParserContext
*pc
= mParserContext
->mPrevContext
;
842 delete mParserContext
;
846 // It should not be possible for this flag to be set when we are getting
847 // destroyed since this flag implies a pending nsParserContinueEvent, which
848 // has an owning reference to |this|.
849 NS_ASSERTION(!(mFlags
& NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
), "bad");
850 if (mSpeculativeScriptThread
) {
851 mSpeculativeScriptThread
->Terminate();
852 mSpeculativeScriptThread
= nsnull
;
856 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser
)
858 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser
)
859 NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSink
)
860 NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mObserver
)
861 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
863 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser
)
864 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSink
)
865 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mObserver
)
866 CParserContext
*pc
= tmp
->mParserContext
;
868 cb
.NoteXPCOMChild(pc
->mDTD
);
869 cb
.NoteXPCOMChild(pc
->mTokenizer
);
870 pc
= pc
->mPrevContext
;
872 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
874 NS_IMPL_CYCLE_COLLECTING_ADDREF_AMBIGUOUS(nsParser
, nsIParser
)
875 NS_IMPL_CYCLE_COLLECTING_RELEASE_AMBIGUOUS(nsParser
, nsIParser
)
876 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser
)
877 NS_INTERFACE_MAP_ENTRY(nsIStreamListener
)
878 NS_INTERFACE_MAP_ENTRY(nsIParser
)
879 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver
)
880 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports
, nsIParser
)
883 // The parser continue event is posted only if
884 // all of the data to parse has been passed to ::OnDataAvailable
885 // and the parser has been interrupted by the content sink
886 // because the processing of tokens took too long.
889 nsParser::PostContinueEvent()
891 if (!(mFlags
& NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
)) {
892 // If this flag isn't set, then there shouldn't be a live continue event!
893 NS_ASSERTION(!mContinueEvent
, "bad");
895 // This creates a reference cycle between this and the event that is
896 // broken when the event fires.
897 nsCOMPtr
<nsIRunnable
> event
= new nsParserContinueEvent(this);
898 if (NS_FAILED(NS_DispatchToCurrentThread(event
))) {
899 NS_WARNING("failed to dispatch parser continuation event");
901 mFlags
|= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
;
902 mContinueEvent
= event
;
909 nsParser::SetParserFilter(nsIParserFilter
* aFilter
)
911 mParserFilter
= aFilter
;
915 nsParser::GetCommand(nsCString
& aCommand
)
917 aCommand
= mCommandStr
;
921 * Call this method once you've created a parser, and want to instruct it
922 * about the command which caused the parser to be constructed. For example,
923 * this allows us to select a DTD which can do, say, view-source.
925 * @param aCommand the command string to set
928 nsParser::SetCommand(const char* aCommand
)
930 mCommandStr
.Assign(aCommand
);
931 if (mCommandStr
.Equals(kViewSourceCommand
)) {
932 mCommand
= eViewSource
;
933 } else if (mCommandStr
.Equals(kViewFragmentCommand
)) {
934 mCommand
= eViewFragment
;
936 mCommand
= eViewNormal
;
941 * Call this method once you've created a parser, and want to instruct it
942 * about the command which caused the parser to be constructed. For example,
943 * this allows us to select a DTD which can do, say, view-source.
945 * @param aParserCommand the command to set
948 nsParser::SetCommand(eParserCommands aParserCommand
)
950 mCommand
= aParserCommand
;
954 * Call this method once you've created a parser, and want to instruct it
955 * about what charset to load
957 * @param aCharset the charset of a document
958 * @param aCharsetSource the source of the charset
961 nsParser::SetDocumentCharset(const nsACString
& aCharset
, PRInt32 aCharsetSource
)
964 mCharsetSource
= aCharsetSource
;
965 if (mParserContext
&& mParserContext
->mScanner
) {
966 mParserContext
->mScanner
->SetDocumentCharset(aCharset
, aCharsetSource
);
971 nsParser::SetSinkCharset(nsACString
& aCharset
)
974 mSink
->SetDocumentCharset(aCharset
);
979 * This method gets called in order to set the content
980 * sink for this parser to dump nodes to.
982 * @param aSink the nsIContentSink interface that receives nodes
985 nsParser::SetContentSink(nsIContentSink
* aSink
)
987 NS_PRECONDITION(aSink
, "sink cannot be null!");
991 mSink
->SetParser(this);
996 * retrieve the sink set into the parser
997 * @return current sink
999 NS_IMETHODIMP_(nsIContentSink
*)
1000 nsParser::GetContentSink()
1006 * Retrieve parsemode from topmost parser context
1010 NS_IMETHODIMP_(nsDTDMode
)
1011 nsParser::GetParseMode()
1013 if (mParserContext
) {
1014 return mParserContext
->mDTDMode
;
1016 NS_NOTREACHED("no parser context");
1017 return eDTDMode_unknown
;
1021 * Determine what DTD mode (and thus what layout nsCompatibility mode)
1022 * to use for this document based on the first chunk of data received
1023 * from the network (each parsercontext can have its own mode). (No,
1024 * this is not an optimal solution -- we really don't need to know until
1025 * after we've received the DOCTYPE, and this could easily be part of
1026 * the regular parsing process if the parser were designed in a way that
1027 * made such modifications easy.)
1030 // Parse the PS production in the SGML spec (excluding the part dealing
1031 // with entity references) starting at aIndex into aBuffer, and
1032 // return the first index after the end of the production.
1034 ParsePS(const nsString
& aBuffer
, PRInt32 aIndex
)
1037 PRUnichar ch
= aBuffer
.CharAt(aIndex
);
1038 if ((ch
== PRUnichar(' ')) || (ch
== PRUnichar('\t')) ||
1039 (ch
== PRUnichar('\n')) || (ch
== PRUnichar('\r'))) {
1041 } else if (ch
== PRUnichar('-')) {
1043 if (aBuffer
.CharAt(aIndex
+1) == PRUnichar('-') &&
1044 kNotFound
!= (tmpIndex
=aBuffer
.Find("--",PR_FALSE
,aIndex
+2,-1))) {
1045 aIndex
= tmpIndex
+ 2;
// Bit flags that ParseDocTypeDecl() ORs into *aResultFlags to report which
// parts of a DOCTYPE declaration it found: the declaration itself, a public
// identifier, a system identifier, and/or an internal subset ('[').
1055 #define PARSE_DTD_HAVE_DOCTYPE (1<<0)
1056 #define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
1057 #define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
1058 #define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)
1060 // return PR_TRUE on success (includes not present), PR_FALSE on failure
1062 ParseDocTypeDecl(const nsString
&aBuffer
,
1063 PRInt32
*aResultFlags
,
1064 nsString
&aPublicID
,
1065 nsString
&aSystemID
)
1067 PRBool haveDoctype
= PR_FALSE
;
1070 // Skip through any comments and processing instructions
1071 // The PI-skipping is a bit of a hack.
1072 PRInt32 theIndex
= 0;
1074 theIndex
= aBuffer
.FindChar('<', theIndex
);
1075 if (theIndex
== kNotFound
) break;
1076 PRUnichar nextChar
= aBuffer
.CharAt(theIndex
+1);
1077 if (nextChar
== PRUnichar('!')) {
1078 PRInt32 tmpIndex
= theIndex
+ 2;
1080 (theIndex
=aBuffer
.Find("DOCTYPE", PR_TRUE
, tmpIndex
, 0))) {
1081 haveDoctype
= PR_TRUE
;
1082 theIndex
+= 7; // skip "DOCTYPE"
1085 theIndex
= ParsePS(aBuffer
, tmpIndex
);
1086 theIndex
= aBuffer
.FindChar('>', theIndex
);
1087 } else if (nextChar
== PRUnichar('?')) {
1088 theIndex
= aBuffer
.FindChar('>', theIndex
);
1092 } while (theIndex
!= kNotFound
);
1096 *aResultFlags
|= PARSE_DTD_HAVE_DOCTYPE
;
1098 theIndex
= ParsePS(aBuffer
, theIndex
);
1099 theIndex
= aBuffer
.Find("HTML", PR_TRUE
, theIndex
, 0);
1100 if (kNotFound
== theIndex
)
1102 theIndex
= ParsePS(aBuffer
, theIndex
+4);
1103 PRInt32 tmpIndex
= aBuffer
.Find("PUBLIC", PR_TRUE
, theIndex
, 0);
1105 if (kNotFound
!= tmpIndex
) {
1106 theIndex
= ParsePS(aBuffer
, tmpIndex
+6);
1108 // We get here only if we've read <!DOCTYPE HTML PUBLIC
1109 // (not case sensitive) possibly with comments within.
1111 // Now find the beginning and end of the public identifier
1112 // and the system identifier (if present).
1114 PRUnichar lit
= aBuffer
.CharAt(theIndex
);
1115 if ((lit
!= PRUnichar('\"')) && (lit
!= PRUnichar('\'')))
1118 // Start is the first character, excluding the quote, and End is
1119 // the final quote, so there are (end-start) characters.
1121 PRInt32 PublicIDStart
= theIndex
+ 1;
1122 PRInt32 PublicIDEnd
= aBuffer
.FindChar(lit
, PublicIDStart
);
1123 if (kNotFound
== PublicIDEnd
)
1125 theIndex
= ParsePS(aBuffer
, PublicIDEnd
+ 1);
1126 PRUnichar next
= aBuffer
.CharAt(theIndex
);
1127 if (next
== PRUnichar('>')) {
1128 // There was a public identifier, but no system
1131 // This is needed to avoid the else at the end, and it's
1132 // also the most common case.
1133 } else if ((next
== PRUnichar('\"')) ||
1134 (next
== PRUnichar('\''))) {
1135 // We found a system identifier.
1136 *aResultFlags
|= PARSE_DTD_HAVE_SYSTEM_ID
;
1137 PRInt32 SystemIDStart
= theIndex
+ 1;
1138 PRInt32 SystemIDEnd
= aBuffer
.FindChar(next
, SystemIDStart
);
1139 if (kNotFound
== SystemIDEnd
)
1142 Substring(aBuffer
, SystemIDStart
, SystemIDEnd
- SystemIDStart
);
1143 } else if (next
== PRUnichar('[')) {
1144 // We found an internal subset.
1145 *aResultFlags
|= PARSE_DTD_HAVE_INTERNAL_SUBSET
;
1147 // Something's wrong.
1151 // Since a public ID is a minimum literal, we must trim
1152 // and collapse whitespace
1153 aPublicID
= Substring(aBuffer
, PublicIDStart
, PublicIDEnd
- PublicIDStart
);
1154 aPublicID
.CompressWhitespace(PR_TRUE
, PR_TRUE
);
1155 *aResultFlags
|= PARSE_DTD_HAVE_PUBLIC_ID
;
1157 tmpIndex
=aBuffer
.Find("SYSTEM", PR_TRUE
, theIndex
, 0);
1158 if (kNotFound
!= tmpIndex
) {
1159 // DOCTYPES with system ID but no Public ID
1160 *aResultFlags
|= PARSE_DTD_HAVE_SYSTEM_ID
;
1162 theIndex
= ParsePS(aBuffer
, tmpIndex
+6);
1163 PRUnichar next
= aBuffer
.CharAt(theIndex
);
1164 if (next
!= PRUnichar('\"') && next
!= PRUnichar('\''))
1167 PRInt32 SystemIDStart
= theIndex
+ 1;
1168 PRInt32 SystemIDEnd
= aBuffer
.FindChar(next
, SystemIDStart
);
1170 if (kNotFound
== SystemIDEnd
)
1173 Substring(aBuffer
, SystemIDStart
, SystemIDEnd
- SystemIDStart
);
1174 theIndex
= ParsePS(aBuffer
, SystemIDEnd
+ 1);
1177 PRUnichar nextChar
= aBuffer
.CharAt(theIndex
);
1178 if (nextChar
== PRUnichar('['))
1179 *aResultFlags
|= PARSE_DTD_HAVE_INTERNAL_SUBSET
;
1180 else if (nextChar
!= PRUnichar('>'))
1189 eQuirks
, /* always quirks mode, unless there's an internal subset */
1190 eAlmostStandards
,/* eCompatibility_AlmostStandards */
1191 eFullStandards
/* eCompatibility_FullStandards */
1193 * public IDs that should trigger strict mode are not listed
1194 * since we want all future public IDs to trigger strict mode as
1200 eMode mode_if_no_sysid
;
1201 eMode mode_if_sysid
;
// Number of elements in a statically sized array: total size in bytes divided
// by the size of one element. Only meaningful for true arrays; passing a
// pointer yields sizeof(pointer)/sizeof(element) instead.
1204 #define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
1206 // These must be in nsCRT::strcmp order so binary-search can be used.
1207 // This is verified, |#ifdef DEBUG|, below.
1209 // Even though public identifiers should be case sensitive, we will do
1210 // all comparisons after converting to lower case in order to do
1211 // case-insensitive comparison since there are a number of existing web
1212 // sites that use the incorrect case. Therefore all of the public
1213 // identifiers below are in lower case (with the correct case following,
1214 // in comments). The case is verified, |#ifdef DEBUG|, below.
// Table of known public IDs. Each entry is a lower-cased public ID followed by
// two PubIDInfo modes; presumably these initialize mode_if_no_sysid and
// mode_if_sysid in that (declaration) order -- TODO confirm against the full
// struct definition. DetermineHTMLParseMode() binary-searches this table and
// treats any public ID that is not listed as full standards mode.
1215 static const PubIDInfo kPublicIDs
[] = {
1216 {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1217 {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1218 {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1219 {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1220 {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1221 {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1222 {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1223 {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1224 {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1225 {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1226 {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1227 {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1228 {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1229 {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1230 {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1231 {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1232 {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1233 {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1234 {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1235 {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1236 {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1237 {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1238 {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1239 {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1240 {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1241 {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1242 {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1243 {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1244 {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1245 {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1246 {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1247 {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1248 {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1249 {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1250 {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1251 {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1252 {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1253 {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1254 {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1255 {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1256 {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1257 {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1258 {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1259 {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1260 {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1261 {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1262 {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1263 {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1264 {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1265 {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1266 {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1267 {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1268 {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1269 {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1270 {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1271 {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1272 {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1273 {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1274 {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1275 {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1276 {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1277 {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1278 {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eAlmostStandards
},
1279 {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eAlmostStandards
},
1280 {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1281 {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1282 {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1283 {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards
, PubIDInfo::eAlmostStandards
},
1284 {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards
, PubIDInfo::eAlmostStandards
},
1285 {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1286 {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1287 {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1288 {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1289 {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1290 {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
1291 {"html" /* "HTML" */, PubIDInfo::eQuirks
, PubIDInfo::eQuirks
},
// Self-check of the kPublicIDs table: entries must be strictly ordered under
// nsCRT::strcmp and must already be lower case. Failures are reported with
// NS_NOTREACHED and printf.
// NOTE(review): the enclosing function header and the guard around gVerified
// are not visible in this chunk; presumably the check runs only once.
1298 static PRBool gVerified
= PR_FALSE
;
1300 gVerified
= PR_TRUE
;
// Pass 1: each entry must compare strictly less than its successor.
1302 for (i
= 0; i
< ELEMENTS_OF(kPublicIDs
) - 1; ++i
) {
1303 if (nsCRT::strcmp(kPublicIDs
[i
].name
, kPublicIDs
[i
+1].name
) >= 0) {
1304 NS_NOTREACHED("doctypes out of order");
1305 printf("Doctypes %s and %s out of order.\n",
1306 kPublicIDs
[i
].name
, kPublicIDs
[i
+1].name
);
// Pass 2: each name must be unchanged by ToLowerCase().
1309 for (i
= 0; i
< ELEMENTS_OF(kPublicIDs
); ++i
) {
1310 nsCAutoString
lcPubID(kPublicIDs
[i
].name
);
1311 ToLowerCase(lcPubID
);
1312 if (nsCRT::strcmp(kPublicIDs
[i
].name
, lcPubID
.get()) != 0) {
1313 NS_NOTREACHED("doctype not lower case");
1314 printf("Doctype %s not lower case.\n", kPublicIDs
[i
].name
);
// Decides the DTD mode and doc type of an HTML document from the doctype
// declaration found in aBuffer.
//   aBuffer    - text that is handed to ParseDocTypeDecl().
//   aParseMode - out: the selected nsDTDMode.
//   aDocType   - out: the selected eParserDocType.
// Rules visible in this block:
//   * no DOCTYPE                            -> quirks
//   * internal subset, or no public ID      -> full standards (except the IBM
//                                              system ID special case -> quirks)
//   * public ID listed in kPublicIDs        -> the mode stored in the table
//   * public ID not listed                  -> full standards
//   * ParseDocTypeDecl() fails (bad DOCTYPE) -> quirks
1322 DetermineHTMLParseMode(const nsString
& aBuffer
,
1323 nsDTDMode
& aParseMode
,
1324 eParserDocType
& aDocType
)
1329 PRInt32 resultFlags
;
1330 nsAutoString publicIDUCS2
, sysIDUCS2
;
1331 if (ParseDocTypeDecl(aBuffer
, &resultFlags
, publicIDUCS2
, sysIDUCS2
)) {
1332 if (!(resultFlags
& PARSE_DTD_HAVE_DOCTYPE
)) {
1334 aParseMode
= eDTDMode_quirks
;
1335 aDocType
= eHTML_Quirks
;
1336 } else if ((resultFlags
& PARSE_DTD_HAVE_INTERNAL_SUBSET
) ||
1337 !(resultFlags
& PARSE_DTD_HAVE_PUBLIC_ID
)) {
1338 // A doctype with an internal subset is always full_standards.
1339 // A doctype without a public ID is always full_standards.
1340 aDocType
= eHTML_Strict
;
1341 aParseMode
= eDTDMode_full_standards
;
1343 // Special hack for IBM's custom DOCTYPE.
1344 if (!(resultFlags
& PARSE_DTD_HAVE_INTERNAL_SUBSET
) &&
1345 sysIDUCS2
== NS_LITERAL_STRING(
1346 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
1347 aParseMode
= eDTDMode_quirks
;
1348 aDocType
= eHTML_Quirks
;
1352 // We have to check our list of public IDs to see what to do.
1353 // Yes, we want UCS2 to ASCII lossy conversion.
1354 nsCAutoString publicID
;
1355 publicID
.AssignWithConversion(publicIDUCS2
);
1357 // See comment above definition of kPublicIDs about case
1359 ToLowerCase(publicID
);
1361 // Binary search to see if we can find the correct public ID
1362 // These must be signed since maximum can go below zero and we'll
1363 // crash if it's unsigned.
1364 PRInt32 minimum
= 0;
1365 PRInt32 maximum
= ELEMENTS_OF(kPublicIDs
) - 1;
1368 index
= (minimum
+ maximum
) / 2;
1369 PRInt32 comparison
=
1370 nsCRT::strcmp(publicID
.get(), kPublicIDs
[index
].name
);
1371 if (comparison
== 0)
1374 maximum
= index
- 1;
1376 minimum
= index
+ 1;
1378 if (maximum
< minimum
) {
1379 // The DOCTYPE is not in our list, so it must be full_standards.
1380 aParseMode
= eDTDMode_full_standards
;
1381 aDocType
= eHTML_Strict
;
// The table stores one mode for doctypes with a system ID and one for those
// without; pick whichever applies and map it to the output pair.
1386 switch ((resultFlags
& PARSE_DTD_HAVE_SYSTEM_ID
)
1387 ? kPublicIDs
[index
].mode_if_sysid
1388 : kPublicIDs
[index
].mode_if_no_sysid
)
1390 case PubIDInfo::eQuirks
:
1391 aParseMode
= eDTDMode_quirks
;
1392 aDocType
= eHTML_Quirks
;
1394 case PubIDInfo::eAlmostStandards
:
1395 aParseMode
= eDTDMode_almost_standards
;
1396 aDocType
= eHTML_Strict
;
1398 case PubIDInfo::eFullStandards
:
1399 aParseMode
= eDTDMode_full_standards
;
1400 aDocType
= eHTML_Strict
;
1403 NS_NOTREACHED("no other cases!");
1407 // badly formed DOCTYPE -> quirks
1408 aParseMode
= eDTDMode_quirks
;
1409 aDocType
= eHTML_Quirks
;
// Picks the parse mode and doc type based on the MIME type:
//   * kHTMLTextContentType      -> delegated to DetermineHTMLParseMode()
//   * plain text, CSS and the listed JavaScript/ECMAScript types
//                               -> ePlainText, quirks mode
//   * anything else ("some form of XML") -> full standards mode
//   aBuffer    - text forwarded to DetermineHTMLParseMode() for HTML.
//   aParseMode - out: the selected nsDTDMode.
//   aDocType   - out: the selected eParserDocType.
//   aMimeType  - MIME type of the document being parsed.
1414 DetermineParseMode(const nsString
& aBuffer
, nsDTDMode
& aParseMode
,
1415 eParserDocType
& aDocType
, const nsACString
& aMimeType
)
1417 if (aMimeType
.EqualsLiteral(kHTMLTextContentType
)) {
1418 DetermineHTMLParseMode(aBuffer
, aParseMode
, aDocType
);
1419 } else if (aMimeType
.EqualsLiteral(kPlainTextContentType
) ||
1420 aMimeType
.EqualsLiteral(kTextCSSContentType
) ||
1421 aMimeType
.EqualsLiteral(kApplicationJSContentType
) ||
1422 aMimeType
.EqualsLiteral(kApplicationXJSContentType
) ||
1423 aMimeType
.EqualsLiteral(kTextECMAScriptContentType
) ||
1424 aMimeType
.EqualsLiteral(kApplicationECMAScriptContentType
) ||
1425 aMimeType
.EqualsLiteral(kTextJSContentType
)) {
1426 aDocType
= ePlainText
;
1427 aParseMode
= eDTDMode_quirks
;
1428 } else { // Some form of XML
1430 aParseMode
= eDTDMode_full_standards
;
// Creates the DTD object for aParserContext and stores it in mDTD:
//   * CViewSourceHTML when the command is eViewSource (MOZ_VIEW_SOURCE builds)
//   * CNavDTD when the doc type is anything other than eXML
//   * nsExpatDriver for eXML
// Asserts that no DTD has been assigned yet and marks the context as
// ePrimaryDetect. Each visible return yields NS_OK when the allocation
// produced a non-null pointer and NS_ERROR_OUT_OF_MEMORY otherwise.
1435 FindSuitableDTD(CParserContext
& aParserContext
)
1437 NS_ASSERTION(!aParserContext
.mDTD
, "Already found a DTD");
1439 // We always find a DTD.
1440 aParserContext
.mAutoDetectStatus
= ePrimaryDetect
;
1442 #ifdef MOZ_VIEW_SOURCE
1443 // Quick check for view source.
1444 if (aParserContext
.mParserCommand
== eViewSource
) {
1445 aParserContext
.mDTD
= new CViewSourceHTML();
1446 return aParserContext
.mDTD
? NS_OK
: NS_ERROR_OUT_OF_MEMORY
;
1450 // Now see if we're parsing HTML (which, as far as we're concerned, simply
1451 // means "not XML").
1452 if (aParserContext
.mDocType
!= eXML
) {
1453 aParserContext
.mDTD
= new CNavDTD();
1454 return aParserContext
.mDTD
? NS_OK
: NS_ERROR_OUT_OF_MEMORY
;
1457 // If we're here, then we'd better be parsing XML.
1458 NS_ASSERTION(aParserContext
.mDocType
== eXML
, "What are you trying to send me, here?");
1459 aParserContext
.mDTD
= new nsExpatDriver();
1460 return aParserContext
.mDTD
? NS_OK
: NS_ERROR_OUT_OF_MEMORY
;
// Revokes a pending "continue parsing" event, if one is flagged: drops the
// reference held in mContinueEvent and clears
// NS_PARSER_FLAG_PENDING_CONTINUE_EVENT. Does nothing when the flag is unset.
1464 nsParser::CancelParsingEvents()
1466 if (mFlags
& NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
) {
1467 NS_ASSERTION(mContinueEvent
, "mContinueEvent is null");
1468 // Revoke the pending continue parsing event
1469 mContinueEvent
= nsnull
;
1470 mFlags
&= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
;
1475 ////////////////////////////////////////////////////////////////////////
1479 * This gets called just prior to the model actually
1480 * being constructed. It's important to make this the
1481 * last thing that happens right before parsing, so we
1482 * can delay until the last moment the resolution of
1483 * which DTD to use (unless of course we're assigned one).
// Resolves the DTD mode and DTD for the current parser context, obtains a
// tokenizer for it, and forwards to the DTD's WillBuildModel().
// Returns kInvalidParserContext when there is no parser context; failures from
// FindSuitableDTD() and GetTokenizer() are propagated via NS_ENSURE_SUCCESS.
// NOTE(review): the statement guarded by the mAutoDetectStatus check is not
// visible in this chunk.
1486 nsParser::WillBuildModel(nsString
& aFilename
)
1488 if (!mParserContext
)
1489 return kInvalidParserContext
;
1491 if (eUnknownDetect
!= mParserContext
->mAutoDetectStatus
)
// Only sniff the document when the mode has not been fixed by the caller.
1494 if (eDTDMode_unknown
== mParserContext
->mDTDMode
||
1495 eDTDMode_autodetect
== mParserContext
->mDTDMode
) {
1496 PRUnichar buf
[1025];
1497 nsFixedString
theBuffer(buf
, 1024, 0);
1499 // Grab 1024 characters, starting at the first non-whitespace
1500 // character, to look for the doctype in.
1501 mParserContext
->mScanner
->Peek(theBuffer
, 1024, mParserContext
->mScanner
->FirstNonWhitespacePosition());
1502 DetermineParseMode(theBuffer
, mParserContext
->mDTDMode
,
1503 mParserContext
->mDocType
, mParserContext
->mMimeType
);
1506 nsresult rv
= FindSuitableDTD(*mParserContext
);
1507 NS_ENSURE_SUCCESS(rv
, rv
);
1509 nsITokenizer
* tokenizer
;
1510 rv
= mParserContext
->GetTokenizer(mParserContext
->mDTD
->GetType(), mSink
, tokenizer
);
1511 NS_ENSURE_SUCCESS(rv
, rv
);
1513 return mParserContext
->mDTD
->WillBuildModel(*mParserContext
, tokenizer
, mSink
);
1517 * This gets called when the parser is done with its input.
1518 * Note that the parser may have been called recursively, so we
1519 * have to check for a prev. context before closing out the DTD/sink.
// Finishes model building. When the current context is the root one (it has
// no mPrevContext) and a DTD exists, the DTD's DidBuildModel() is called and
// its result replaces anErrorCode in `result`. The context's mRequest is
// cleared afterwards.
// NOTE(review): the closing braces and the return are not visible here, so
// the exact scope of the mRequest reset cannot be confirmed from this chunk.
1522 nsParser::DidBuildModel(nsresult anErrorCode
)
1524 nsresult result
= anErrorCode
;
1527 if (mParserContext
&& !mParserContext
->mPrevContext
) {
1528 if (mParserContext
->mDTD
) {
1529 result
= mParserContext
->mDTD
->DidBuildModel(anErrorCode
,PR_TRUE
,this,mSink
);
1532 //Ref. to bug 61462.
1533 mParserContext
->mRequest
= 0;
// Starts speculative parsing on an nsSpeculativeScriptThread, creating the
// thread object on first use. If StartParsing() fails, the thread reference
// is dropped again.
// NOTE(review): the bodies of the first check (eViewNormal command with a
// non-"text/html" MIME type) and of the allocation-failure check are not
// visible in this chunk; presumably both bail out early.
1541 nsParser::SpeculativelyParse()
1543 if (mParserContext
->mParserCommand
== eViewNormal
&&
1544 !mParserContext
->mMimeType
.EqualsLiteral("text/html")) {
1548 if (!mSpeculativeScriptThread
) {
1549 mSpeculativeScriptThread
= new nsSpeculativeScriptThread();
1550 if (!mSpeculativeScriptThread
) {
1555 nsresult rv
= mSpeculativeScriptThread
->StartParsing(this);
1556 if (NS_FAILED(rv
)) {
1557 mSpeculativeScriptThread
= nsnull
;
1562 * This method adds a new parser context to the list,
1563 * pushing the current one to the next position.
1565 * @param aContext the new context to push
// Makes aContext the current parser context; the previously current context
// becomes aContext.mPrevContext.
1568 nsParser::PushContext(CParserContext
& aContext
)
1570 aContext
.mPrevContext
= mParserContext
;
1571 mParserContext
= &aContext
;
1575 * This method pops the topmost context off the stack,
1576 * returning it to the user. The next context (if any)
1577 * becomes the current context.
1578 * @update gess7/22/98
1579 * @return prev. context
// Removes the current context from the context stack and makes its
// mPrevContext current. When a previous context exists, the popped context's
// stream listener state is copied to it (unless it is already eOnStop) and
// its tokenizer, if any, copies state from the popped context's tokenizer.
// NOTE(review): any null check on the popped context and the return statement
// are not visible in this chunk.
1582 nsParser::PopContext()
1584 CParserContext
* oldContext
= mParserContext
;
1586 mParserContext
= oldContext
->mPrevContext
;
1587 if (mParserContext
) {
1588 // If the old context was blocked, propagate the blocked state
1589 // back to the new one. Also, propagate the stream listener state
1590 // but don't override onStop state to guarantee the call to DidBuildModel().
1591 if (mParserContext
->mStreamListenerState
!= eOnStop
) {
1592 mParserContext
->mStreamListenerState
= oldContext
->mStreamListenerState
;
1594 // Update the current context's tokenizer to any information gleaned
1595 // while parsing document.write() calls (such as "a plaintext tag was
1597 if (mParserContext
->mTokenizer
) {
1598 mParserContext
->mTokenizer
->CopyState(oldContext
->mTokenizer
);
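1606 * Call this when you want to control whether or not the parser will parse
1607 * and tokenize input (TRUE), or whether it just caches input to be
1608 * parsed later (FALSE).
1610 * @param aState determines whether we parse/tokenize or just cache.
1611 * @return current state
* NOTE: this description does not match the SetUnusedInput(aBuffer) method
* that follows, which has no aState parameter and stores aBuffer in
* mUnusedInput.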
// Stores a copy of aBuffer in mUnusedInput.
1614 nsParser::SetUnusedInput(nsString
& aBuffer
)
1616 mUnusedInput
= aBuffer
;
// Walks the mPrevContext chain from the current context down to the root
// (oldest) context.
// NOTE(review): the null check on mParserContext and the return statement are
// not visible in this chunk; judging by the name, the root context's key is
// what gets returned -- confirm.
1619 NS_IMETHODIMP_(void *)
1620 nsParser::GetRootContextKey()
1622 CParserContext
* pc
= mParserContext
;
1627 while (pc
->mPrevContext
) {
1628 pc
= pc
->mPrevContext
;
1635 * Call this when you want to *force* the parser to terminate the
1636 * parsing process altogether. This is binary -- so once you terminate
1637 * you can't resume without restarting altogether.
// Forces the parser to stop for good. Steps visible in this block:
//   1. do nothing further if mInternalState is already STOPPARSING;
//   2. record NS_ERROR_HTMLPARSER_STOPPARSING and cancel pending events;
//   3. terminate and release the speculative script thread, if any;
//   4. delete every nested context so that only the root context remains;
//   5. terminate the root DTD and call DidBuildModel(), or -- when there is
//      no context/DTD yet -- call the sink's DidBuildModel() directly.
// A self-reference is held for the duration of the call.
1640 nsParser::Terminate(void)
1642 // We should only call DidBuildModel once, so don't do anything if this is
1643 // the second time that Terminate has been called.
1644 if (mInternalState
== NS_ERROR_HTMLPARSER_STOPPARSING
) {
1648 nsresult result
= NS_OK
;
1649 // XXX - [ until we figure out a way to break parser-sink circularity ]
1650 // Hack - Hold a reference until we are completely done...
1651 nsCOMPtr
<nsIParser
> kungFuDeathGrip(this);
1652 mInternalState
= result
= NS_ERROR_HTMLPARSER_STOPPARSING
;
1654 // CancelParsingEvents must be called to avoid leaking the nsParser object
1656 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
1657 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
1658 // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
1659 CancelParsingEvents();
1660 if (mSpeculativeScriptThread
) {
1661 mSpeculativeScriptThread
->Terminate();
1662 mSpeculativeScriptThread
= nsnull
;
1665 // If we got interrupted in the middle of a document.write, then we might
1666 // have more than one parser context on our parsercontext stack. This has
1667 // the effect of making DidBuildModel a no-op, meaning that we never call
1668 // our sink's DidBuildModel and break the reference cycle, causing a leak.
1669 // Since we're getting terminated, we manually clean up our context stack.
1670 while (mParserContext
&& mParserContext
->mPrevContext
) {
1671 CParserContext
*prev
= mParserContext
->mPrevContext
;
1672 NS_ASSERTION(prev
->mPrevContext
|| prev
->mDTD
, "How is there no root DTD?");
1674 delete mParserContext
;
1675 mParserContext
= prev
;
1678 if (mParserContext
&& mParserContext
->mDTD
) {
1679 mParserContext
->mDTD
->Terminate();
1680 DidBuildModel(result
);
1682 // We have no parser context or no DTD yet (so we got terminated before we
1683 // got any data). Manually break the reference cycle with the sink.
1684 result
= mSink
->DidBuildModel();
1685 NS_ENSURE_SUCCESS(result
, result
);
// Re-enables a blocked parser and resumes parsing through
// ContinueInterruptedParsing(). Warns when the parser is already enabled.
// NOTE(review): what happens after the warning is not visible in this chunk.
1692 nsParser::ContinueParsing()
1694 if (mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
) {
1695 NS_WARNING("Trying to continue parsing on a unblocked parser.");
1699 mFlags
|= NS_PARSER_FLAG_PARSER_ENABLED
;
1701 return ContinueInterruptedParsing();
// Resumes parsing after an interruption. The speculative script thread, if
// any, is told to stop parsing first; ResumeParse() is then called with
// isFinalChunk set when the current context's stream listener state is
// eOnStop. If ResumeParse() returns anything other than NS_OK, the result is
// replaced by mInternalState.
// NOTE(review): the bodies of the mScriptsExecuting and "parser blocked"
// checks and the final return are not visible in this chunk.
1705 nsParser::ContinueInterruptedParsing()
1707 // If there are scripts executing, then the content sink is jumping the gun
1708 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
1709 // later, see bug 460706.
1710 if (mScriptsExecuting
) {
1714 // If the stream has already finished, there's a good chance
1715 // that we might start closing things down when the parser
1716 // is reenabled. To make sure that we're not deleted across
1717 // the reenabling process, hold a reference to ourselves.
1718 nsresult result
=NS_OK
;
1719 nsCOMPtr
<nsIParser
> kungFuDeathGrip(this);
1722 if (!(mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
)) {
1723 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
1727 if (mSpeculativeScriptThread
) {
1728 mSpeculativeScriptThread
->StopParsing(PR_FALSE
);
1731 PRBool isFinalChunk
= mParserContext
&&
1732 mParserContext
->mStreamListenerState
== eOnStop
;
1737 result
= ResumeParse(PR_TRUE
, isFinalChunk
); // Ref. bug 57999
1739 if (result
!= NS_OK
) {
1740 result
=mInternalState
;
1747 * Stops parsing temporarily. That is, it will prevent the
1748 * parser from building up the content model.
// Clears NS_PARSER_FLAG_PARSER_ENABLED and stops the mParseTime timer (via the
// MOZ_TIMER_* macros).
1750 NS_IMETHODIMP_(void)
1751 nsParser::BlockParser()
1753 mFlags
&= ~NS_PARSER_FLAG_PARSER_ENABLED
;
1754 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::BlockParser(), this=%p\n", this));
1755 MOZ_TIMER_STOP(mParseTime
);
1759 * Open up the parser for tokenization, building up content
1760 * model..etc. However, this method does not resume parsing
1761 * automatically. It's the callers' responsibility to restart
1762 * the parsing engine.
// Sets NS_PARSER_FLAG_PARSER_ENABLED and starts the mParseTime timer when the
// parser is currently blocked; otherwise only emits a warning. Parsing itself
// is not resumed here.
1764 NS_IMETHODIMP_(void)
1765 nsParser::UnblockParser()
1767 if (!(mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
)) {
1768 mFlags
|= NS_PARSER_FLAG_PARSER_ENABLED
;
1769 MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::UnblockParser(), this=%p\n", this));
1770 MOZ_TIMER_START(mParseTime
);
1772 NS_WARNING("Trying to unblock an unblocked parser.");
1777 * Call this to query whether the parser is enabled or not.
// Returns true exactly when NS_PARSER_FLAG_PARSER_ENABLED is set in mFlags.
1779 NS_IMETHODIMP_(PRBool
)
1780 nsParser::IsParserEnabled()
1782 return (mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
) != 0;
1786 * Call this to query whether the parser thinks it's done with parsing.
// Returns true exactly when no continue event is pending, i.e. when
// NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is clear in mFlags.
1788 NS_IMETHODIMP_(PRBool
)
1789 nsParser::IsComplete()
1791 return !(mFlags
& NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
);
// Handles a posted continue event: clears the pending-event flag and the
// stored event, then resumes parsing via ContinueInterruptedParsing().
// An event that is not the one stored in mContinueEvent has been revoked.
// NOTE(review): the statement under the revoked-event check is not visible in
// this chunk; presumably it returns early.
1795 void nsParser::HandleParserContinueEvent(nsParserContinueEvent
*ev
)
1797 // Ignore any revoked continue events...
1798 if (mContinueEvent
!= ev
)
1801 mFlags
&= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT
;
1802 mContinueEvent
= nsnull
;
1804 NS_ASSERTION(mScriptsExecuting
== 0, "Interrupted in the middle of a script?");
1805 ContinueInterruptedParsing();
// Increments mScriptsExecuting, the counter that ScriptDidExecute() decrements.
1809 nsParser::ScriptExecuting()
1811 ++mScriptsExecuting
;
// Decrements mScriptsExecuting; asserts that the counter was positive, i.e.
// that the call is matched by an earlier ScriptExecuting().
1815 nsParser::ScriptDidExecute()
1817 NS_ASSERTION(mScriptsExecuting
> 0, "Too many calls to ScriptDidExecute");
1818 --mScriptsExecuting
;
// Forwards newly added data to the registered parser data listeners by
// calling OnUnicharDataAvailable(aRequest, ctx, aData) on entries of
// sParserDataListeners, where ctx is the sink's target. If the accumulated
// result is a failure and the request has not been canceled yet, aRequest is
// canceled with that result.
//   aData    - the data that was added.
//   aRequest - the request the data belongs to.
// NOTE(review): the loop header, the early exit taken when mSink or aRequest
// is null, and the return are not visible in this chunk.
1822 nsParser::DataAdded(const nsSubstring
& aData
, nsIRequest
*aRequest
)
1824 NS_ASSERTION(sParserDataListeners
,
1825 "Don't call this with no parser data listeners!");
1827 if (!mSink
|| !aRequest
) {
1831 nsISupports
*ctx
= mSink
->GetTarget();
1832 PRInt32 count
= sParserDataListeners
->Count();
1833 nsresult rv
= NS_OK
;
1834 PRBool canceled
= PR_FALSE
;
// NOTE(review): results are merged with a bitwise OR, so the code handed to
// Cancel() may not equal any single listener's error code -- confirm that
// this is acceptable to callers.
1837 rv
|= sParserDataListeners
->ObjectAt(count
)->
1838 OnUnicharDataAvailable(aRequest
, ctx
, aData
);
1840 if (NS_FAILED(rv
) && !canceled
) {
1841 aRequest
->Cancel(rv
);
// Returns true exactly when NS_PARSER_FLAG_CAN_INTERRUPT is set in mFlags.
1851 nsParser::CanInterrupt()
1853 return (mFlags
& NS_PARSER_FLAG_CAN_INTERRUPT
) != 0;
// Sets NS_PARSER_FLAG_CAN_INTERRUPT in mFlags when aCanInterrupt is true and
// clears it otherwise.
1857 nsParser::SetCanInterrupt(PRBool aCanInterrupt
)
1859 if (aCanInterrupt
) {
1860 mFlags
|= NS_PARSER_FLAG_CAN_INTERRUPT
;
1862 mFlags
&= ~NS_PARSER_FLAG_CAN_INTERRUPT
;
1867 * This is the main controlling routine in the parsing process.
1868 * Note that it may get called multiple times for the same scanner,
1869 * since this is a push-based system, and all the tokens may
1870 * not have been consumed by the scanner during a given invocation
// URL-based entry point: creates a scanner named after aURL's spec and a new
// parser context of type eCTURL (marked multipart, DTD mode aMode), and hands
// this parser to the scanner. When the context or the scanner could not be
// created, the result and mInternalState become
// NS_ERROR_HTMLPARSER_BADCONTEXT.
//   aURL      - the URL being parsed; must not be null.
//   aListener - stored in mObserver.
// NOTE(review): part of the parameter list, the handling of a failed
// GetSpec(), the context push and the return are not visible in this chunk.
// NOTE(review): when only one of pc / theScanner was allocated, nothing
// visible here releases the other -- confirm against the full source.
1874 nsParser::Parse(nsIURI
* aURL
,
1875 nsIRequestObserver
* aListener
,
1880 NS_PRECONDITION(aURL
, "Error: Null URL given");
1881 NS_ASSERTION(!mSpeculativeScriptThread
, "Can't reuse a parser like this");
1883 nsresult result
=kBadURL
;
1884 mObserver
= aListener
;
1888 nsresult rv
= aURL
->GetSpec(spec
);
1892 NS_ConvertUTF8toUTF16
theName(spec
);
1894 nsScanner
* theScanner
= new nsScanner(theName
, PR_FALSE
, mCharset
,
1896 CParserContext
* pc
= new CParserContext(theScanner
, aKey
, mCommand
,
1898 if (pc
&& theScanner
) {
1899 pc
->mMultipart
= PR_TRUE
;
1900 pc
->mContextType
= CParserContext::eCTURL
;
1901 pc
->mDTDMode
= aMode
;
1904 // Here, and only here, hand this parser off to the scanner. We
1905 // only want to do that here since the only reason the scanner
1906 // needs the parser is to call DataAdded() on it, and that's
1907 // only ever wanted when parsing from an URI.
1908 theScanner
->SetParser(this);
1912 result
= mInternalState
= NS_ERROR_HTMLPARSER_BADCONTEXT
;
1919 * Call this method if all you want to do is parse 1 string full of HTML text.
1920 * In particular, this method should be called by the DOM when it has an HTML
1921 * string to feed to the parser in real-time.
1923 * @param aSourceBuffer contains a string-full of real content
1924 * @param aMimeType tells us what type of content to expect in the given string
// String-based entry point (used e.g. for document.write(), per the comments
// below). Looks up the parser context whose mKey equals aKey; the code that
// builds a new scanner and context is the path for a key with no context.
// aSourceBuffer is appended to the context's scanner, and parsing is resumed
// without allowing interruption.
//   aSourceBuffer - the text to parse.
//   aMimeType     - MIME type recorded on a newly created context.
// NOTE(review): part of the parameter list (aKey, aLastCall and aMode are used
// below but declared on lines not visible here), several branch headers,
// early returns and the final return are not visible in this chunk.
1927 nsParser::Parse(const nsAString
& aSourceBuffer
,
1929 const nsACString
& aMimeType
,
1933 nsresult result
= NS_OK
;
1935 // Don't bother if we're never going to parse this.
1936 if (mInternalState
== NS_ERROR_HTMLPARSER_STOPPARSING
) {
1940 if (!aLastCall
&& aSourceBuffer
.IsEmpty()) {
1941 // Nothing is being passed to the parser so return
1942 // immediately. mUnusedInput will get processed when
1943 // some data is actually passed in.
1944 // But if this is the last call, make sure to finish up
1949 if (mSpeculativeScriptThread
) {
1950 mSpeculativeScriptThread
->StopParsing(PR_TRUE
);
1953 // Hack to pass on to the dtd the caller's desire to
1954 // parse a fragment without worrying about containment rules
1955 if (aMode
== eDTDMode_fragment
)
1956 mCommand
= eViewFragment
;
1958 // Maintain a reference to ourselves so we don't go away
1959 // till we're completely done.
1960 nsCOMPtr
<nsIParser
> kungFuDeathGrip(this);
1962 if (aLastCall
|| !aSourceBuffer
.IsEmpty() || !mUnusedInput
.IsEmpty()) {
1963 // Note: The following code will always find the parser context associated
1964 // with the given key, even if that context has been suspended (e.g., for
1965 // another document.write call). This doesn't appear to be exactly what IE
1966 // does in the case where this happens, but this makes more sense.
1967 CParserContext
* pc
= mParserContext
;
1968 while (pc
&& pc
->mKey
!= aKey
) {
1969 pc
= pc
->mPrevContext
;
1973 // Only make a new context if we don't have one, OR if we do, but has a
1974 // different context key.
1975 nsScanner
* theScanner
= new nsScanner(mUnusedInput
, mCharset
, mCharsetSource
);
1976 NS_ENSURE_TRUE(theScanner
, NS_ERROR_OUT_OF_MEMORY
);
1978 nsIDTD
*theDTD
= nsnull
;
1979 eAutoDetectResult theStatus
= eUnknownDetect
;
1981 if (mParserContext
&& mParserContext
->mMimeType
== aMimeType
) {
1983 NS_ASSERTION(mParserContext
->mDTD
, "How come the DTD is null?");
1985 if (mParserContext
) {
1986 // To fix bug 32263 we used to create a new instance of the DTD!.
1987 // All we need is a new tokenizer which now gets created with
1988 // a parser context.
1989 theDTD
= mParserContext
->mDTD
;
1990 theStatus
= mParserContext
->mAutoDetectStatus
;
1991 // Added this to fix bug 32022.
// NOTE(review): if this allocation fails, NS_ENSURE_TRUE returns and nothing
// visible here releases theScanner -- confirm against the full source.
1995 pc
= new CParserContext(theScanner
, aKey
, mCommand
,
1996 0, theDTD
, theStatus
, aLastCall
);
1997 NS_ENSURE_TRUE(pc
, NS_ERROR_OUT_OF_MEMORY
);
2001 pc
->mMultipart
= !aLastCall
; // By default
2002 if (pc
->mPrevContext
) {
2003 pc
->mMultipart
|= pc
->mPrevContext
->mMultipart
;
2006 // Start fix bug 40143
2007 if (pc
->mMultipart
) {
2008 pc
->mStreamListenerState
= eOnDataAvail
;
2010 pc
->mScanner
->SetIncremental(PR_TRUE
);
2013 pc
->mStreamListenerState
= eOnStop
;
2015 pc
->mScanner
->SetIncremental(PR_FALSE
);
2018 // end fix for 40143
2020 pc
->mContextType
=CParserContext::eCTString
;
2021 pc
->SetMimeType(aMimeType
);
2022 if (pc
->mPrevContext
&& aMode
== eDTDMode_autodetect
) {
2023 // Preserve the DTD mode from the last context, bug 265814.
2024 pc
->mDTDMode
= pc
->mPrevContext
->mDTDMode
;
2026 pc
->mDTDMode
= aMode
;
2029 mUnusedInput
.Truncate();
2031 pc
->mScanner
->Append(aSourceBuffer
);
2032 // Do not interrupt document.write() - bug 95487
2033 result
= ResumeParse(PR_FALSE
, PR_FALSE
, PR_FALSE
);
2035 pc
->mScanner
->Append(aSourceBuffer
);
2036 if (!pc
->mPrevContext
) {
2037 // Set stream listener state to eOnStop, on the final context - Fix 68160,
2038 // to guarantee DidBuildModel() call - Fix 36148
2040 pc
->mStreamListenerState
= eOnStop
;
2041 pc
->mScanner
->SetIncremental(PR_FALSE
);
2044 if (pc
== mParserContext
) {
2045 // If pc is not mParserContext, then this call to ResumeParse would
2046 // do the wrong thing and try to continue parsing using
2047 // mParserContext. We need to wait to actually resume parsing on pc.
2048 ResumeParse(PR_FALSE
, PR_FALSE
, PR_FALSE
);
// Parses aSourceBuffer as a document fragment in the element context given
// by aTagStack. The tag stack is first serialized into a string of start
// tags and parsed to establish the context, then the fragment itself is
// parsed; the end-tag string built near the bottom is parsed last.
// Observers are disabled on entry and re-enabled on the visible exit paths.
// NOTE(review): aXMLMode, aMode and the key argument are used below but
// their declarations are not visible in this listing -- confirm against the
// full signature.
2058 nsParser::ParseFragment(const nsAString
& aSourceBuffer
,
2060 nsTArray
<nsString
>& aTagStack
,
2062 const nsACString
& aMimeType
,
2065 nsresult result
= NS_OK
;
2066 nsAutoString theContext
;
2067 PRUint32 theCount
= aTagStack
.Length();
2068 PRUint32 theIndex
= 0;
2070 // Disable observers for fragments
2071 mFlags
&= ~NS_PARSER_FLAG_OBSERVERS_ENABLED
;
2073 NS_ASSERTION(!mSpeculativeScriptThread
, "Can't reuse a parser like this");
// Serialize the tag stack as "<tag>" strings, walking aTagStack from its
// last entry down to its first.
2075 for (theIndex
= 0; theIndex
< theCount
; theIndex
++) {
2076 theContext
.AppendLiteral("<");
2077 theContext
.Append(aTagStack
[theCount
- theIndex
- 1]);
2078 theContext
.AppendLiteral(">");
2081 if (theCount
== 0) {
2082 // Ensure that the buffer is not empty. Because none of the DTDs care
2083 // about leading whitespace, this doesn't change the result.
2084 theContext
.AssignLiteral(" ");
2087 // First, parse the context to build up the DTD's tag stack. Note that we
2088 // pass PR_FALSE for the aLastCall parameter.
// The address of theContext doubles as the key for every Parse() call made
// by this method.
2089 result
= Parse(theContext
, (void*)&theContext
, aMimeType
, PR_FALSE
, aMode
);
2090 if (NS_FAILED(result
)) {
// Restore the observer flag that was cleared on entry before bailing out.
2091 mFlags
|= NS_PARSER_FLAG_OBSERVERS_ENABLED
;
// The sink must implement nsIFragmentContentSink; this is only asserted, so
// a release build would go on to use a null fragSink below.
2095 nsCOMPtr
<nsIFragmentContentSink
> fragSink
= do_QueryInterface(mSink
);
2096 NS_ASSERTION(fragSink
, "ParseFragment requires a fragment content sink");
2098 if (!aXMLMode
&& theCount
) {
2099 // First, we have to flush any tags that don't belong in the head if there
2100 // was no <body> in the context.
2101 // XXX This is extremely ugly. Maybe CNavDTD should have FlushMisplaced()?
2102 NS_ASSERTION(mParserContext
, "Parsing didn't create a parser context?");
// The context's DTD is downcast to CNavDTD without a runtime type check.
2104 CNavDTD
* dtd
= static_cast<CNavDTD
*>
2105 (static_cast<nsIDTD
*>
2106 (mParserContext
->mDTD
));
2107 NS_ASSERTION(dtd
, "How did we parse anything without a dtd?");
// Open a synthetic BODY container on the DTD using a stack-allocated
// start token and parser node.
2109 CStartToken
bodyToken(NS_LITERAL_STRING("BODY"), eHTMLTag_body
);
2110 nsCParserNode
bodyNode(&bodyToken
, 0);
2112 dtd
->OpenContainer(&bodyNode
, eHTMLTag_body
);
2114 // Now parse the flushed out tags.
2115 result
= BuildModel();
2116 if (NS_FAILED(result
)) {
2117 mFlags
|= NS_PARSER_FLAG_OBSERVERS_ENABLED
;
2121 // Now that we've flushed all of the tags out of the body, we have to make
2122 // sure that there aren't any context tags left in the scanner.
2123 NS_ASSERTION(mParserContext
->mScanner
, "Where'd the scanner go?");
// NOTE(review): the declaration of `next` is not visible in this listing.
2126 if (NS_SUCCEEDED(mParserContext
->mScanner
->Peek(next
))) {
2127 // Uh, oh. This must mean that the context stack has a special tag on
2128 // it, such as <textarea> or <title> that requires its end tag before it
2129 // will be consumed. Tell the content sink that it will be coming.
2130 // Note: For now, we can assume that there is only one such tag.
2131 NS_ASSERTION(next
== '<', "The tokenizer failed to consume a token");
2132 fragSink
->IgnoreFirstContainer();
2136 fragSink
->WillBuildContent();
2137 // Now, parse the actual content. Note that this is the last call
2138 // for HTML content, but for XML, we will want to build and parse
2139 // the end tags. However, if tagStack is empty, it's the last call
2141 if (!aXMLMode
|| (theCount
== 0)) {
2142 result
= Parse(aSourceBuffer
, &theContext
, aMimeType
,
2144 fragSink
->DidBuildContent();
2146 // Add an end tag chunk, so expat will read the whole source buffer,
2147 // and not worry about ']]' etc.
2148 result
= Parse(aSourceBuffer
+ NS_LITERAL_STRING("</"),
2149 &theContext
, aMimeType
, PR_FALSE
, aMode
);
2150 fragSink
->DidBuildContent();
2152 if (NS_SUCCEEDED(result
)) {
// Build the closing tags, walking aTagStack from first entry to last
// (the reverse of the order used for the opening context above).
2153 nsAutoString endContext
;
2154 for (theIndex
= 0; theIndex
< theCount
; theIndex
++) {
2155 // we already added an end tag chunk above
2157 endContext
.AppendLiteral("</");
2160 nsString
& thisTag
= aTagStack
[theIndex
];
2161 // was there an xmlns=?
// Only the text before the first space is used as the end-tag name.
2162 PRInt32 endOfTag
= thisTag
.FindChar(PRUnichar(' '));
2163 if (endOfTag
== -1) {
2164 endContext
.Append(thisTag
);
2166 endContext
.Append(Substring(thisTag
,0,endOfTag
));
2169 endContext
.AppendLiteral(">");
2172 result
= Parse(endContext
, &theContext
, aMimeType
,
// Re-enable observers now that fragment parsing is finished.
2177 mFlags
|= NS_PARSER_FLAG_OBSERVERS_ENABLED
;
2183 * This routine is called to cause the parser to continue parsing its
2184 * underlying stream. This call allows the parse process to happen in
2185 * chunks, such as when the content is push based, and we need to parse in
2188 * An interesting change in how the parser gets used has led us to add extra
2189 * processing to this method. The case occurs when the parser is blocked in
2190 * one context, and gets a parse(string) call in another context. In this
2191 * case, the parserContexts are linked. No problem.
2193 * The problem is that Parse(string) assumes that it can proceed unabated,
2194 * but if the parser is already blocked that assumption is false. So we
2195 * needed to add a mechanism here to allow the parser to continue to process
2196 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
2200 * @param allowIteration : set to true if non-script resumption is requested
2201 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
2202 * @return error code -- 0 if ok, non-zero if error.
// Drives the tokenize / build-model loop for the current parser context.
// Work is only done when the parser is enabled and has not already been
// stopped. Inside the loop, unused input is pushed back into the scanner,
// the input is tokenized (when tokenizing is allowed) and BuildModel() is
// run; BLOCK, STOPPARSING, end-of-buffer and INTERRUPTED outcomes are then
// handled. NS_ERROR_HTMLPARSER_INTERRUPTED is reported to the caller as
// NS_OK.
2205 nsParser::ResumeParse(PRBool allowIteration
, PRBool aIsFinalChunk
,
2206 PRBool aCanInterrupt
)
2208 nsresult result
= NS_OK
;
2210 if ((mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
) &&
2211 mInternalState
!= NS_ERROR_HTMLPARSER_STOPPARSING
) {
2212 MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2213 MOZ_TIMER_START(mParseTime
);
2215 NS_ASSERTION(!mSpeculativeScriptThread
|| !mSpeculativeScriptThread
->Parsing(),
2216 "Bad races happening, expect to crash!");
2218 result
= WillBuildModel(mParserContext
->mScanner
->GetFilename());
2219 if (NS_FAILED(result
)) {
// WillBuildModel failed: clear the flag that permits tokenizing.
2220 mFlags
&= ~NS_PARSER_FLAG_CAN_TOKENIZE
;
2224 if (mParserContext
->mDTD
) {
2225 mParserContext
->mDTD
->WillResumeParse(mSink
);
2226 PRBool theIterationIsOk
= PR_TRUE
;
// Keep iterating while the last step returned exactly NS_OK and the
// iteration flag is still set.
2228 while (result
== NS_OK
&& theIterationIsOk
) {
2229 if (!mUnusedInput
.IsEmpty() && mParserContext
->mScanner
) {
2230 // -- Ref: Bug# 22485 --
2231 // Insert the unused input into the source buffer
2232 // as if it was read from the input stream.
2233 // Adding UngetReadable() per vidur!!
2234 mParserContext
->mScanner
->UngetReadable(mUnusedInput
);
2235 mUnusedInput
.Truncate(0);
2238 // Only allow parsing to be interrupted in the subsequent call to
2240 SetCanInterrupt(aCanInterrupt
);
// Tokenize only when the CAN_TOKENIZE flag is set.
// NOTE(review): the alternative arm of this conditional is not visible in
// this listing.
2241 nsresult theTokenizerResult
= (mFlags
& NS_PARSER_FLAG_CAN_TOKENIZE
)
2242 ? Tokenize(aIsFinalChunk
)
2244 result
= BuildModel();
// An interruption on the final chunk posts a continue event.
2246 if (result
== NS_ERROR_HTMLPARSER_INTERRUPTED
&& aIsFinalChunk
) {
2247 PostContinueEvent();
2249 SetCanInterrupt(PR_FALSE
);
// Stop iterating once the tokenizer hit end-of-buffer or the model
// builder was interrupted.
2251 theIterationIsOk
= theTokenizerResult
!= kEOF
&&
2252 result
!= NS_ERROR_HTMLPARSER_INTERRUPTED
;
2254 // Make sure not to stop parsing too early. Therefore, before shutting
2255 // down the parser, it's important to check whether the input buffer
2256 // has been scanned to completion (theTokenizerResult should be kEOF).
2257 // kEOF -> End of buffer.
2259 // If we're told to block the parser, we disable all further parsing
2260 // (and cache any data coming in) until the parser is re-enabled.
2261 if (NS_ERROR_HTMLPARSER_BLOCK
== result
) {
2262 if (mParserContext
->mDTD
) {
2263 mParserContext
->mDTD
->WillInterruptParse(mSink
);
2266 if (mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
) {
2267 // If we were blocked by a recursive invocation, don't re-block.
2269 SpeculativelyParse();
2273 if (NS_ERROR_HTMLPARSER_STOPPARSING
== result
) {
2274 // Note: Parser Terminate() calls DidBuildModel.
// Only call DidBuildModel here if the parser was not already recorded as
// stopped, then record the stop in mInternalState.
2275 if (mInternalState
!= NS_ERROR_HTMLPARSER_STOPPARSING
) {
2276 DidBuildModel(mStreamStatus
);
2277 mInternalState
= result
;
// Reached the end of the buffered input cleanly, or was interrupted.
2282 if ((NS_OK
== result
&& theTokenizerResult
== kEOF
) ||
2283 result
== NS_ERROR_HTMLPARSER_INTERRUPTED
) {
2284 PRBool theContextIsStringBased
=
2285 CParserContext::eCTString
== mParserContext
->mContextType
;
2287 if (mParserContext
->mStreamListenerState
== eOnStop
||
2288 !mParserContext
->mMultipart
|| theContextIsStringBased
) {
// No previous context means this is the outermost context.
2289 if (!mParserContext
->mPrevContext
) {
2290 if (mParserContext
->mStreamListenerState
== eOnStop
) {
2291 DidBuildModel(mStreamStatus
);
2293 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2294 MOZ_TIMER_STOP(mParseTime
);
2296 MOZ_TIMER_LOG(("Parse Time (this=%p): ", this));
2297 MOZ_TIMER_PRINT(mParseTime
);
2299 MOZ_TIMER_LOG(("DTD Time: "));
2300 MOZ_TIMER_PRINT(mDTDTime
);
2302 MOZ_TIMER_LOG(("Tokenize Time: "));
2303 MOZ_TIMER_PRINT(mTokenizeTime
);
// Pop the finished context. Iteration continues only if the caller allowed
// it and the popped context was string based.
2308 CParserContext
* theContext
= PopContext();
2310 theIterationIsOk
= allowIteration
&& theContextIsStringBased
;
// Preserve the popped context's unread data in mUnusedInput when it asks
// for that.
2311 if (theContext
->mCopyUnused
) {
2312 theContext
->mScanner
->CopyUnusedData(mUnusedInput
);
// Continue with the parser's recorded internal state and recompute whether
// the now-current context is on its final chunk.
2318 result
= mInternalState
;
2319 aIsFinalChunk
= mParserContext
&&
2320 mParserContext
->mStreamListenerState
== eOnStop
;
2321 // ...then intentionally fall through to WillInterruptParse()...
2326 if (theTokenizerResult
== kEOF
||
2327 result
== NS_ERROR_HTMLPARSER_INTERRUPTED
) {
// INTERRUPTED is normalised to NS_OK before notifying the DTD.
2328 result
= (result
== NS_ERROR_HTMLPARSER_INTERRUPTED
) ? NS_OK
: result
;
2329 if (mParserContext
->mDTD
) {
2330 mParserContext
->mDTD
->WillInterruptParse(mSink
);
// NOTE(review): presumably the branch taken when the context has no DTD --
// the enclosing `else` is not visible in this listing.
2335 mInternalState
= result
= NS_ERROR_HTMLPARSER_UNRESOLVEDDTD
;
2339 MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this));
2340 MOZ_TIMER_STOP(mParseTime
);
// Callers never see INTERRUPTED; it is reported as NS_OK.
2342 return (result
== NS_ERROR_HTMLPARSER_INTERRUPTED
) ? NS_OK
: result
;
2346 * This is where we loop over the tokens created in the
2347 * tokenization phase, and try to make sense out of them.
// Obtains the tokenizer for the current parser context and hands it to the
// DTD of the root (outermost) context to build the content model. If the
// tokenizer cannot be obtained, the result and mInternalState are set to
// NS_ERROR_HTMLPARSER_BADTOKENIZER.
2350 nsParser::BuildModel()
2352 CParserContext
* theRootContext
= mParserContext
;
2353 nsITokenizer
* theTokenizer
= nsnull
;
2355 nsresult result
= NS_OK
;
2356 if (mParserContext
) {
// Use the DTD's type when a DTD is present, otherwise default to HTML.
2357 PRInt32 type
= mParserContext
->mDTD
? mParserContext
->mDTD
->GetType() :
2358 NS_IPARSER_FLAG_HTML
;
2359 result
= mParserContext
->GetTokenizer(type
, mSink
, theTokenizer
);
// NOTE(review): if mParserContext is null, result is still NS_OK here and
// theRootContext (also null) looks like it would be dereferenced below --
// confirm against the full source that callers guarantee a context.
2362 if (NS_SUCCEEDED(result
)) {
2363 // Get the root DTD for use in model building...
2364 while (theRootContext
->mPrevContext
) {
2365 theRootContext
= theRootContext
->mPrevContext
;
2368 nsIDTD
* theRootDTD
= theRootContext
->mDTD
;
2370 MOZ_TIMER_START(mDTDTime
);
2371 result
= theRootDTD
->BuildModel(this, theTokenizer
, nsnull
, mSink
);
2372 MOZ_TIMER_STOP(mDTDTime
);
// NOTE(review): presumably the failure branch of the NS_SUCCEEDED check
// above -- the `else` is not visible in this listing.
2375 mInternalState
= result
= NS_ERROR_HTMLPARSER_BADTOKENIZER
;
2380 /*******************************************************************
2381 These methods are used to talk to the netlib system...
2382 *******************************************************************/
// Stream-listener start notification. Forwards the notification to
// mObserver, resets the current parser context for the new request (stream
// state, autodetect status, DTD, request), records the channel's content
// type as the context's MIME type, and notifies the registered parser data
// listeners.
2385 nsParser::OnStartRequest(nsIRequest
*request
, nsISupports
* aContext
)
2387 NS_PRECONDITION(eNone
== mParserContext
->mStreamListenerState
,
2388 "Parser's nsIStreamListener API was not setup "
2389 "correctly in constructor.");
2391 mObserver
->OnStartRequest(request
, aContext
);
// Reset per-request state on the current context.
2393 mParserContext
->mStreamListenerState
= eOnStart
;
2394 mParserContext
->mAutoDetectStatus
= eUnknownDetect
;
2395 mParserContext
->mDTD
= nsnull
;
2396 mParserContext
->mRequest
= request
;
// Take the MIME type from the channel when the request is a channel.
// NOTE(review): the declaration of `rv` and any null check on `channel` are
// not visible in this listing.
2399 nsCAutoString contentType
;
2400 nsCOMPtr
<nsIChannel
> channel
= do_QueryInterface(request
);
2402 rv
= channel
->GetContentType(contentType
);
2403 if (NS_SUCCEEDED(rv
)) {
2404 mParserContext
->SetMimeType(contentType
);
// Notify parser data listeners, passing the sink's target as their context.
// Listener results are OR-ed into rv.
2410 if (sParserDataListeners
&& mSink
) {
2411 nsISupports
*ctx
= mSink
->GetTarget();
2412 PRInt32 count
= sParserDataListeners
->Count();
2415 rv
|= sParserDataListeners
->ObjectAt(count
)->
2416 OnStartRequest(request
, ctx
);
// Charset name strings assigned to the output charset by
// DetectByteOrderMark() below.
2424 #define UTF16_BOM "UTF-16"
2425 #define UTF16_BE "UTF-16BE"
2426 #define UTF16_LE "UTF-16LE"
2427 #define UCS4_BOM "UTF-32"
2428 #define UCS4_BE "UTF-32BE"
2429 #define UCS4_LE "UTF-32LE"
2430 #define UCS4_2143 "X-ISO-10646-UCS-4-2143"
2431 #define UCS4_3412 "X-ISO-10646-UCS-4-3412"
2432 #define UTF8 "UTF-8"
// Tests a single byte; DetectByteOrderMark() uses it to check the "SM" byte
// of the BOM-less UTF-16 patterns "00 3C 00 SM" and "3C 00 SM 00".
// NOTE(review): the body is not visible in this listing, so the accepted
// byte values cannot be stated here -- confirm against the full source.
2434 static inline PRBool
IsSecondMarker(unsigned char aChar
)
// Guesses the charset of a buffer from its leading bytes: byte order marks,
// BOM-less UTF-16 / UCS-4 patterns beginning with '<', and the encoding
// pseudo-attribute of an ASCII "<?xml" declaration.
//
// aBytes         - start of the buffer to inspect.
// aLen           - number of bytes available; it bounds the scan of the XML
//                  declaration.
// oCharset       - receives the detected charset name; left empty when
//                  nothing was detected.
// oCharsetSource - receives kCharsetFromByteOrderMark or kCharsetFromMetaTag
//                  on the detection paths; initialised to
//                  kCharsetFromAutoDetection.
// Returns true when oCharset is non-empty.
//
// NOTE(review): aBytes[0]..aBytes[3] are read without a visible check
// against aLen -- confirm that callers always supply at least four bytes.
2448 DetectByteOrderMark(const unsigned char* aBytes
, PRInt32 aLen
,
2449 nsCString
& oCharset
, PRInt32
& oCharsetSource
)
2451 oCharsetSource
= kCharsetFromAutoDetection
;
2452 oCharset
.Truncate();
2453 // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
2455 // Also, MS Win2K notepad now generates a 3-byte BOM in UTF-8 as a UTF-8
// signature.
2456 // We need to check that
2457 // UCS2 BOM FEFF = UTF8 EF BB BF
// NOTE(review): the dispatch on aBytes[0] that selects among the groups
// below is not visible in this listing.
// --- Patterns whose second byte is 0x00 or 0x3C (first byte presumably
// 0x00, per the byte comments below) ---
2461 if(0x00==aBytes
[1]) {
2463 if((0xFE==aBytes
[2]) && (0xFF==aBytes
[3])) {
2464 // 00 00 FE FF UCS-4, big-endian machine (1234 order)
2465 oCharset
.Assign(UCS4_BOM
);
2466 } else if((0x00==aBytes
[2]) && (0x3C==aBytes
[3])) {
2467 // 00 00 00 3C UCS-4, big-endian machine (1234 order)
2468 oCharset
.Assign(UCS4_BE
);
2469 } else if((0xFF==aBytes
[2]) && (0xFE==aBytes
[3])) {
2470 // 00 00 FF FE UCS-4, unusual octet order (2143)
2471 oCharset
.Assign(UCS4_2143
);
2472 } else if((0x3C==aBytes
[2]) && (0x00==aBytes
[3])) {
2473 // 00 00 3C 00 UCS-4, unusual octet order (2143)
2474 oCharset
.Assign(UCS4_2143
);
2476 oCharsetSource
= kCharsetFromByteOrderMark
;
2477 } else if((0x3C==aBytes
[1]) && (0x00==aBytes
[2])) {
2479 if(IsSecondMarker(aBytes
[3])) {
2480 // 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
2481 oCharset
.Assign(UTF16_BE
);
2482 } else if((0x00==aBytes
[3])) {
2483 // 00 3C 00 00 UCS-4, unusual octet order (3412)
2484 oCharset
.Assign(UCS4_3412
);
2486 oCharsetSource
= kCharsetFromByteOrderMark
;
// --- Patterns starting with '<' (0x3C), per the byte comments below ---
2490 if(0x00==aBytes
[1] && (0x00==aBytes
[3])) {
2492 if(IsSecondMarker(aBytes
[2])) {
2493 // 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
2494 oCharset
.Assign(UTF16_LE
);
2495 } else if((0x00==aBytes
[2])) {
2496 // 3C 00 00 00 UCS-4, little-endian machine (4321 order)
2497 oCharset
.Assign(UCS4_LE
);
2499 oCharsetSource
= kCharsetFromByteOrderMark
;
2500 // For html, meta tag detector is invoked before this so that we have
2501 // to deal only with XML here.
2502 } else if( (0x3F==aBytes
[1]) &&
2503 (0x78==aBytes
[2]) && (0x6D==aBytes
[3]) &&
2504 (0 == PL_strncmp("<?xml", (char*)aBytes
, 5 ))) {
2506 // ASCII characters are in their normal positions, so we can safely
2507 // deal with the XML declaration in the old C way
2508 // The shortest string so far (strlen==5):
2511 PRBool versionFound
= PR_FALSE
, encodingFound
= PR_FALSE
;
// Scan the declaration byte by byte, starting just past "<?xml ", until the
// encoding has been found or the buffer ends.
2512 for (i
=6; i
< aLen
&& !encodingFound
; ++i
) {
2513 // end of XML declaration?
2514 if ((((char*)aBytes
)[i
] == '?') &&
2516 (((char*)aBytes
)[i
+1] == '>')) {
2519 // Version is required.
2520 if (!versionFound
) {
2521 // Want to avoid string comparisons, hence looking for 'n'
2522 // and only if found check the string leading to it. Not
2523 // foolproof, but fast.
2524 // The shortest string allowed before this is (strlen==13):
2526 if ((((char*)aBytes
)[i
] == 'n') &&
2528 (0 == PL_strncmp("versio", (char*)(aBytes
+i
-6), 6 ))) {
2529 // Fast forward through version
// Advance through the bytes after "version", examining quote characters.
2531 for (++i
; i
< aLen
; ++i
) {
2532 char qi
= ((char*)aBytes
)[i
];
2533 if (qi
== '\'' || qi
== '"') {
2536 versionFound
= PR_TRUE
;
2546 // encoding must follow version
2547 // Want to avoid string comparisons, hence looking for 'g'
2548 // and only if found check the string leading to it. Not
2549 // foolproof, but fast.
2550 // The shortest allowed string before this (strlen==26):
2551 // <?xml version="1" encoding
2552 if ((((char*)aBytes
)[i
] == 'g') &&
2554 (0 == PL_strncmp("encodin", (char*)(aBytes
+i
-7), 7 ))) {
2555 PRInt32 encStart
= 0;
2557 for (++i
; i
< aLen
; ++i
) {
2558 char qi
= ((char*)aBytes
)[i
];
2559 if (qi
== '\'' || qi
== '"') {
// count is the length of the encoding value, measured from encStart.
2561 PRInt32 count
= i
- encStart
;
2562 // encoding value is invalid if it is UTF-16
2564 (0 != PL_strcmp("UTF-16", (char*)(aBytes
+encStart
)))) {
2565 oCharset
.Assign((char*)(aBytes
+encStart
),count
);
2566 oCharsetSource
= kCharsetFromMetaTag
;
2568 encodingFound
= PR_TRUE
;
2577 } // if (!versionFound)
// --- UTF-8 signature: second and third bytes BB BF ---
2582 if((0xBB==aBytes
[1]) && (0xBF==aBytes
[2])) {
2585 oCharset
.Assign(UTF8
);
2586 oCharsetSource
= kCharsetFromByteOrderMark
;
// --- FE FF: UTF-16 BOM, or UCS-4 (3412) when followed by 00 00 ---
2590 if(0xFF==aBytes
[1]) {
2591 if(0x00==aBytes
[2] && 0x00==aBytes
[3]) {
2592 // FE FF 00 00 UCS-4, unusual octet order (3412)
2593 oCharset
.Assign(UCS4_3412
);
2595 // FE FF UTF-16, big-endian
2596 oCharset
.Assign(UTF16_BOM
);
2598 oCharsetSource
= kCharsetFromByteOrderMark
;
// --- FF FE: UTF-16 BOM, or UTF-32 when followed by 00 00 ---
2602 if(0xFE==aBytes
[1]) {
2603 if(0x00==aBytes
[2] && 0x00==aBytes
[3])
2604 // FF FE 00 00 UTF-32, little-endian
2605 oCharset
.Assign(UCS4_BOM
);
2608 // UTF-16, little-endian
2609 oCharset
.Assign(UTF16_BOM
);
2610 oCharsetSource
= kCharsetFromByteOrderMark
;
2613 // case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
2614 // We do not care about EBCDIC here....
// Detection succeeded exactly when a charset name was assigned.
2618 return !oCharset
.IsEmpty();
// Advances aStart by one position and returns the character now under it,
// or '\0' if the advance reached aEnd. aStart is modified in place. The
// caller must not pass aStart == aEnd (asserted).
2622 GetNextChar(nsACString::const_iterator
& aStart
,
2623 nsACString::const_iterator
& aEnd
)
2625 NS_ASSERTION(aStart
!= aEnd
, "end of buffer");
2626 return (++aStart
!= aEnd
) ? *aStart
: '\0';
// Scans the beginning of a buffer (at most 2048 bytes) for a complete META
// tag carrying a CHARSET value and, when one is found, copies that value
// into aCharset. aCharsetSource is set to kCharsetFromMetaTag and aCharset
// is cleared up front. The MIME-type check below guards the scan, per its
// own comment restricting it to HTML documents.
// NOTE(review): the aLen parameter is used below but its declaration is not
// visible in this listing.
2630 nsParser::DetectMetaTag(const char* aBytes
,
2632 nsCString
& aCharset
,
2633 PRInt32
& aCharsetSource
)
2635 aCharsetSource
= kCharsetFromMetaTag
;
2636 aCharset
.SetLength(0);
2638 // XXX Only look inside HTML documents for now. For XML
2639 // documents we should be looking inside the XMLDecl.
2640 if (!mParserContext
->mMimeType
.EqualsLiteral(kHTMLTextContentType
)) {
2644 // Fast and loose parsing to determine if we have a complete
2645 // META tag in this block, looking up to 2k into it.
2646 const nsASingleFragmentCString
& str
=
2647 Substring(aBytes
, aBytes
+ PR_MIN(aLen
, 2048));
2648 // XXXldb Should be const_char_iterator when FindInReadable supports it.
2649 nsACString::const_iterator begin
, end
;
2651 str
.BeginReading(begin
);
2652 str
.EndReading(end
);
// currPos is the scan cursor, tagEnd marks the end of the tag being
// examined, and tokEnd is used to find the end of the charset value.
2653 nsACString::const_iterator
currPos(begin
);
2654 nsACString::const_iterator tokEnd
;
2655 nsACString::const_iterator
tagEnd(begin
);
2657 while (currPos
!= end
) {
2658 if (!FindCharInReadable('<', currPos
, end
))
2659 break; // no tag found in this buffer
// "<!" introduces either a comment ("<!--") or some other declaration.
2661 if (GetNextChar(currPos
, end
) == '!') {
2662 if (GetNextChar(currPos
, end
) != '-' ||
2663 GetNextChar(currPos
, end
) != '-') {
2664 // If we only see a <! not followed by --, just skip to the next >.
2665 if (!FindCharInReadable('>', currPos
, end
)) {
2666 return PR_FALSE
; // No more tags to follow.
2669 // Continue searching for a meta tag following this "comment".
2674 // Found MDO ( <!-- ). Now search for MDC ( --[*s]> )
2675 PRBool foundMDC
= PR_FALSE
;
2676 PRBool foundMatch
= PR_FALSE
;
2678 if (GetNextChar(currPos
, end
) == '-' &&
2679 GetNextChar(currPos
, end
) == '-') {
2680 foundMatch
= !foundMatch
; // toggle until we've matching "--"
2681 } else if (currPos
== end
) {
2682 return PR_FALSE
; // Couldn't find --[*s]> in this buffer
2683 } else if (foundMatch
&& *currPos
== '>') {
2684 foundMDC
= PR_TRUE
; // found comment end delimiter.
2688 continue; // continue searching for META tag.
2691 // Find the end of the tag, break if incomplete
2693 if (!FindCharInReadable('>', tagEnd
, end
))
2696 // If this is not a META tag, continue to next loop
// Case-insensitive match of "meta" followed by ASCII whitespace; currPos
// is advanced as each character is tested.
2697 if ( (*currPos
!= 'm' && *currPos
!= 'M') ||
2698 (*(++currPos
) != 'e' && *currPos
!= 'E') ||
2699 (*(++currPos
) != 't' && *currPos
!= 'T') ||
2700 (*(++currPos
) != 'a' && *currPos
!= 'A') ||
2701 !nsCRT::IsAsciiSpace(*(++currPos
))) {
2706 // If could not find "charset" in this tag, skip this tag and try next
2708 if (!CaseInsensitiveFindInReadable(NS_LITERAL_CSTRING("CHARSET"),
2715 // skip spaces before '='
2716 while (*currPos
== kSpace
|| *currPos
== kNewLine
||
2717 *currPos
== kCR
|| *currPos
== kTab
) {
2721 if (*currPos
!= '=') {
2726 // skip spaces after '='
2727 while (*currPos
== kSpace
|| *currPos
== kNewLine
||
2728 *currPos
== kCR
|| *currPos
== kTab
) {
2733 if (*currPos
== '\'' || *currPos
== '\"')
2736 // find the end of charset string
// The value ends at the next quote character or at the end of the tag.
2738 while (*tokEnd
!= '\'' && *tokEnd
!= '\"' && tokEnd
!= tagEnd
)
2741 // return true if we successfully got something for charset
2742 if (currPos
!= tokEnd
) {
2743 aCharset
.Assign(currPos
.get(), tokEnd
.get() - currPos
.get());
2747 // Nothing specified as charset, continue next loop
// Closure data passed from nsParser::OnDataAvailable() to ParserWriteFunc()
// through nsIInputStream::ReadSegments().
// NOTE(review): ParserWriteFunc() also reads an mParser member whose
// declaration is not visible in this listing.
// Set when the segment should be checked for a META charset or BOM; cleared
// by ParserWriteFunc() once the check has run.
2755 PRBool mNeedCharsetCheck
;
// Optional filter given the raw buffer before it reaches the scanner.
2757 nsIParserFilter
* mParserFilter
;
// Scanner that the segment data is appended to.
2758 nsScanner
* mScanner
;
// Request the data belongs to.
2759 nsIRequest
* mRequest
;
2760 } ParserWriteStruct
;
2763 * This function is invoked as a result of a call to a stream's
2764 * ReadSegments() method. It is called for each contiguous buffer
2765 * of data in the underlying stream or pipe. Using ReadSegments
2766 * allows us to avoid copying data to read out of the stream.
// ReadSegments() callback. `closure` is a ParserWriteStruct. When a charset
// check is pending, the segment is examined for a META charset or a byte
// order mark and the parser's document/sink charset (plus the cache entry's
// "charset" metadata) is updated. The segment is then offered to the
// optional parser filter and appended to the scanner. On a successful
// append, *writeCount is set to the full segment length.
// NOTE(review): the `closure` and `count` parameters are used below but
// their declarations are not visible in this listing.
2769 ParserWriteFunc(nsIInputStream
* in
,
2771 const char* fromRawSegment
,
2774 PRUint32
*writeCount
)
2777 ParserWriteStruct
* pws
= static_cast<ParserWriteStruct
*>(closure
);
2778 const char* buf
= fromRawSegment
;
2779 PRUint32 theNumRead
= count
;
// NOTE(review): the condition guarding this failure return is not visible
// in this listing.
2782 return NS_ERROR_FAILURE
;
2785 if (pws
->mNeedCharsetCheck
) {
2786 PRInt32 guessSource
;
2787 nsCAutoString guess
;
2788 nsCAutoString preferred
;
// The check runs at most once per ParserWriteStruct.
2790 pws
->mNeedCharsetCheck
= PR_FALSE
;
// Try the META tag first, then fall back to byte-order-mark detection.
2791 if (pws
->mParser
->DetectMetaTag(buf
, theNumRead
, guess
, guessSource
) ||
2793 DetectByteOrderMark((const unsigned char*)buf
,
2794 theNumRead
, guess
, guessSource
))) {
// Resolve the guessed name to its preferred alias.
2795 nsCOMPtr
<nsICharsetAlias
> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID
));
2796 result
= alias
->GetPreferred(guess
, preferred
);
2797 // Only continue if it's a recognized charset and not
2798 // one of a designated set that we ignore.
// UTF-16 / UTF-32 names are accepted only when they came from a byte
// order mark.
2799 if (NS_SUCCEEDED(result
) &&
2800 ((kCharsetFromByteOrderMark
== guessSource
) ||
2801 (!preferred
.EqualsLiteral("UTF-16") &&
2802 !preferred
.EqualsLiteral("UTF-16BE") &&
2803 !preferred
.EqualsLiteral("UTF-16LE") &&
2804 !preferred
.EqualsLiteral("UTF-32") &&
2805 !preferred
.EqualsLiteral("UTF-32BE") &&
2806 !preferred
.EqualsLiteral("UTF-32LE")))) {
2808 pws
->mParser
->SetDocumentCharset(guess
, guessSource
);
2809 pws
->mParser
->SetSinkCharset(preferred
);
// Record the charset in the cache entry's metadata when the request is a
// caching channel with a cache entry descriptor.
2810 nsCOMPtr
<nsICachingChannel
> channel(do_QueryInterface(pws
->mRequest
));
2812 nsCOMPtr
<nsISupports
> cacheToken
;
2813 channel
->GetCacheToken(getter_AddRefs(cacheToken
));
2815 nsCOMPtr
<nsICacheEntryDescriptor
> cacheDescriptor(do_QueryInterface(cacheToken
));
2816 if (cacheDescriptor
) {
2820 cacheDescriptor
->SetMetaDataElement("charset",
2822 NS_ASSERTION(NS_SUCCEEDED(rv
),"cannot SetMetaDataElement");
// Offer the raw buffer to the filter, passing the byte count by address.
2830 if (pws
->mParserFilter
)
2831 pws
->mParserFilter
->RawBuffer(buf
, &theNumRead
);
2833 result
= pws
->mScanner
->Append(buf
, theNumRead
, pws
->mRequest
);
// Report the whole segment as consumed when the append succeeded.
2834 if (NS_SUCCEEDED(result
)) {
2835 *writeCount
= count
;
// Stream-listener data notification. Finds the parser context that belongs
// to `request`, marks it as receiving data, feeds the stream's segments to
// that context's scanner through ParserWriteFunc(), and starts/resumes
// parsing once no scripts are executing and the scanner holds
// non-whitespace data.
// NOTE(review): the aLength parameter and the `totalRead` local are used
// below but their declarations are not visible in this listing.
2842 nsParser::OnDataAvailable(nsIRequest
*request
, nsISupports
* aContext
,
2843 nsIInputStream
*pIStream
, PRUint32 sourceOffset
,
2846 NS_PRECONDITION((eOnStart
== mParserContext
->mStreamListenerState
||
2847 eOnDataAvail
== mParserContext
->mStreamListenerState
),
2848 "Error: OnStartRequest() must be called before OnDataAvailable()");
2849 NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream
),
2850 "Must have a buffered input stream");
2852 nsresult rv
= NS_OK
;
// Walk the context chain to find the context for this request.
2854 CParserContext
*theContext
= mParserContext
;
2856 while (theContext
&& theContext
->mRequest
!= request
) {
2857 theContext
= theContext
->mPrevContext
;
2861 theContext
->mStreamListenerState
= eOnDataAvail
;
// Stop the speculative script thread while the parser is enabled.
2863 if ((mFlags
& NS_PARSER_FLAG_PARSER_ENABLED
) &&
2864 mSpeculativeScriptThread
) {
2865 mSpeculativeScriptThread
->StopParsing(PR_FALSE
);
// When autodetection was marked invalid, move the scanner position to the
// end of what it currently holds.
2868 if (eInvalidDetect
== theContext
->mAutoDetectStatus
) {
2869 if (theContext
->mScanner
) {
2870 nsScannerIterator iter
;
2871 theContext
->mScanner
->EndReading(iter
);
2872 theContext
->mScanner
->SetPosition(iter
, PR_TRUE
);
// Charset sniffing is requested only for the first data of the stream
// (sourceOffset == 0) and only while the current charset source ranks
// below a META tag.
2877 ParserWriteStruct pws
;
2878 pws
.mNeedCharsetCheck
=
2879 (0 == sourceOffset
) && (mCharsetSource
< kCharsetFromMetaTag
);
2881 pws
.mParserFilter
= mParserFilter
;
2882 pws
.mScanner
= theContext
->mScanner
;
2883 pws
.mRequest
= request
;
2885 rv
= pIStream
->ReadSegments(ParserWriteFunc
, &pws
, aLength
, &totalRead
);
2886 if (NS_FAILED(rv
)) {
2890 // Don't bother to start parsing until we've seen some
2891 // non-whitespace data
2892 if (mScriptsExecuting
== 0 &&
2893 theContext
->mScanner
->FirstNonWhitespacePosition() >= 0) {
// NOTE(review): presumably the path taken when no context matches the
// request -- the enclosing branch is not visible in this listing.
2900 rv
= NS_ERROR_UNEXPECTED
;
2907 * This is called by the networking library once the last block of data
2908 * has been collected from the net.
// Stream-listener stop notification. Stops the speculative script thread,
// marks the context belonging to `request` as stopped and makes its scanner
// non-incremental, records the final status, finishes the parser filter,
// resumes parsing when no scripts are executing, and forwards the
// notification to mObserver and the parser data listeners.
// NOTE(review): the `status` parameter is used below but its declaration is
// not visible in this listing.
2911 nsParser::OnStopRequest(nsIRequest
*request
, nsISupports
* aContext
,
2914 nsresult rv
= NS_OK
;
2916 if (mSpeculativeScriptThread
) {
2917 mSpeculativeScriptThread
->StopParsing(PR_FALSE
);
// Search the context chain, starting at the current context, for the one
// that owns this request.
2920 CParserContext
*pc
= mParserContext
;
2922 if (pc
->mRequest
== request
) {
2923 pc
->mStreamListenerState
= eOnStop
;
2924 pc
->mScanner
->SetIncremental(PR_FALSE
);
2928 pc
= pc
->mPrevContext
;
// Remember the final stream status; ResumeParse() passes it to
// DidBuildModel().
2931 mStreamStatus
= status
;
2934 mParserFilter
->Finish();
2936 if (mScriptsExecuting
== 0 && NS_SUCCEEDED(rv
)) {
2940 rv
= ResumeParse(PR_TRUE
, PR_TRUE
);
2943 // If the parser isn't enabled, we don't finish parsing till
2947 // XXX Should we wait to notify our observers as well if the
2948 // parser isn't yet enabled?
2950 mObserver
->OnStopRequest(request
, aContext
, status
);
// Notify parser data listeners, passing the sink's target as their context.
// Listener results are OR-ed into rv.
2953 if (sParserDataListeners
&& mSink
) {
2954 nsISupports
*ctx
= mSink
->GetTarget();
2955 PRInt32 count
= sParserDataListeners
->Count();
2958 rv
|= sParserDataListeners
->ObjectAt(count
)->OnStopRequest(request
, ctx
,
2967 /*******************************************************************
2968 Here come the tokenization methods...
2969 *******************************************************************/
2973 * Part of the code sandwich, this gets called right before
2974 * the tokenization process begins. The main reason for
2975 * this call is to allow the delegate to do initialization.
// Looks up the tokenizer for the current parser context and forwards the
// WillTokenize notification to it. Returns whether the tokenizer's call
// succeeded; a tokenizer lookup failure yields PR_FALSE.
2978 nsParser::WillTokenize(PRBool aIsFinalChunk
)
// NOTE(review): the body of this no-context guard is not visible in this
// listing.
2980 if (!mParserContext
) {
2984 nsITokenizer
* theTokenizer
;
// Use the DTD's type when a DTD is present, otherwise default to HTML.
2985 PRInt32 type
= mParserContext
->mDTD
? mParserContext
->mDTD
->GetType() :
2986 NS_IPARSER_FLAG_HTML
;
2987 nsresult result
= mParserContext
->GetTokenizer(type
, mSink
, theTokenizer
);
2988 NS_ENSURE_SUCCESS(result
, PR_FALSE
);
2989 return NS_SUCCEEDED(theTokenizer
->WillTokenize(aIsFinalChunk
,
2995 * This is the primary control routine to consume tokens.
2996 * It iteratively consumes tokens until an error occurs or
2997 * you run out of data.
// Consumes tokens from the current context's scanner until ConsumeToken()
// fails (including on end of buffer). mNumConsumed on the context is
// accumulated from the scanner's Mark() calls. When no tokenizer can be
// obtained, the result and mInternalState are set to
// NS_ERROR_HTMLPARSER_BADTOKENIZER.
2999 nsresult
nsParser::Tokenize(PRBool aIsFinalChunk
)
3001 nsITokenizer
* theTokenizer
;
// Defaults to "not available", so the tokenizing branch below is skipped
// when there is no parser context.
3003 nsresult result
= NS_ERROR_NOT_AVAILABLE
;
3004 if (mParserContext
) {
3005 PRInt32 type
= mParserContext
->mDTD
? mParserContext
->mDTD
->GetType()
3006 : NS_IPARSER_FLAG_HTML
;
3007 result
= mParserContext
->GetTokenizer(type
, mSink
, theTokenizer
);
3010 if (NS_SUCCEEDED(result
)) {
3011 if (mFlags
& NS_PARSER_FLAG_FLUSH_TOKENS
) {
3012 // For some reason tokens didn't get flushed (probably
3013 // the parser got blocked before all the tokens in the
3014 // stack got handled). Flush 'em now. Ref. bug 104856
3015 if (theTokenizer
->GetCount() != 0) {
3019 // Reset since the tokens have been flushed.
3020 mFlags
&= ~NS_PARSER_FLAG_FLUSH_TOKENS
;
3023 PRBool flushTokens
= PR_FALSE
;
3025 MOZ_TIMER_START(mTokenizeTime
);
3027 mParserContext
->mNumConsumed
= 0;
3029 PRBool killSink
= PR_FALSE
;
3031 WillTokenize(aIsFinalChunk
);
3032 while (NS_SUCCEEDED(result
)) {
// Mark the scanner position before each token so a failed consume can be
// rewound.
3033 mParserContext
->mNumConsumed
+= mParserContext
->mScanner
->Mark();
3034 result
= theTokenizer
->ConsumeToken(*mParserContext
->mScanner
,
3036 if (NS_FAILED(result
)) {
// Undo the partial consume by returning to the last mark.
3037 mParserContext
->mScanner
->RewindToMark();
3038 if (kEOF
== result
){
3041 if (NS_ERROR_HTMLPARSER_STOPPARSING
== result
) {
3043 result
= Terminate();
3046 } else if (flushTokens
&& (mFlags
& NS_PARSER_FLAG_OBSERVERS_ENABLED
)) {
3047 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
3048 // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
3049 // Also remember to update the marked position.
3050 mFlags
|= NS_PARSER_FLAG_FLUSH_TOKENS
;
3051 mParserContext
->mNumConsumed
+= mParserContext
->mScanner
->Mark();
3055 DidTokenize(aIsFinalChunk
);
3057 MOZ_TIMER_STOP(mTokenizeTime
);
// NOTE(review): presumably the failure branch of the NS_SUCCEEDED check
// above -- the `else` is not visible in this listing.
3063 result
= mInternalState
= NS_ERROR_HTMLPARSER_BADTOKENIZER
;
3070 * This is the tail-end of the code sandwich for the
3071 * tokenization process. It gets called once tokenization
3072 * has completed for each phase.
// Looks up the tokenizer for the current parser context and forwards the
// DidTokenize notification to it. Returns whether the tokenizer's call
// succeeded; a tokenizer lookup failure yields PR_FALSE.
3075 nsParser::DidTokenize(PRBool aIsFinalChunk
)
// NOTE(review): the body of this no-context guard is not visible in this
// listing.
3077 if (!mParserContext
) {
3081 nsITokenizer
* theTokenizer
;
// Use the DTD's type when a DTD is present, otherwise default to HTML.
3082 PRInt32 type
= mParserContext
->mDTD
? mParserContext
->mDTD
->GetType() :
3083 NS_IPARSER_FLAG_HTML
;
3084 nsresult rv
= mParserContext
->GetTokenizer(type
, mSink
, theTokenizer
);
3085 NS_ENSURE_SUCCESS(rv
, PR_FALSE
);
3087 rv
= theTokenizer
->DidTokenize(aIsFinalChunk
);
3088 return NS_SUCCEEDED(rv
);
3092 * Get the channel associated with this parser
3094 * @param aChannel out param that will contain the result
3095 * @return NS_OK if successful
// Queries the current context's request for nsIChannel and stores it in
// *aChannel. The result stays NS_ERROR_NOT_AVAILABLE when there is no
// parser context or the context has no request.
3098 nsParser::GetChannel(nsIChannel
** aChannel
)
3100 nsresult result
= NS_ERROR_NOT_AVAILABLE
;
3101 if (mParserContext
&& mParserContext
->mRequest
) {
3102 result
= CallQueryInterface(mParserContext
->mRequest
, aChannel
);
3108 * Get the DTD associated with this parser
3111 nsParser::GetDTD(nsIDTD
** aDTD
)
3113 if (mParserContext
) {
3114 NS_IF_ADDREF(*aDTD
= mParserContext
->mDTD
);