Backed out changeset b71c8c052463 (bug 1943846) for causing mass failures. CLOSED...
[gecko.git] / parser / htmlparser / nsParser.cpp
blobd3d579a5cf1ab3824bbf66066a57f6afc83995aa
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsAtom.h"
8 #include "nsParser.h"
9 #include "nsString.h"
10 #include "nsCRT.h"
11 #include "nsScanner.h"
12 #include "plstr.h"
13 #include "nsIChannel.h"
14 #include "nsIInputStream.h"
15 #include "CNavDTD.h"
16 #include "prenv.h"
17 #include "prlock.h"
18 #include "prcvar.h"
19 #include "nsReadableUtils.h"
20 #include "nsCOMPtr.h"
21 #include "nsExpatDriver.h"
22 #include "nsIFragmentContentSink.h"
23 #include "nsStreamUtils.h"
24 #include "nsXPCOMCIDInternal.h"
25 #include "nsMimeTypes.h"
26 #include "nsCharsetSource.h"
27 #include "nsThreadUtils.h"
28 #include "nsIHTMLContentSink.h"
30 #include "mozilla/BinarySearch.h"
31 #include "mozilla/CondVar.h"
32 #include "mozilla/dom/ScriptLoader.h"
33 #include "mozilla/Encoding.h"
34 #include "mozilla/Mutex.h"
36 using namespace mozilla;
// Bit flags kept in nsParser::mFlags.

// Set after an nsParserContinueEvent has been dispatched successfully and
// cleared again when that event runs or is revoked by Terminate().
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001

// Set by Initialize(); ResumeParse() clears it when WillBuildModel() fails and
// only runs the tokenizer step while it is set.
#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002
41 //-------------- Begin ParseContinue Event Definition ------------------------
43 The parser can be explicitly interrupted by passing a return value of
44 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
45 the parser to stop processing and allow the application to return to the event
46 loop. The data which was left at the time of interruption will be processed
47 the next time OnDataAvailable is called. If the parser has received its final
48 chunk of data then OnDataAvailable will no longer be called by the networking
49 module, so the parser will schedule a nsParserContinueEvent which will call
50 the parser to process the remaining data after returning to the event loop.
51 If the parser is interrupted while processing the remaining data it will
52 schedule another ParseContinueEvent. The processing of data followed by
53 scheduling of the continue events will proceed until either:
55 1) All of the remaining data can be processed without interrupting
56 2) The parser has been cancelled.
59 This capability is currently used in CNavDTD and nsHTMLContentSink. The
60 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
61 processed and when each token is processed. The nsHTML content sink records
62 the time when the chunk has started processing and will return
63 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
64 threshold called max tokenizing processing time. This allows the content sink
65 to limit how much data is processed in a single chunk which in turn gates how
66 much time is spent away from the event loop. Processing smaller chunks of data
67 also reduces the time spent in subsequent reflows.
69 This capability is most apparent when loading large documents. If the maximum
70 token processing time is set small enough the application will remain
71 responsive during document load.
73 A side-effect of this capability is that document load is not complete when
74 the last chunk of data is passed to OnDataAvailable since the parser may have
75 been interrupted when the last chunk of data arrived. The document is complete
76 when all of the document has been tokenized and there aren't any pending
77 nsParserContinueEvents. This can cause problems if the application assumes
78 that it can monitor the load requests to determine when the document load has
79 been completed. This is what happens in Mozilla. The document is considered
80 completely loaded when all of the load requests have been satisfied. To delay
81 the document load until all of the parsing has been completed the
82 nsHTMLContentSink adds a dummy parser load request which is not removed until
83 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
84 DidBuildModel until the final chunk of data has been passed to the parser
85 through the OnDataAvailable and there aren't any pending
86 nsParserContinueEvents.
88 Currently the parser ignores requests to be interrupted during the
89 processing of script. This is because a document.write followed by JavaScript
90 calls to manipulate the DOM may fail if the parser was interrupted during the
91 document.write.
93 For more details @see bugzilla bug 76722
96 class nsParserContinueEvent : public Runnable {
97 public:
98 RefPtr<nsParser> mParser;
100 explicit nsParserContinueEvent(nsParser* aParser)
101 : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
103 NS_IMETHOD Run() override {
104 mParser->HandleParserContinueEvent(this);
105 return NS_OK;
109 //-------------- End ParseContinue Event Definition ------------------------
112 * default constructor
114 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); }
116 nsParser::~nsParser() { Cleanup(); }
118 void nsParser::Initialize() {
119 mContinueEvent = nullptr;
120 mCharsetSource = kCharsetUninitialized;
121 mCharset = WINDOWS_1252_ENCODING;
122 mInternalState = NS_OK;
123 mStreamStatus = NS_OK;
124 mCommand = eViewNormal;
125 mBlocked = 0;
126 mFlags = NS_PARSER_FLAG_CAN_TOKENIZE;
128 mProcessingNetworkData = false;
129 mOnStopPending = false;
130 mIsAboutBlank = false;
133 void nsParser::Cleanup() {
134 // It should not be possible for this flag to be set when we are getting
135 // destroyed since this flag implies a pending nsParserContinueEvent, which
136 // has an owning reference to |this|.
137 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
140 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
142 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
143 NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
144 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
145 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
146 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
148 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
149 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
150 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
151 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
153 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
154 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
155 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
156 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
157 NS_INTERFACE_MAP_ENTRY(nsIParser)
158 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
159 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
160 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
161 NS_INTERFACE_MAP_END
163 // The parser continue event is posted only if
164 // all of the data to parse has been passed to ::OnDataAvailable
165 // and the parser has been interrupted by the content sink
166 // because the processing of tokens took too long.
168 nsresult nsParser::PostContinueEvent() {
169 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
170 // If this flag isn't set, then there shouldn't be a live continue event!
171 NS_ASSERTION(!mContinueEvent, "bad");
173 // This creates a reference cycle between this and the event that is
174 // broken when the event fires.
175 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
176 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
177 NS_WARNING("failed to dispatch parser continuation event");
178 } else {
179 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
180 mContinueEvent = event;
183 return NS_OK;
186 NS_IMETHODIMP_(void)
187 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
190 * Call this method once you've created a parser, and want to instruct it
191 * about the command which caused the parser to be constructed. For example,
192 * this allows us to select a DTD which can do, say, view-source.
194 * @param aCommand the command string to set
196 NS_IMETHODIMP_(void)
197 nsParser::SetCommand(const char* aCommand) {
198 mCommandStr.Assign(aCommand);
199 if (mCommandStr.EqualsLiteral("view-source")) {
200 mCommand = eViewSource;
201 } else if (mCommandStr.EqualsLiteral("view-fragment")) {
202 mCommand = eViewFragment;
203 } else {
204 mCommand = eViewNormal;
209 * Call this method once you've created a parser, and want to instruct it
210 * about the command which caused the parser to be constructed. For example,
211 * this allows us to select a DTD which can do, say, view-source.
213 * @param aParserCommand the command to set
215 NS_IMETHODIMP_(void)
216 nsParser::SetCommand(eParserCommands aParserCommand) {
217 mCommand = aParserCommand;
221 * Call this method once you've created a parser, and want to instruct it
222 * about what charset to load
224 * @param aCharset- the charset of a document
225 * @param aCharsetSource- the source of the charset
227 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
228 int32_t aCharsetSource,
229 bool aForceAutoDetection) {
230 mCharset = aCharset;
231 mCharsetSource = aCharsetSource;
232 if (mParserContext) {
233 mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
237 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
238 if (mSink) {
239 mSink->SetDocumentCharset(aCharset);
244 * This method gets called in order to set the content
245 * sink for this parser to dump nodes to.
247 * @param nsIContentSink interface for node receiver
249 NS_IMETHODIMP_(void)
250 nsParser::SetContentSink(nsIContentSink* aSink) {
251 MOZ_ASSERT(aSink, "sink cannot be null!");
252 mSink = aSink;
254 if (mSink) {
255 mSink->SetParser(this);
256 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
257 if (htmlSink) {
258 mIsAboutBlank = true;
264 * retrieve the sink set into the parser
265 * @return current sink
267 NS_IMETHODIMP_(nsIContentSink*)
268 nsParser::GetContentSink() { return mSink; }
270 ////////////////////////////////////////////////////////////////////////
273 * This gets called just prior to the model actually
274 * being constructed. It's important to make this the
275 * last thing that happens right before parsing, so we
276 * can delay until the last moment the resolution of
277 * which DTD to use (unless of course we're assigned one).
279 nsresult nsParser::WillBuildModel() {
280 if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
282 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
283 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
284 // to avoid introducing unintentional changes to behavior.
285 return mInternalState;
288 if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
290 if (eDTDMode_autodetect == mParserContext->mDTDMode) {
291 if (mIsAboutBlank) {
292 mParserContext->mDTDMode = eDTDMode_quirks;
293 mParserContext->mDocType = eHTML_Quirks;
294 } else {
295 mParserContext->mDTDMode = eDTDMode_full_standards;
296 mParserContext->mDocType = eXML;
298 } // else XML fragment with nested parser context
300 // We always find a DTD.
301 mParserContext->mAutoDetectStatus = ePrimaryDetect;
303 // Quick check for view source.
304 MOZ_ASSERT(mParserContext->mParserCommand != eViewSource,
305 "The old parser is not supposed to be used for View Source "
306 "anymore.");
308 // Now see if we're parsing XML or HTML (which, as far as we're concerned,
309 // simply means "not XML").
310 if (mParserContext->mDocType == eXML) {
311 RefPtr<nsExpatDriver> expat = new nsExpatDriver();
312 nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink);
313 NS_ENSURE_SUCCESS(rv, rv);
315 mDTD = expat.forget();
316 } else {
317 mDTD = new CNavDTD();
320 return mSink->WillBuildModel(mParserContext->mDTDMode);
324 * This gets called when the parser is done with its input.
326 void nsParser::DidBuildModel() {
327 if (IsComplete() && mParserContext) {
328 // Let sink know if we're about to end load because we've been terminated.
329 // In that case we don't want it to run deferred scripts.
330 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
331 if (mDTD && mSink) {
332 mDTD->DidBuildModel();
333 mSink->DidBuildModel(terminated);
336 // Ref. to bug 61462.
337 mParserContext->mRequest = nullptr;
342 * Call this when you want to *force* the parser to terminate the
343 * parsing process altogether. This is binary -- so once you terminate
344 * you can't resume without restarting altogether.
346 NS_IMETHODIMP
347 nsParser::Terminate(void) {
348 // We should only call DidBuildModel once, so don't do anything if this is
349 // the second time that Terminate has been called.
350 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
351 return NS_OK;
354 nsresult result = NS_OK;
355 // XXX - [ until we figure out a way to break parser-sink circularity ]
356 // Hack - Hold a reference until we are completely done...
357 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
358 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
360 // @see bug 108049
361 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so
362 // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete()
363 // call inside of DidBuildModel looks at the pendingContinueEvents flag.
364 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
365 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
366 // Revoke the pending continue parsing event
367 mContinueEvent = nullptr;
368 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
371 if (mDTD) {
372 mDTD->Terminate();
373 DidBuildModel();
374 } else if (mSink) {
375 // We have no parser context or no DTD yet (so we got terminated before we
376 // got any data). Manually break the reference cycle with the sink.
377 result = mSink->DidBuildModel(true);
378 NS_ENSURE_SUCCESS(result, result);
381 return NS_OK;
384 NS_IMETHODIMP
385 nsParser::ContinueInterruptedParsing() {
386 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
387 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
388 // to avoid introducing unintentional changes to behavior.
389 return mInternalState;
392 // If there are scripts executing, then the content sink is jumping the gun
393 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
394 // later, see bug 460706.
395 if (!IsOkToProcessNetworkData()) {
396 return NS_OK;
399 // If the stream has already finished, there's a good chance
400 // that we might start closing things down when the parser
401 // is reenabled. To make sure that we're not deleted across
402 // the reenabling process, hold a reference to ourselves.
403 nsresult result = NS_OK;
404 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
405 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
407 #ifdef DEBUG
408 if (mBlocked) {
409 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
411 #endif
413 bool isFinalChunk =
414 mParserContext && mParserContext->mStreamListenerState == eOnStop;
416 mProcessingNetworkData = true;
417 if (sinkDeathGrip) {
418 sinkDeathGrip->WillParse();
420 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
421 mProcessingNetworkData = false;
423 if (result != NS_OK) {
424 result = mInternalState;
427 return result;
431 * Stops parsing temporarily. That is, it will prevent the
432 * parser from building up content model while scripts
433 * are being loaded (either an external script from a web
434 * page, or any number of extension content scripts).
436 NS_IMETHODIMP_(void)
437 nsParser::BlockParser() { mBlocked++; }
440 * Open up the parser for tokenization, building up content
441 * model..etc. However, this method does not resume parsing
442 * automatically. It's the callers' responsibility to restart
443 * the parsing engine.
445 NS_IMETHODIMP_(void)
446 nsParser::UnblockParser() {
447 MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
448 if (MOZ_LIKELY(mBlocked > 0)) {
449 mBlocked--;
453 NS_IMETHODIMP_(void)
454 nsParser::ContinueInterruptedParsingAsync() {
455 MOZ_ASSERT(mSink);
456 if (MOZ_LIKELY(mSink)) {
457 mSink->ContinueInterruptedParsingAsync();
462 * Call this to query whether the parser is enabled or not.
464 NS_IMETHODIMP_(bool)
465 nsParser::IsParserEnabled() { return !mBlocked; }
468 * Call this to query whether the parser thinks it's done with parsing.
470 NS_IMETHODIMP_(bool)
471 nsParser::IsComplete() {
472 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
475 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
476 // Ignore any revoked continue events...
477 if (mContinueEvent != ev) return;
479 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
480 mContinueEvent = nullptr;
482 NS_ASSERTION(IsOkToProcessNetworkData(),
483 "Interrupted in the middle of a script?");
484 ContinueInterruptedParsing();
487 bool nsParser::IsInsertionPointDefined() { return false; }
489 void nsParser::IncrementScriptNestingLevel() {}
491 void nsParser::DecrementScriptNestingLevel() {}
493 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
495 bool nsParser::IsScriptCreated() { return false; }
498 * This is the main controlling routine in the parsing process.
499 * Note that it may get called multiple times for the same scanner,
500 * since this is a pushed based system, and all the tokens may
501 * not have been consumed by the scanner during a given invocation
502 * of this method.
504 NS_IMETHODIMP
505 nsParser::Parse(nsIURI* aURL) {
506 MOZ_ASSERT(aURL, "Error: Null URL given");
508 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
509 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
510 // to avoid introducing unintentional changes to behavior.
511 return mInternalState;
514 if (!aURL) {
515 return NS_ERROR_HTMLPARSER_BADURL;
518 MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");
520 mParserContext = MakeUnique<CParserContext>(aURL, mCommand);
522 return NS_OK;
526 * Used by XML fragment parsing below.
528 * @param aSourceBuffer contains a string-full of real content
530 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
531 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
532 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
533 // to avoid introducing unintentional changes to behavior.
534 return mInternalState;
537 // Don't bother if we're never going to parse this.
538 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
539 return NS_OK;
542 if (!aLastCall && aSourceBuffer.IsEmpty()) {
543 // Nothing is being passed to the parser so return
544 // immediately. mUnusedInput will get processed when
545 // some data is actually passed in.
546 // But if this is the last call, make sure to finish up
547 // stuff correctly.
548 return NS_OK;
551 // Maintain a reference to ourselves so we don't go away
552 // till we're completely done.
553 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
555 if (!mParserContext) {
556 // Only make a new context if we don't have one.
557 mParserContext =
558 MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);
560 mUnusedInput.Truncate();
561 } else if (aLastCall) {
562 // Set stream listener state to eOnStop, on the final context - Fix
563 // 68160, to guarantee DidBuildModel() call - Fix 36148
564 mParserContext->mStreamListenerState = eOnStop;
565 mParserContext->mScanner.SetIncremental(false);
568 mParserContext->mScanner.Append(aSourceBuffer);
569 return ResumeParse(false, false, false);
572 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer,
573 nsTArray<nsString>& aTagStack) {
574 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
575 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
576 // to avoid introducing unintentional changes to behavior.
577 return mInternalState;
580 nsresult result = NS_OK;
581 nsAutoString theContext;
582 uint32_t theCount = aTagStack.Length();
583 uint32_t theIndex = 0;
585 for (theIndex = 0; theIndex < theCount; theIndex++) {
586 theContext.Append('<');
587 theContext.Append(aTagStack[theCount - theIndex - 1]);
588 theContext.Append('>');
591 if (theCount == 0) {
592 // Ensure that the buffer is not empty. Because none of the DTDs care
593 // about leading whitespace, this doesn't change the result.
594 theContext.Assign(' ');
597 // First, parse the context to build up the DTD's tag stack. Note that we
598 // pass false for the aLastCall parameter.
599 result = Parse(theContext, false);
600 if (NS_FAILED(result)) {
601 return result;
604 if (!mSink) {
605 // Parse must have failed in the XML case and so the sink was killed.
606 return NS_ERROR_HTMLPARSER_STOPPARSING;
609 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
610 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
612 fragSink->WillBuildContent();
613 // Now, parse the actual content. Note that this is the last call
614 // for HTML content, but for XML, we will want to build and parse
615 // the end tags. However, if tagStack is empty, it's the last call
616 // for XML as well.
617 if (theCount == 0) {
618 result = Parse(aSourceBuffer, true);
619 fragSink->DidBuildContent();
620 } else {
621 // Add an end tag chunk, so expat will read the whole source buffer,
622 // and not worry about ']]' etc.
623 result = Parse(aSourceBuffer + u"</"_ns, false);
624 fragSink->DidBuildContent();
626 if (NS_SUCCEEDED(result)) {
627 nsAutoString endContext;
628 for (theIndex = 0; theIndex < theCount; theIndex++) {
629 // we already added an end tag chunk above
630 if (theIndex > 0) {
631 endContext.AppendLiteral("</");
634 nsString& thisTag = aTagStack[theIndex];
635 // was there an xmlns=?
636 int32_t endOfTag = thisTag.FindChar(char16_t(' '));
637 if (endOfTag == -1) {
638 endContext.Append(thisTag);
639 } else {
640 endContext.Append(Substring(thisTag, 0, endOfTag));
643 endContext.Append('>');
646 result = Parse(endContext, true);
650 mParserContext.reset();
652 return result;
656 * This routine is called to cause the parser to continue parsing its
657 * underlying stream. This call allows the parse process to happen in
658 * chunks, such as when the content is push based, and we need to parse in
659 * pieces.
661 * An interesting change in how the parser gets used has led us to add extra
662 * processing to this method. The case occurs when the parser is blocked in
663 * one context, and gets a parse(string) call in another context. In this
664 * case, the parserContexts are linked. No problem.
666 * The problem is that Parse(string) assumes that it can proceed unabated,
667 * but if the parser is already blocked that assumption is false. So we
668 * needed to add a mechanism here to allow the parser to continue to process
669 * (the pop and free) contexts until 1) it get's blocked again; 2) it runs
670 * out of contexts.
673 * @param allowItertion : set to true if non-script resumption is requested
674 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
675 * @return error code -- 0 if ok, non-zero if error.
677 nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
678 bool aCanInterrupt) {
679 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
680 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
681 // to avoid introducing unintentional changes to behavior.
682 return mInternalState;
685 nsresult result = NS_OK;
687 if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
688 result = WillBuildModel();
689 if (NS_FAILED(result)) {
690 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
691 return result;
694 if (mDTD) {
695 mSink->WillResume();
696 bool theIterationIsOk = true;
698 while (result == NS_OK && theIterationIsOk) {
699 if (!mUnusedInput.IsEmpty()) {
700 // -- Ref: Bug# 22485 --
701 // Insert the unused input into the source buffer
702 // as if it was read from the input stream.
703 // Adding UngetReadable() per vidur!!
704 mParserContext->mScanner.UngetReadable(mUnusedInput);
705 mUnusedInput.Truncate(0);
708 // Only allow parsing to be interrupted in the subsequent call to
709 // build model.
710 nsresult theTokenizerResult;
711 if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
712 mParserContext->mScanner.Mark();
713 if (mParserContext->mDocType == eXML &&
714 mParserContext->mParserCommand != eViewSource) {
715 nsExpatDriver* expat = static_cast<nsExpatDriver*>(mDTD.get());
716 theTokenizerResult =
717 expat->ResumeParse(mParserContext->mScanner, aIsFinalChunk);
718 if (NS_FAILED(theTokenizerResult)) {
719 mParserContext->mScanner.RewindToMark();
720 if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
721 theTokenizerResult = Terminate();
722 mSink = nullptr;
725 } else {
726 // Nothing to do for non-XML. Note that this should only be
727 // about:blank at this point, we're also checking for view-source
728 // above, but that shouldn't end up here anymore.
729 theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
731 } else {
732 theTokenizerResult = NS_OK;
735 result = mDTD->BuildModel(mSink);
736 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
737 PostContinueEvent();
740 theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
741 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
743 // Make sure not to stop parsing too early. Therefore, before shutting
744 // down the parser, it's important to check whether the input buffer
745 // has been scanned to completion (theTokenizerResult should be kEOF).
746 // kEOF -> End of buffer.
748 // If we're told the parser has been blocked, we disable all further
749 // parsing (and cache any data coming in) until the parser is
750 // re-enabled.
751 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
752 mSink->WillInterrupt();
753 return NS_OK;
755 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
756 // Note: Parser Terminate() calls DidBuildModel.
757 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
758 DidBuildModel();
759 mInternalState = result;
762 return NS_OK;
764 if (((NS_OK == result &&
765 theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
766 result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
767 mParserContext->mStreamListenerState == eOnStop) {
768 DidBuildModel();
769 return NS_OK;
772 if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
773 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
774 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
775 mSink->WillInterrupt();
778 } else {
779 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
783 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
786 /*******************************************************************
787 These methods are used to talk to the netlib system...
788 *******************************************************************/
790 nsresult nsParser::OnStartRequest(nsIRequest* request) {
791 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
792 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
793 // to avoid introducing unintentional changes to behavior.
794 return mInternalState;
797 MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
798 "Parser's nsIStreamListener API was not setup "
799 "correctly in constructor.");
801 mParserContext->mStreamListenerState = eOnStart;
802 mParserContext->mAutoDetectStatus = eUnknownDetect;
803 mParserContext->mRequest = request;
805 mDTD = nullptr;
807 nsresult rv;
808 nsAutoCString contentType;
809 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
810 if (channel) {
811 rv = channel->GetContentType(contentType);
812 if (NS_SUCCEEDED(rv)) {
813 mParserContext->SetMimeType(contentType);
817 rv = NS_OK;
819 return rv;
822 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
823 int32_t aLen,
824 nsCString& oCharset) {
825 // This code is rather pointless to have. Might as well reuse expat as
826 // seen in nsHtml5StreamParser. -- hsivonen
827 oCharset.Truncate();
828 if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
829 ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
830 int32_t i;
831 bool versionFound = false, encodingFound = false;
832 for (i = 6; i < aLen && !encodingFound; ++i) {
833 // end of XML declaration?
834 if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
835 (((char*)aBytes)[i + 1] == '>')) {
836 break;
838 // Version is required.
839 if (!versionFound) {
840 // Want to avoid string comparisons, hence looking for 'n'
841 // and only if found check the string leading to it. Not
842 // foolproof, but fast.
843 // The shortest string allowed before this is (strlen==13):
844 // <?xml version
845 if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
846 (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
847 // Fast forward through version
848 char q = 0;
849 for (++i; i < aLen; ++i) {
850 char qi = ((char*)aBytes)[i];
851 if (qi == '\'' || qi == '"') {
852 if (q && q == qi) {
853 // ending quote
854 versionFound = true;
855 break;
856 } else {
857 // Starting quote
858 q = qi;
863 } else {
864 // encoding must follow version
865 // Want to avoid string comparisons, hence looking for 'g'
866 // and only if found check the string leading to it. Not
867 // foolproof, but fast.
868 // The shortest allowed string before this (strlen==26):
869 // <?xml version="1" encoding
870 if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
871 (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
872 int32_t encStart = 0;
873 char q = 0;
874 for (++i; i < aLen; ++i) {
875 char qi = ((char*)aBytes)[i];
876 if (qi == '\'' || qi == '"') {
877 if (q && q == qi) {
878 int32_t count = i - encStart;
879 // encoding value is invalid if it is UTF-16
880 if (count > 0 &&
881 PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
882 count)) {
883 oCharset.Assign((char*)(aBytes + encStart), count);
885 encodingFound = true;
886 break;
887 } else {
888 encStart = i + 1;
889 q = qi;
894 } // if (!versionFound)
895 } // for
897 return !oCharset.IsEmpty();
900 inline char GetNextChar(nsACString::const_iterator& aStart,
901 nsACString::const_iterator& aEnd) {
902 NS_ASSERTION(aStart != aEnd, "end of buffer");
903 return (++aStart != aEnd) ? *aStart : '\0';
906 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
907 const char* fromRawSegment,
908 uint32_t toOffset, uint32_t count,
909 uint32_t* writeCount) {
910 *writeCount = count;
911 return NS_OK;
914 typedef struct {
915 bool mNeedCharsetCheck;
916 nsParser* mParser;
917 nsScanner* mScanner;
918 nsIRequest* mRequest;
919 } ParserWriteStruct;
922 * This function is invoked as a result of a call to a stream's
923 * ReadSegments() method. It is called for each contiguous buffer
924 * of data in the underlying stream or pipe. Using ReadSegments
925 * allows us to avoid copying data to read out of the stream.
927 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
928 const char* fromRawSegment, uint32_t toOffset,
929 uint32_t count, uint32_t* writeCount) {
930 nsresult result;
931 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
932 const unsigned char* buf =
933 reinterpret_cast<const unsigned char*>(fromRawSegment);
934 uint32_t theNumRead = count;
936 if (!pws) {
937 return NS_ERROR_FAILURE;
940 if (pws->mNeedCharsetCheck) {
941 pws->mNeedCharsetCheck = false;
942 int32_t source;
943 auto preferred = pws->mParser->GetDocumentCharset(source);
945 // This code was bogus when I found it. It expects the BOM or the XML
946 // declaration to be entirely in the first network buffer. -- hsivonen
947 const Encoding* encoding;
948 std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
949 if (encoding) {
950 // The decoder will swallow the BOM. The UTF-16 will re-sniff for
951 // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
952 // or "UTF-16BE".
953 preferred = WrapNotNull(encoding);
954 source = kCharsetFromByteOrderMark;
955 } else if (source < kCharsetFromChannel) {
956 nsAutoCString declCharset;
958 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
959 encoding = Encoding::ForLabel(declCharset);
960 if (encoding) {
961 preferred = WrapNotNull(encoding);
962 source = kCharsetFromMetaTag;
967 pws->mParser->SetDocumentCharset(preferred, source, false);
968 pws->mParser->SetSinkCharset(preferred);
971 result = pws->mScanner->Append(fromRawSegment, theNumRead);
972 if (NS_SUCCEEDED(result)) {
973 *writeCount = count;
976 return result;
979 nsresult nsParser::OnDataAvailable(nsIRequest* request,
980 nsIInputStream* pIStream,
981 uint64_t sourceOffset, uint32_t aLength) {
982 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
983 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
984 // to avoid introducing unintentional changes to behavior.
985 return mInternalState;
988 MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
989 eOnDataAvail == mParserContext->mStreamListenerState),
990 "Error: OnStartRequest() must be called before OnDataAvailable()");
991 MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
992 "Must have a buffered input stream");
994 nsresult rv = NS_OK;
996 if (mIsAboutBlank) {
997 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
998 // ... but if an extension tries to feed us data for about:blank in a
999 // release build, silently ignore the data.
1000 uint32_t totalRead;
1001 rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1002 &totalRead);
1003 return rv;
1006 if (mParserContext->mRequest == request) {
1007 mParserContext->mStreamListenerState = eOnDataAvail;
1009 uint32_t totalRead;
1010 ParserWriteStruct pws;
1011 pws.mNeedCharsetCheck = true;
1012 pws.mParser = this;
1013 pws.mScanner = &mParserContext->mScanner;
1014 pws.mRequest = request;
1016 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1017 if (NS_FAILED(rv)) {
1018 return rv;
1021 if (IsOkToProcessNetworkData()) {
1022 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1023 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1024 mProcessingNetworkData = true;
1025 if (sinkDeathGrip) {
1026 sinkDeathGrip->WillParse();
1028 rv = ResumeParse();
1029 // Check if someone spun the event loop while we were parsing (XML
1030 // script...) If so, and OnStop was called during the spin, process it
1031 // now.
1032 if ((mParserContext->mRequest == request) && mOnStopPending) {
1033 mOnStopPending = false;
1034 mParserContext->mStreamListenerState = eOnStop;
1035 mParserContext->mScanner.SetIncremental(false);
1037 if (sinkDeathGrip) {
1038 sinkDeathGrip->WillParse();
1040 rv = ResumeParse(true, true);
1042 mProcessingNetworkData = false;
1044 } else {
1045 rv = NS_ERROR_UNEXPECTED;
1048 return rv;
1052 * This is called by the networking library once the last block of data
1053 * has been collected from the net.
1055 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1056 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
1057 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
1058 // to avoid introducing unintentional changes to behavior.
1059 return mInternalState;
1062 nsresult rv = NS_OK;
1064 mStreamStatus = status;
1066 if (IsOkToProcessNetworkData()) {
1067 if (mParserContext->mRequest == request) {
1068 mParserContext->mStreamListenerState = eOnStop;
1069 mParserContext->mScanner.SetIncremental(false);
1072 mProcessingNetworkData = true;
1073 if (mSink) {
1074 mSink->WillParse();
1076 rv = ResumeParse(true, true);
1077 mProcessingNetworkData = false;
1078 } else {
1079 // We'll have to handle this later
1080 mOnStopPending = true;
1083 // If the parser isn't enabled, we don't finish parsing till
1084 // it is reenabled.
1086 return rv;
1090 * Get this as nsIStreamListener
1092 nsIStreamListener* nsParser::GetStreamListener() { return this; }