parser/htmlparser/nsParser.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set sw=2 ts=2 et tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsAtom.h"
   8 #include "nsParser.h"
   9 #include "nsString.h"
  10 #include "nsCRT.h"
  11 #include "nsScanner.h"
  12 #include "plstr.h"
  13 #include "nsIChannel.h"
  14 #include "nsIInputStream.h"
  15 #include "CNavDTD.h"
  16 #include "prenv.h"
  17 #include "prlock.h"
  18 #include "prcvar.h"
  19 #include "nsReadableUtils.h"
  20 #include "nsCOMPtr.h"
  21 #include "nsExpatDriver.h"
  22 #include "nsIFragmentContentSink.h"
  23 #include "nsStreamUtils.h"
  24 #include "nsXPCOMCIDInternal.h"
  25 #include "nsMimeTypes.h"
  26 #include "nsCharsetSource.h"
  27 #include "nsThreadUtils.h"
  28 #include "nsIHTMLContentSink.h"
  29
  30 #include "mozilla/BinarySearch.h"
  31 #include "mozilla/CondVar.h"
  32 #include "mozilla/dom/ScriptLoader.h"
  33 #include "mozilla/Encoding.h"
  34 #include "mozilla/Mutex.h"
  35
  36 using namespace mozilla;
  37
  38 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001
  39 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002
  40
  41 //-------------- Begin ParseContinue Event Definition ------------------------
  42 /*
  43 The parser can be explicitly interrupted by passing a return value of
  44 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
  45 the parser to stop processing and allow the application to return to the event
  46 loop. The data which was left at the time of interruption will be processed
  47 the next time OnDataAvailable is called. If the parser has received its final
  48 chunk of data then OnDataAvailable will no longer be called by the networking
  49 module, so the parser will schedule a nsParserContinueEvent which will call
  50 the parser to process the remaining data after returning to the event loop.
  51 If the parser is interrupted while processing the remaining data it will
  52 schedule another ParseContinueEvent. The processing of data followed by
  53 scheduling of the continue events will proceed until either:
  54
  55   1) All of the remaining data can be processed without interrupting
  56   2) The parser has been cancelled.
  57
  58
  59 This capability is currently used in CNavDTD and nsHTMLContentSink. The
  60 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
  61 processed and when each token is processed. The nsHTML content sink records
  62 the time when the chunk has started processing and will return
  63 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
  64 threshold called max tokenizing processing time. This allows the content sink
  65 to limit how much data is processed in a single chunk which in turn gates how
  66 much time is spent away from the event loop. Processing smaller chunks of data
  67 also reduces the time spent in subsequent reflows.
  68
  69 This capability is most apparent when loading large documents. If the maximum
  70 token processing time is set small enough the application will remain
  71 responsive during document load.
  72
  73 A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
  75 been interrupted when the last chunk of data arrived. The document is complete
  76 when all of the document has been tokenized and there aren't any pending
  77 nsParserContinueEvents. This can cause problems if the application assumes
  78 that it can monitor the load requests to determine when the document load has
  79 been completed. This is what happens in Mozilla. The document is considered
  80 completely loaded when all of the load requests have been satisfied. To delay
  81 the document load until all of the parsing has been completed the
  82 nsHTMLContentSink adds a dummy parser load request which is not removed until
  83 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
  84 DidBuildModel until the final chunk of data has been passed to the parser
  85 through the OnDataAvailable and there aren't any pending
nsParserContinueEvents.
  87
Currently the parser ignores requests to be interrupted during the
  89 processing of script.  This is because a document.write followed by JavaScript
  90 calls to manipulate the DOM may fail if the parser was interrupted during the
  91 document.write.
  92
  93 For more details @see bugzilla bug 76722
  94 */
  95
  96 class nsParserContinueEvent : public Runnable {
  97  public:
  98   RefPtr<nsParser> mParser;
  99
 100   explicit nsParserContinueEvent(nsParser* aParser)
 101       : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
 102
 103   NS_IMETHOD Run() override {
 104     mParser->HandleParserContinueEvent(this);
 105     return NS_OK;
 106   }
 107 };
 108
 109 //-------------- End ParseContinue Event Definition ------------------------
 110
 111 /**
 112  *  default constructor
 113  */
 114 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); }
 115
 116 nsParser::~nsParser() { Cleanup(); }
 117
 118 void nsParser::Initialize() {
 119   mContinueEvent = nullptr;
 120   mCharsetSource = kCharsetUninitialized;
 121   mCharset = WINDOWS_1252_ENCODING;
 122   mInternalState = NS_OK;
 123   mStreamStatus = NS_OK;
 124   mCommand = eViewNormal;
 125   mBlocked = 0;
 126   mFlags = NS_PARSER_FLAG_CAN_TOKENIZE;
 127
 128   mProcessingNetworkData = false;
 129   mOnStopPending = false;
 130   mIsAboutBlank = false;
 131 }
 132
 133 void nsParser::Cleanup() {
 134   // It should not be possible for this flag to be set when we are getting
 135   // destroyed since this flag implies a pending nsParserContinueEvent, which
 136   // has an owning reference to |this|.
 137   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
 138 }
 139
// Cycle-collection and XPCOM interface plumbing for nsParser.
NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)

// Unlink: the members listed here are mDTD and mSink, plus the weak-reference
// support.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
  NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Traverse: reports the same two members (mDTD and mSink) to the cycle
// collector.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Reference counting and the interface map. nsISupports is resolved through
// nsIParser because it would otherwise be ambiguous.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY(nsIParser)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
NS_INTERFACE_MAP_END
 162
 163 // The parser continue event is posted only if
 164 // all of the data to parse has been passed to ::OnDataAvailable
 165 // and the parser has been interrupted by the content sink
 166 // because the processing of tokens took too long.
 167
 168 nsresult nsParser::PostContinueEvent() {
 169   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
 170     // If this flag isn't set, then there shouldn't be a live continue event!
 171     NS_ASSERTION(!mContinueEvent, "bad");
 172
 173     // This creates a reference cycle between this and the event that is
 174     // broken when the event fires.
 175     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
 176     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
 177       NS_WARNING("failed to dispatch parser continuation event");
 178     } else {
 179       mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 180       mContinueEvent = event;
 181     }
 182   }
 183   return NS_OK;
 184 }
 185
 186 NS_IMETHODIMP_(void)
 187 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
 188
 189 /**
 190  *  Call this method once you've created a parser, and want to instruct it
 191  *  about the command which caused the parser to be constructed. For example,
 192  *  this allows us to select a DTD which can do, say, view-source.
 193  *
 194  *  @param   aCommand the command string to set
 195  */
 196 NS_IMETHODIMP_(void)
 197 nsParser::SetCommand(const char* aCommand) {
 198   mCommandStr.Assign(aCommand);
 199   if (mCommandStr.EqualsLiteral("view-source")) {
 200     mCommand = eViewSource;
 201   } else if (mCommandStr.EqualsLiteral("view-fragment")) {
 202     mCommand = eViewFragment;
 203   } else {
 204     mCommand = eViewNormal;
 205   }
 206 }
 207
 208 /**
 209  *  Call this method once you've created a parser, and want to instruct it
 210  *  about the command which caused the parser to be constructed. For example,
 211  *  this allows us to select a DTD which can do, say, view-source.
 212  *
 213  *  @param   aParserCommand the command to set
 214  */
 215 NS_IMETHODIMP_(void)
 216 nsParser::SetCommand(eParserCommands aParserCommand) {
 217   mCommand = aParserCommand;
 218 }
 219
 220 /**
 221  *  Call this method once you've created a parser, and want to instruct it
 222  *  about what charset to load
 223  *
 224  *  @param   aCharset- the charset of a document
 225  *  @param   aCharsetSource- the source of the charset
 226  */
 227 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
 228                                   int32_t aCharsetSource,
 229                                   bool aForceAutoDetection) {
 230   mCharset = aCharset;
 231   mCharsetSource = aCharsetSource;
 232   if (mParserContext) {
 233     mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
 234   }
 235 }
 236
 237 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
 238   if (mSink) {
 239     mSink->SetDocumentCharset(aCharset);
 240   }
 241 }
 242
 243 /**
 244  *  This method gets called in order to set the content
 245  *  sink for this parser to dump nodes to.
 246  *
 247  *  @param   nsIContentSink interface for node receiver
 248  */
 249 NS_IMETHODIMP_(void)
 250 nsParser::SetContentSink(nsIContentSink* aSink) {
 251   MOZ_ASSERT(aSink, "sink cannot be null!");
 252   mSink = aSink;
 253
 254   if (mSink) {
 255     mSink->SetParser(this);
 256     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
 257     if (htmlSink) {
 258       mIsAboutBlank = true;
 259     }
 260   }
 261 }
 262
 263 /**
 264  * retrieve the sink set into the parser
 265  * @return  current sink
 266  */
 267 NS_IMETHODIMP_(nsIContentSink*)
 268 nsParser::GetContentSink() { return mSink; }
 269
 270 ////////////////////////////////////////////////////////////////////////
 271
 272 /**
 273  * This gets called just prior to the model actually
 274  * being constructed. It's important to make this the
 275  * last thing that happens right before parsing, so we
 276  * can delay until the last moment the resolution of
 277  * which DTD to use (unless of course we're assigned one).
 278  */
 279 nsresult nsParser::WillBuildModel() {
 280   if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
 281
 282   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 283     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 284     // to avoid introducing unintentional changes to behavior.
 285     return mInternalState;
 286   }
 287
 288   if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
 289
 290   if (eDTDMode_autodetect == mParserContext->mDTDMode) {
 291     if (mIsAboutBlank) {
 292       mParserContext->mDTDMode = eDTDMode_quirks;
 293       mParserContext->mDocType = eHTML_Quirks;
 294     } else {
 295       mParserContext->mDTDMode = eDTDMode_full_standards;
 296       mParserContext->mDocType = eXML;
 297     }
 298   }  // else XML fragment with nested parser context
 299
 300   // We always find a DTD.
 301   mParserContext->mAutoDetectStatus = ePrimaryDetect;
 302
 303   // Quick check for view source.
 304   MOZ_ASSERT(mParserContext->mParserCommand != eViewSource,
 305              "The old parser is not supposed to be used for View Source "
 306              "anymore.");
 307
 308   // Now see if we're parsing XML or HTML (which, as far as we're concerned,
 309   // simply means "not XML").
 310   if (mParserContext->mDocType == eXML) {
 311     RefPtr<nsExpatDriver> expat = new nsExpatDriver();
 312     nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink);
 313     NS_ENSURE_SUCCESS(rv, rv);
 314
 315     mDTD = expat.forget();
 316   } else {
 317     mDTD = new CNavDTD();
 318   }
 319
 320   return mSink->WillBuildModel(mParserContext->mDTDMode);
 321 }
 322
 323 /**
 324  * This gets called when the parser is done with its input.
 325  */
 326 void nsParser::DidBuildModel() {
 327   if (IsComplete() && mParserContext) {
 328     // Let sink know if we're about to end load because we've been terminated.
 329     // In that case we don't want it to run deferred scripts.
 330     bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
 331     if (mDTD && mSink) {
 332       mDTD->DidBuildModel();
 333       mSink->DidBuildModel(terminated);
 334     }
 335
 336     // Ref. to bug 61462.
 337     mParserContext->mRequest = nullptr;
 338   }
 339 }
 340
 341 /**
 342  *  Call this when you want to *force* the parser to terminate the
 343  *  parsing process altogether. This is binary -- so once you terminate
 344  *  you can't resume without restarting altogether.
 345  */
 346 NS_IMETHODIMP
 347 nsParser::Terminate(void) {
 348   // We should only call DidBuildModel once, so don't do anything if this is
 349   // the second time that Terminate has been called.
 350   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 351     return NS_OK;
 352   }
 353
 354   nsresult result = NS_OK;
 355   // XXX - [ until we figure out a way to break parser-sink circularity ]
 356   // Hack - Hold a reference until we are completely done...
 357   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 358   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
 359
 360   // @see bug 108049
 361   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so
 362   // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete()
 363   // call inside of DidBuildModel looks at the pendingContinueEvents flag.
 364   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
 365     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
 366     // Revoke the pending continue parsing event
 367     mContinueEvent = nullptr;
 368     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 369   }
 370
 371   if (mDTD) {
 372     mDTD->Terminate();
 373     DidBuildModel();
 374   } else if (mSink) {
 375     // We have no parser context or no DTD yet (so we got terminated before we
 376     // got any data).  Manually break the reference cycle with the sink.
 377     result = mSink->DidBuildModel(true);
 378     NS_ENSURE_SUCCESS(result, result);
 379   }
 380
 381   return NS_OK;
 382 }
 383
 384 NS_IMETHODIMP
 385 nsParser::ContinueInterruptedParsing() {
 386   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 387     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 388     // to avoid introducing unintentional changes to behavior.
 389     return mInternalState;
 390   }
 391
 392   // If there are scripts executing, then the content sink is jumping the gun
 393   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
 394   // later, see bug 460706.
 395   if (!IsOkToProcessNetworkData()) {
 396     return NS_OK;
 397   }
 398
 399   // If the stream has already finished, there's a good chance
 400   // that we might start closing things down when the parser
 401   // is reenabled. To make sure that we're not deleted across
 402   // the reenabling process, hold a reference to ourselves.
 403   nsresult result = NS_OK;
 404   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 405   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
 406
 407 #ifdef DEBUG
 408   if (mBlocked) {
 409     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
 410   }
 411 #endif
 412
 413   bool isFinalChunk =
 414       mParserContext && mParserContext->mStreamListenerState == eOnStop;
 415
 416   mProcessingNetworkData = true;
 417   if (sinkDeathGrip) {
 418     sinkDeathGrip->WillParse();
 419   }
 420   result = ResumeParse(true, isFinalChunk);  // Ref. bug 57999
 421   mProcessingNetworkData = false;
 422
 423   if (result != NS_OK) {
 424     result = mInternalState;
 425   }
 426
 427   return result;
 428 }
 429
 430 /**
 431  *  Stops parsing temporarily. That is, it will prevent the
 432  *  parser from building up content model while scripts
 433  *  are being loaded (either an external script from a web
 434  *  page, or any number of extension content scripts).
 435  */
 436 NS_IMETHODIMP_(void)
 437 nsParser::BlockParser() { mBlocked++; }
 438
 439 /**
 440  *  Open up the parser for tokenization, building up content
 441  *  model..etc. However, this method does not resume parsing
 442  *  automatically. It's the callers' responsibility to restart
 443  *  the parsing engine.
 444  */
 445 NS_IMETHODIMP_(void)
 446 nsParser::UnblockParser() {
 447   MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
 448   if (MOZ_LIKELY(mBlocked > 0)) {
 449     mBlocked--;
 450   }
 451 }
 452
 453 NS_IMETHODIMP_(void)
 454 nsParser::ContinueInterruptedParsingAsync() {
 455   MOZ_ASSERT(mSink);
 456   if (MOZ_LIKELY(mSink)) {
 457     mSink->ContinueInterruptedParsingAsync();
 458   }
 459 }
 460
 461 /**
 462  * Call this to query whether the parser is enabled or not.
 463  */
 464 NS_IMETHODIMP_(bool)
 465 nsParser::IsParserEnabled() { return !mBlocked; }
 466
 467 /**
 468  * Call this to query whether the parser thinks it's done with parsing.
 469  */
 470 NS_IMETHODIMP_(bool)
 471 nsParser::IsComplete() {
 472   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
 473 }
 474
 475 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
 476   // Ignore any revoked continue events...
 477   if (mContinueEvent != ev) return;
 478
 479   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 480   mContinueEvent = nullptr;
 481
 482   NS_ASSERTION(IsOkToProcessNetworkData(),
 483                "Interrupted in the middle of a script?");
 484   ContinueInterruptedParsing();
 485 }
 486
 487 bool nsParser::IsInsertionPointDefined() { return false; }
 488
 489 void nsParser::IncrementScriptNestingLevel() {}
 490
 491 void nsParser::DecrementScriptNestingLevel() {}
 492
 493 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
 494
 495 bool nsParser::IsScriptCreated() { return false; }
 496
 497 /**
 498  *  This is the main controlling routine in the parsing process.
 499  *  Note that it may get called multiple times for the same scanner,
 500  *  since this is a pushed based system, and all the tokens may
 501  *  not have been consumed by the scanner during a given invocation
 502  *  of this method.
 503  */
 504 NS_IMETHODIMP
 505 nsParser::Parse(nsIURI* aURL) {
 506   MOZ_ASSERT(aURL, "Error: Null URL given");
 507
 508   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 509     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 510     // to avoid introducing unintentional changes to behavior.
 511     return mInternalState;
 512   }
 513
 514   if (!aURL) {
 515     return NS_ERROR_HTMLPARSER_BADURL;
 516   }
 517
 518   MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");
 519
 520   mParserContext = MakeUnique<CParserContext>(aURL, mCommand);
 521
 522   return NS_OK;
 523 }
 524
 525 /**
 526  * Used by XML fragment parsing below.
 527  *
 528  * @param   aSourceBuffer contains a string-full of real content
 529  */
 530 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
 531   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 532     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 533     // to avoid introducing unintentional changes to behavior.
 534     return mInternalState;
 535   }
 536
 537   // Don't bother if we're never going to parse this.
 538   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 539     return NS_OK;
 540   }
 541
 542   if (!aLastCall && aSourceBuffer.IsEmpty()) {
 543     // Nothing is being passed to the parser so return
 544     // immediately. mUnusedInput will get processed when
 545     // some data is actually passed in.
 546     // But if this is the last call, make sure to finish up
 547     // stuff correctly.
 548     return NS_OK;
 549   }
 550
 551   // Maintain a reference to ourselves so we don't go away
 552   // till we're completely done.
 553   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 554
 555   if (!mParserContext) {
 556     // Only make a new context if we don't have one.
 557     mParserContext =
 558         MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);
 559
 560     mUnusedInput.Truncate();
 561   } else if (aLastCall) {
 562     // Set stream listener state to eOnStop, on the final context - Fix
 563     // 68160, to guarantee DidBuildModel() call - Fix 36148
 564     mParserContext->mStreamListenerState = eOnStop;
 565     mParserContext->mScanner.SetIncremental(false);
 566   }
 567
 568   mParserContext->mScanner.Append(aSourceBuffer);
 569   return ResumeParse(false, false, false);
 570 }
 571
 572 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer,
 573                                  nsTArray<nsString>& aTagStack) {
 574   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 575     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 576     // to avoid introducing unintentional changes to behavior.
 577     return mInternalState;
 578   }
 579
 580   nsresult result = NS_OK;
 581   nsAutoString theContext;
 582   uint32_t theCount = aTagStack.Length();
 583   uint32_t theIndex = 0;
 584
 585   for (theIndex = 0; theIndex < theCount; theIndex++) {
 586     theContext.Append('<');
 587     theContext.Append(aTagStack[theCount - theIndex - 1]);
 588     theContext.Append('>');
 589   }
 590
 591   if (theCount == 0) {
 592     // Ensure that the buffer is not empty. Because none of the DTDs care
 593     // about leading whitespace, this doesn't change the result.
 594     theContext.Assign(' ');
 595   }
 596
 597   // First, parse the context to build up the DTD's tag stack. Note that we
 598   // pass false for the aLastCall parameter.
 599   result = Parse(theContext, false);
 600   if (NS_FAILED(result)) {
 601     return result;
 602   }
 603
 604   if (!mSink) {
 605     // Parse must have failed in the XML case and so the sink was killed.
 606     return NS_ERROR_HTMLPARSER_STOPPARSING;
 607   }
 608
 609   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
 610   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
 611
 612   fragSink->WillBuildContent();
 613   // Now, parse the actual content. Note that this is the last call
 614   // for HTML content, but for XML, we will want to build and parse
 615   // the end tags.  However, if tagStack is empty, it's the last call
 616   // for XML as well.
 617   if (theCount == 0) {
 618     result = Parse(aSourceBuffer, true);
 619     fragSink->DidBuildContent();
 620   } else {
 621     // Add an end tag chunk, so expat will read the whole source buffer,
 622     // and not worry about ']]' etc.
 623     result = Parse(aSourceBuffer + u"</"_ns, false);
 624     fragSink->DidBuildContent();
 625
 626     if (NS_SUCCEEDED(result)) {
 627       nsAutoString endContext;
 628       for (theIndex = 0; theIndex < theCount; theIndex++) {
 629         // we already added an end tag chunk above
 630         if (theIndex > 0) {
 631           endContext.AppendLiteral("</");
 632         }
 633
 634         nsString& thisTag = aTagStack[theIndex];
 635         // was there an xmlns=?
 636         int32_t endOfTag = thisTag.FindChar(char16_t(' '));
 637         if (endOfTag == -1) {
 638           endContext.Append(thisTag);
 639         } else {
 640           endContext.Append(Substring(thisTag, 0, endOfTag));
 641         }
 642
 643         endContext.Append('>');
 644       }
 645
 646       result = Parse(endContext, true);
 647     }
 648   }
 649
 650   mParserContext.reset();
 651
 652   return result;
 653 }
 654
/**
 *  This routine is called to cause the parser to continue parsing its
 *  underlying stream.  This call allows the parse process to happen in
 *  chunks, such as when the content is push based, and we need to parse in
 *  pieces.
 *
 *  An interesting change in how the parser gets used has led us to add extra
 *  processing to this method.  The case occurs when the parser is blocked in
 *  one context, and gets a parse(string) call in another context.  In this
 *  case, the parserContexts are linked. No problem.
 *
 *  The problem is that Parse(string) assumes that it can proceed unabated,
 *  but if the parser is already blocked that assumption is false. So we
 *  needed to add a mechanism here to allow the parser to continue to process
 *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 *  out of contexts.
 *
 *  Nothing is done (and NS_OK is returned) while the parser is blocked or
 *  has already been told to stop parsing.
 *
 *  @param   allowIteration : set to true if non-script resumption is
 *           requested. Not read by the body below.
 *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
 *  @param   aCanInterrupt : not read by the body below.
 *  @return  error code -- 0 if ok, non-zero if error. An interrupted or
 *           blocked parse is reported as NS_OK.
 */
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                               bool aCanInterrupt) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  nsresult result = NS_OK;

  if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    // Make sure a DTD has been chosen before any model building happens.
    result = WillBuildModel();
    if (NS_FAILED(result)) {
      // No further tokenizing on later calls once model setup has failed.
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mDTD) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // Each pass tokenizes (when allowed) and then builds the model. The
      // loop ends on any non-OK model result, on tokenizer EOF, or on
      // interruption.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty()) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner.UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult;
        if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
          // Remember the scanner position so a failed pass can be rewound.
          mParserContext->mScanner.Mark();
          if (mParserContext->mDocType == eXML &&
              mParserContext->mParserCommand != eViewSource) {
            // For XML the DTD is treated as the expat driver, hence the
            // downcast.
            nsExpatDriver* expat = static_cast<nsExpatDriver*>(mDTD.get());
            theTokenizerResult =
                expat->ResumeParse(mParserContext->mScanner, aIsFinalChunk);
            if (NS_FAILED(theTokenizerResult)) {
              mParserContext->mScanner.RewindToMark();
              if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
                // The driver asked us to stop: terminate and drop the sink.
                theTokenizerResult = Terminate();
                mSink = nullptr;
              }
            }
          } else {
            // Nothing to do for non-XML. Note that this should only be
            // about:blank at this point, we're also checking for view-source
            // above, but that shouldn't end up here anymore.
            theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
          }
        } else {
          theTokenizerResult = NS_OK;
        }

        result = mDTD->BuildModel(mSink);
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          // Interrupted on the final chunk: schedule a continue event so the
          // rest is processed later. The return value is not checked here.
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told the parser has been blocked, we disable all further
        // parsing (and cache any data coming in) until the parser is
        // re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          // Only finish up here if Terminate() has not already done so.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel();
            mInternalState = result;
          }

          return NS_OK;
        }
        // Buffer exhausted (or interrupted) after the stream has stopped:
        // try to finish the model. DidBuildModel itself does nothing while a
        // continue event is pending.
        if (((NS_OK == result &&
              theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
             result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
            mParserContext->mStreamListenerState == eOnStop) {
          DidBuildModel();
          return NS_OK;
        }

        // More data may still arrive: report the interruption as success and
        // let the sink know we are pausing.
        if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      // WillBuildModel succeeded but left us without a DTD.
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
 785
 786 /*******************************************************************
 787   These methods are used to talk to the netlib system...
 788  *******************************************************************/
 789
 790 nsresult nsParser::OnStartRequest(nsIRequest* request) {
 791   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 792     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 793     // to avoid introducing unintentional changes to behavior.
 794     return mInternalState;
 795   }
 796
 797   MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
 798              "Parser's nsIStreamListener API was not setup "
 799              "correctly in constructor.");
 800
 801   mParserContext->mStreamListenerState = eOnStart;
 802   mParserContext->mAutoDetectStatus = eUnknownDetect;
 803   mParserContext->mRequest = request;
 804
 805   mDTD = nullptr;
 806
 807   nsresult rv;
 808   nsAutoCString contentType;
 809   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
 810   if (channel) {
 811     rv = channel->GetContentType(contentType);
 812     if (NS_SUCCEEDED(rv)) {
 813       mParserContext->SetMimeType(contentType);
 814     }
 815   }
 816
 817   rv = NS_OK;
 818
 819   return rv;
 820 }
 821
 822 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
 823                                              int32_t aLen,
 824                                              nsCString& oCharset) {
 825   // This code is rather pointless to have. Might as well reuse expat as
 826   // seen in nsHtml5StreamParser. -- hsivonen
 827   oCharset.Truncate();
 828   if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
 829       ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
 830     int32_t i;
 831     bool versionFound = false, encodingFound = false;
 832     for (i = 6; i < aLen && !encodingFound; ++i) {
 833       // end of XML declaration?
 834       if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
 835           (((char*)aBytes)[i + 1] == '>')) {
 836         break;
 837       }
 838       // Version is required.
 839       if (!versionFound) {
 840         // Want to avoid string comparisons, hence looking for 'n'
 841         // and only if found check the string leading to it. Not
 842         // foolproof, but fast.
 843         // The shortest string allowed before this is  (strlen==13):
 844         // <?xml version
 845         if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
 846             (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
 847           // Fast forward through version
 848           char q = 0;
 849           for (++i; i < aLen; ++i) {
 850             char qi = ((char*)aBytes)[i];
 851             if (qi == '\'' || qi == '"') {
 852               if (q && q == qi) {
 853                 //  ending quote
 854                 versionFound = true;
 855                 break;
 856               } else {
 857                 // Starting quote
 858                 q = qi;
 859               }
 860             }
 861           }
 862         }
 863       } else {
 864         // encoding must follow version
 865         // Want to avoid string comparisons, hence looking for 'g'
 866         // and only if found check the string leading to it. Not
 867         // foolproof, but fast.
 868         // The shortest allowed string before this (strlen==26):
 869         // <?xml version="1" encoding
 870         if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
 871             (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
 872           int32_t encStart = 0;
 873           char q = 0;
 874           for (++i; i < aLen; ++i) {
 875             char qi = ((char*)aBytes)[i];
 876             if (qi == '\'' || qi == '"') {
 877               if (q && q == qi) {
 878                 int32_t count = i - encStart;
 879                 // encoding value is invalid if it is UTF-16
 880                 if (count > 0 &&
 881                     PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
 882                                    count)) {
 883                   oCharset.Assign((char*)(aBytes + encStart), count);
 884                 }
 885                 encodingFound = true;
 886                 break;
 887               } else {
 888                 encStart = i + 1;
 889                 q = qi;
 890               }
 891             }
 892           }
 893         }
 894       }  // if (!versionFound)
 895     }  // for
 896   }
 897   return !oCharset.IsEmpty();
 898 }
 899
 900 inline char GetNextChar(nsACString::const_iterator& aStart,
 901                         nsACString::const_iterator& aEnd) {
 902   NS_ASSERTION(aStart != aEnd, "end of buffer");
 903   return (++aStart != aEnd) ? *aStart : '\0';
 904 }
 905
 906 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
 907                                     const char* fromRawSegment,
 908                                     uint32_t toOffset, uint32_t count,
 909                                     uint32_t* writeCount) {
 910   *writeCount = count;
 911   return NS_OK;
 912 }
 913
 914 typedef struct {
 915   bool mNeedCharsetCheck;
 916   nsParser* mParser;
 917   nsScanner* mScanner;
 918   nsIRequest* mRequest;
 919 } ParserWriteStruct;
 920
 921 /*
 922  * This function is invoked as a result of a call to a stream's
 923  * ReadSegments() method. It is called for each contiguous buffer
 924  * of data in the underlying stream or pipe. Using ReadSegments
 925  * allows us to avoid copying data to read out of the stream.
 926  */
 927 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
 928                                 const char* fromRawSegment, uint32_t toOffset,
 929                                 uint32_t count, uint32_t* writeCount) {
 930   nsresult result;
 931   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
 932   const unsigned char* buf =
 933       reinterpret_cast<const unsigned char*>(fromRawSegment);
 934   uint32_t theNumRead = count;
 935
 936   if (!pws) {
 937     return NS_ERROR_FAILURE;
 938   }
 939
 940   if (pws->mNeedCharsetCheck) {
 941     pws->mNeedCharsetCheck = false;
 942     int32_t source;
 943     auto preferred = pws->mParser->GetDocumentCharset(source);
 944
 945     // This code was bogus when I found it. It expects the BOM or the XML
 946     // declaration to be entirely in the first network buffer. -- hsivonen
 947     const Encoding* encoding;
 948     std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
 949     if (encoding) {
 950       // The decoder will swallow the BOM. The UTF-16 will re-sniff for
 951       // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
 952       // or "UTF-16BE".
 953       preferred = WrapNotNull(encoding);
 954       source = kCharsetFromByteOrderMark;
 955     } else if (source < kCharsetFromChannel) {
 956       nsAutoCString declCharset;
 957
 958       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
 959         encoding = Encoding::ForLabel(declCharset);
 960         if (encoding) {
 961           preferred = WrapNotNull(encoding);
 962           source = kCharsetFromMetaTag;
 963         }
 964       }
 965     }
 966
 967     pws->mParser->SetDocumentCharset(preferred, source, false);
 968     pws->mParser->SetSinkCharset(preferred);
 969   }
 970
 971   result = pws->mScanner->Append(fromRawSegment, theNumRead);
 972   if (NS_SUCCEEDED(result)) {
 973     *writeCount = count;
 974   }
 975
 976   return result;
 977 }
 978
 979 nsresult nsParser::OnDataAvailable(nsIRequest* request,
 980                                    nsIInputStream* pIStream,
 981                                    uint64_t sourceOffset, uint32_t aLength) {
 982   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 983     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 984     // to avoid introducing unintentional changes to behavior.
 985     return mInternalState;
 986   }
 987
 988   MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
 989               eOnDataAvail == mParserContext->mStreamListenerState),
 990              "Error: OnStartRequest() must be called before OnDataAvailable()");
 991   MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
 992              "Must have a buffered input stream");
 993
 994   nsresult rv = NS_OK;
 995
 996   if (mIsAboutBlank) {
 997     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
 998     // ... but if an extension tries to feed us data for about:blank in a
 999     // release build, silently ignore the data.
1000     uint32_t totalRead;
1001     rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1002                                 &totalRead);
1003     return rv;
1004   }
1005
1006   if (mParserContext->mRequest == request) {
1007     mParserContext->mStreamListenerState = eOnDataAvail;
1008
1009     uint32_t totalRead;
1010     ParserWriteStruct pws;
1011     pws.mNeedCharsetCheck = true;
1012     pws.mParser = this;
1013     pws.mScanner = &mParserContext->mScanner;
1014     pws.mRequest = request;
1015
1016     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1017     if (NS_FAILED(rv)) {
1018       return rv;
1019     }
1020
1021     if (IsOkToProcessNetworkData()) {
1022       nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1023       nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1024       mProcessingNetworkData = true;
1025       if (sinkDeathGrip) {
1026         sinkDeathGrip->WillParse();
1027       }
1028       rv = ResumeParse();
1029       // Check if someone spun the event loop while we were parsing (XML
1030       // script...) If so, and OnStop was called during the spin, process it
1031       // now.
1032       if ((mParserContext->mRequest == request) && mOnStopPending) {
1033         mOnStopPending = false;
1034         mParserContext->mStreamListenerState = eOnStop;
1035         mParserContext->mScanner.SetIncremental(false);
1036
1037         if (sinkDeathGrip) {
1038           sinkDeathGrip->WillParse();
1039         }
1040         rv = ResumeParse(true, true);
1041       }
1042       mProcessingNetworkData = false;
1043     }
1044   } else {
1045     rv = NS_ERROR_UNEXPECTED;
1046   }
1047
1048   return rv;
1049 }
1050
1051 /**
1052  *  This is called by the networking library once the last block of data
1053  *  has been collected from the net.
1054  */
1055 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1056   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
1057     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
1058     // to avoid introducing unintentional changes to behavior.
1059     return mInternalState;
1060   }
1061
1062   nsresult rv = NS_OK;
1063
1064   mStreamStatus = status;
1065
1066   if (IsOkToProcessNetworkData()) {
1067     if (mParserContext->mRequest == request) {
1068       mParserContext->mStreamListenerState = eOnStop;
1069       mParserContext->mScanner.SetIncremental(false);
1070     }
1071
1072     mProcessingNetworkData = true;
1073     if (mSink) {
1074       mSink->WillParse();
1075     }
1076     rv = ResumeParse(true, true);
1077     mProcessingNetworkData = false;
1078   } else {
1079     // We'll have to handle this later
1080     mOnStopPending = true;
1081   }
1082
1083   // If the parser isn't enabled, we don't finish parsing till
1084   // it is reenabled.
1085
1086   return rv;
1087 }
1088
1089 /**
1090  * Get this as nsIStreamListener
1091  */
1092 nsIStreamListener* nsParser::GetStreamListener() { return this; }