2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "core/html/parser/HTMLDocumentParser.h"
29 #include "core/HTMLNames.h"
30 #include "core/css/MediaValuesCached.h"
31 #include "core/dom/DocumentFragment.h"
32 #include "core/dom/DocumentLifecycleObserver.h"
33 #include "core/dom/Element.h"
34 #include "core/frame/LocalFrame.h"
35 #include "core/frame/Settings.h"
36 #include "core/html/HTMLDocument.h"
37 #include "core/html/parser/AtomicHTMLToken.h"
38 #include "core/html/parser/BackgroundHTMLParser.h"
39 #include "core/html/parser/HTMLParserScheduler.h"
40 #include "core/html/parser/HTMLParserThread.h"
41 #include "core/html/parser/HTMLScriptRunner.h"
42 #include "core/html/parser/HTMLTreeBuilder.h"
43 #include "core/inspector/InspectorInstrumentation.h"
44 #include "core/inspector/InspectorTraceEvents.h"
45 #include "core/loader/DocumentLoader.h"
46 #include "core/loader/NavigationScheduler.h"
47 #include "platform/SharedBuffer.h"
48 #include "platform/ThreadSafeFunctional.h"
49 #include "platform/ThreadedDataReceiver.h"
50 #include "platform/TraceEvent.h"
51 #include "platform/heap/Handle.h"
52 #include "public/platform/Platform.h"
53 #include "public/platform/WebScheduler.h"
54 #include "public/platform/WebThread.h"
55 #include "wtf/RefCounted.h"
56 #include "wtf/TemporaryChange.h"
60 using namespace HTMLNames
;
62 // This is a direct transcription of step 4 from:
63 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
64 static HTMLTokenizer::State
tokenizerStateForContextElement(Element
* contextElement
, bool reportErrors
, const HTMLParserOptions
& options
)
67 return HTMLTokenizer::DataState
;
69 const QualifiedName
& contextTag
= contextElement
->tagQName();
71 if (contextTag
.matches(titleTag
) || contextTag
.matches(textareaTag
))
72 return HTMLTokenizer::RCDATAState
;
73 if (contextTag
.matches(styleTag
)
74 || contextTag
.matches(xmpTag
)
75 || contextTag
.matches(iframeTag
)
76 || (contextTag
.matches(noembedTag
) && options
.pluginsEnabled
)
77 || (contextTag
.matches(noscriptTag
) && options
.scriptEnabled
)
78 || contextTag
.matches(noframesTag
))
79 return reportErrors
? HTMLTokenizer::RAWTEXTState
: HTMLTokenizer::PLAINTEXTState
;
80 if (contextTag
.matches(scriptTag
))
81 return reportErrors
? HTMLTokenizer::ScriptDataState
: HTMLTokenizer::PLAINTEXTState
;
82 if (contextTag
.matches(plaintextTag
))
83 return HTMLTokenizer::PLAINTEXTState
;
84 return HTMLTokenizer::DataState
;
87 class ParserDataReceiver final
: public RefCountedWillBeGarbageCollectedFinalized
<ParserDataReceiver
>, public ThreadedDataReceiver
, public DocumentLifecycleObserver
{
88 WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(ParserDataReceiver
);
90 static PassRefPtrWillBeRawPtr
<ParserDataReceiver
> create(WeakPtr
<BackgroundHTMLParser
> backgroundParser
, Document
* document
)
92 return adoptRefWillBeNoop(new ParserDataReceiver(backgroundParser
, document
));
96 void ref() override
{ RefCounted
<ParserDataReceiver
>::ref(); }
97 void deref() override
{ RefCounted
<ParserDataReceiver
>::deref(); }
100 // ThreadedDataReceiver
101 void acceptData(const char* data
, int dataLength
) override
103 ASSERT(backgroundThread() && backgroundThread()->isCurrentThread());
104 if (m_backgroundParser
.get())
105 m_backgroundParser
.get()->appendRawBytesFromParserThread(data
, dataLength
);
108 WebThread
* backgroundThread() override
110 if (HTMLParserThread::shared())
111 return &HTMLParserThread::shared()->platformThread();
116 bool needsMainthreadDataCopy() override
{ return InspectorInstrumentation::hasFrontends(); }
117 void acceptMainthreadDataNotification(const char* data
, int dataLength
, int encodedDataLength
) override
119 ASSERT(!data
|| needsMainthreadDataCopy());
120 if (lifecycleContext())
121 lifecycleContext()->loader()->acceptDataFromThreadedReceiver(data
, dataLength
, encodedDataLength
);
124 DEFINE_INLINE_VIRTUAL_TRACE()
126 DocumentLifecycleObserver::trace(visitor
);
130 ParserDataReceiver(WeakPtr
<BackgroundHTMLParser
> backgroundParser
, Document
* document
)
131 : DocumentLifecycleObserver(document
)
132 , m_backgroundParser(backgroundParser
)
136 WeakPtr
<BackgroundHTMLParser
> m_backgroundParser
;
139 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument
& document
, bool reportErrors
, ParserSynchronizationPolicy syncPolicy
)
140 : ScriptableDocumentParser(document
)
141 , m_options(&document
)
142 , m_token(syncPolicy
== ForceSynchronousParsing
? adoptPtr(new HTMLToken
) : nullptr)
143 , m_tokenizer(syncPolicy
== ForceSynchronousParsing
? HTMLTokenizer::create(m_options
) : nullptr)
144 , m_scriptRunner(HTMLScriptRunner::create(&document
, this))
145 , m_treeBuilder(HTMLTreeBuilder::create(this, &document
, parserContentPolicy(), reportErrors
, m_options
))
146 , m_parserScheduler(HTMLParserScheduler::create(this))
147 , m_xssAuditorDelegate(&document
)
148 , m_weakFactory(this)
149 , m_preloader(HTMLResourcePreloader::create(document
))
150 , m_shouldUseThreading(syncPolicy
== AllowAsynchronousParsing
)
151 , m_endWasDelayed(false)
152 , m_haveBackgroundParser(false)
153 , m_tasksWereSuspended(false)
154 , m_pumpSessionNestingLevel(0)
155 , m_pumpSpeculationsSessionNestingLevel(0)
156 , m_isParsingAtLineNumber(false)
158 ASSERT(shouldUseThreading() || (m_token
&& m_tokenizer
));
161 // FIXME: Member variables should be grouped into self-initializing structs to
162 // minimize code duplication between these constructors.
163 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment
* fragment
, Element
* contextElement
, ParserContentPolicy parserContentPolicy
)
164 : ScriptableDocumentParser(fragment
->document(), parserContentPolicy
)
165 , m_options(&fragment
->document())
166 , m_token(adoptPtr(new HTMLToken
))
167 , m_tokenizer(HTMLTokenizer::create(m_options
))
168 , m_treeBuilder(HTMLTreeBuilder::create(this, fragment
, contextElement
, this->parserContentPolicy(), m_options
))
169 , m_xssAuditorDelegate(&fragment
->document())
170 , m_weakFactory(this)
171 , m_shouldUseThreading(false)
172 , m_endWasDelayed(false)
173 , m_haveBackgroundParser(false)
174 , m_tasksWereSuspended(false)
175 , m_pumpSessionNestingLevel(0)
176 , m_pumpSpeculationsSessionNestingLevel(0)
178 bool reportErrors
= false; // For now document fragment parsing never reports errors.
179 m_tokenizer
->setState(tokenizerStateForContextElement(contextElement
, reportErrors
, m_options
));
180 m_xssAuditor
.initForFragment();
183 HTMLDocumentParser::~HTMLDocumentParser()
186 if (m_haveBackgroundParser
)
187 stopBackgroundParser();
188 // In Oilpan, HTMLDocumentParser can die together with Document, and
189 // detach() is not called in this case.
191 ASSERT(!m_parserScheduler
);
192 ASSERT(!m_pumpSessionNestingLevel
);
193 ASSERT(!m_preloadScanner
);
194 ASSERT(!m_insertionPreloadScanner
);
195 ASSERT(!m_haveBackgroundParser
);
196 // FIXME: We should be able to ASSERT(m_speculations.isEmpty()),
197 // but there are cases where that's not true currently. For example,
198 // when we're told to stop parsing before we've consumed all the input.
202 DEFINE_TRACE(HTMLDocumentParser
)
204 visitor
->trace(m_treeBuilder
);
205 visitor
->trace(m_xssAuditorDelegate
);
206 visitor
->trace(m_scriptRunner
);
207 visitor
->trace(m_preloader
);
208 ScriptableDocumentParser::trace(visitor
);
209 HTMLScriptRunnerHost::trace(visitor
);
212 void HTMLDocumentParser::detach()
214 if (m_haveBackgroundParser
)
215 stopBackgroundParser();
216 DocumentParser::detach();
218 m_scriptRunner
->detach();
219 m_treeBuilder
->detach();
220 // FIXME: It seems wrong that we would have a preload scanner here.
221 // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
222 m_preloadScanner
.clear();
223 m_insertionPreloadScanner
.clear();
224 if (m_parserScheduler
) {
225 m_parserScheduler
->detach();
226 m_parserScheduler
.clear();
228 // Oilpan: It is important to clear m_token to deallocate backing memory of
229 // HTMLToken::m_data and let the allocator reuse the memory for
230 // HTMLToken::m_data of a next HTMLDocumentParser. We need to clear
231 // m_tokenizer first because m_tokenizer has a raw pointer to m_token.
236 void HTMLDocumentParser::stopParsing()
238 DocumentParser::stopParsing();
239 if (m_parserScheduler
) {
240 m_parserScheduler
->detach();
241 m_parserScheduler
.clear();
243 if (m_haveBackgroundParser
)
244 stopBackgroundParser();
247 // This kicks off "Once the user agent stops parsing" as described by:
248 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
249 void HTMLDocumentParser::prepareToStopParsing()
251 // FIXME: It may not be correct to disable this for the background parser.
252 // That means hasInsertionPoint() may not be correct in some cases.
253 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser
);
255 // pumpTokenizer can cause this parser to be detached from the Document,
256 // but we need to ensure it isn't deleted yet.
257 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
259 // NOTE: This pump should only ever emit buffered character tokens.
261 ASSERT(!m_haveBackgroundParser
);
262 pumpTokenizerIfPossible();
268 DocumentParser::prepareToStopParsing();
270 // We will not have a scriptRunner when parsing a DocumentFragment.
272 document()->setReadyState(Document::Interactive
);
274 // Setting the ready state above can fire mutation event and detach us
275 // from underneath. In that case, just bail out.
279 attemptToRunDeferredScriptsAndEnd();
282 bool HTMLDocumentParser::isParsingFragment() const
284 return m_treeBuilder
->isParsingFragment();
287 bool HTMLDocumentParser::processingData() const
289 return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser
;
292 void HTMLDocumentParser::pumpTokenizerIfPossible()
294 if (isStopped() || isWaitingForScripts())
300 bool HTMLDocumentParser::isScheduledForResume() const
302 return m_parserScheduler
&& m_parserScheduler
->isScheduledForResume();
305 // Used by HTMLParserScheduler
306 void HTMLDocumentParser::resumeParsingAfterYield()
308 ASSERT(shouldUseThreading());
309 ASSERT(m_haveBackgroundParser
);
311 // pumpPendingSpeculations can cause this parser to be detached from the Document,
312 // but we need to ensure it isn't deleted yet.
313 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
314 pumpPendingSpeculations();
317 void HTMLDocumentParser::runScriptsForPausedTreeBuilder()
319 ASSERT(scriptingContentIsAllowed(parserContentPolicy()));
321 TextPosition scriptStartPosition
= TextPosition::belowRangePosition();
322 RefPtrWillBeRawPtr
<Element
> scriptElement
= m_treeBuilder
->takeScriptToProcess(scriptStartPosition
);
323 // We will not have a scriptRunner when parsing a DocumentFragment.
325 m_scriptRunner
->execute(scriptElement
.release(), scriptStartPosition
);
328 bool HTMLDocumentParser::canTakeNextToken()
333 if (isWaitingForScripts()) {
334 // If we're paused waiting for a script, we try to execute scripts before continuing.
335 runScriptsForPausedTreeBuilder();
338 if (isWaitingForScripts())
342 // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
343 // LocalFrame, but this approach is how the old parser handled
344 // stopping when the page assigns window.location. What really
345 // should happen is that assigning window.location causes the
346 // parser to stop parsing cleanly. The problem is we're not
347 // prepared to do that at every point where we run JavaScript.
348 if (!isParsingFragment()
349 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending())
355 void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr
<ParsedChunk
> chunk
)
357 TRACE_EVENT0("blink", "HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser");
362 // ApplicationCache needs to be initialized before issuing preloads.
363 // We suspend preload until HTMLHTMLElement is inserted and
364 // ApplicationCache is initialized.
365 if (!document()->documentElement()) {
366 for (auto& request
: chunk
->preloads
)
367 m_queuedPreloads
.append(request
.release());
369 // We can safely assume that there are no queued preloads request after
370 // the document element is available, as we empty the queue immediately
371 // after the document element is created in pumpPendingSpeculations().
372 ASSERT(m_queuedPreloads
.isEmpty());
373 m_preloader
->takeAndPreload(chunk
->preloads
);
376 m_speculations
.append(chunk
);
378 if (!isWaitingForScripts() && !isScheduledForResume()) {
379 if (m_tasksWereSuspended
)
380 m_parserScheduler
->forceResumeAfterYield();
382 m_parserScheduler
->scheduleForResume();
386 void HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData
& data
)
388 document()->setEncodingData(data
);
391 void HTMLDocumentParser::validateSpeculations(PassOwnPtr
<ParsedChunk
> chunk
)
394 if (isWaitingForScripts()) {
395 // We're waiting on a network script, just save the chunk, we'll get
396 // a second validateSpeculations call after the script completes.
397 // This call should have been made immediately after runScriptsForPausedTreeBuilder
398 // which may have started a network load and left us waiting.
399 ASSERT(!m_lastChunkBeforeScript
);
400 m_lastChunkBeforeScript
= chunk
;
404 ASSERT(!m_lastChunkBeforeScript
);
405 OwnPtr
<HTMLTokenizer
> tokenizer
= m_tokenizer
.release();
406 OwnPtr
<HTMLToken
> token
= m_token
.release();
409 // There must not have been any changes to the HTMLTokenizer state on
410 // the main thread, which means the speculation buffer is correct.
414 // Currently we're only smart enough to reuse the speculation buffer if the tokenizer
415 // both starts and ends in the DataState. That state is simplest because the HTMLToken
416 // is always in the Uninitialized state. We should consider whether we can reuse the
417 // speculation buffer in other states, but we'd likely need to do something more
418 // sophisticated with the HTMLToken.
419 if (chunk
->tokenizerState
== HTMLTokenizer::DataState
420 && tokenizer
->state() == HTMLTokenizer::DataState
421 && m_input
.current().isEmpty()
422 && chunk
->treeBuilderState
== HTMLTreeBuilderSimulator::stateFor(m_treeBuilder
.get())) {
423 ASSERT(token
->isUninitialized());
427 discardSpeculationsAndResumeFrom(chunk
, token
.release(), tokenizer
.release());
430 void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr
<ParsedChunk
> lastChunkBeforeScript
, PassOwnPtr
<HTMLToken
> token
, PassOwnPtr
<HTMLTokenizer
> tokenizer
)
432 m_weakFactory
.revokeAll();
433 m_speculations
.clear();
435 OwnPtr
<BackgroundHTMLParser::Checkpoint
> checkpoint
= adoptPtr(new BackgroundHTMLParser::Checkpoint
);
436 checkpoint
->parser
= m_weakFactory
.createWeakPtr();
437 checkpoint
->token
= token
;
438 checkpoint
->tokenizer
= tokenizer
;
439 checkpoint
->treeBuilderState
= HTMLTreeBuilderSimulator::stateFor(m_treeBuilder
.get());
440 checkpoint
->inputCheckpoint
= lastChunkBeforeScript
->inputCheckpoint
;
441 checkpoint
->preloadScannerCheckpoint
= lastChunkBeforeScript
->preloadScannerCheckpoint
;
442 checkpoint
->unparsedInput
= m_input
.current().toString().isolatedCopy();
443 m_input
.current().clear(); // FIXME: This should be passed in instead of cleared.
445 ASSERT(checkpoint
->unparsedInput
.isSafeToSendToAnotherThread());
446 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::resumeFrom
, AllowCrossThreadAccess(m_backgroundParser
), checkpoint
.release()));
449 size_t HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr
<ParsedChunk
> popChunk
)
451 TRACE_EVENT0("blink", "HTMLDocumentParser::processParsedChunkFromBackgroundParser");
452 TemporaryChange
<bool> hasLineNumber(m_isParsingAtLineNumber
, true);
454 ASSERT_WITH_SECURITY_IMPLICATION(document()->activeParserCount() == 1);
455 ASSERT(!isParsingFragment());
456 ASSERT(!isWaitingForScripts());
457 ASSERT(!isStopped());
459 // ASSERT that this object is both attached to the Document and protected.
460 ASSERT(refCount() >= 2);
462 ASSERT(shouldUseThreading());
463 ASSERT(!m_tokenizer
);
465 ASSERT(!m_lastChunkBeforeScript
);
467 OwnPtr
<ParsedChunk
> chunk(popChunk
);
468 OwnPtr
<CompactHTMLTokenStream
> tokens
= chunk
->tokens
.release();
469 size_t elementTokenCount
= 0;
471 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::startedChunkWithCheckpoint
, AllowCrossThreadAccess(m_backgroundParser
), chunk
->inputCheckpoint
));
473 for (const auto& xssInfo
: chunk
->xssInfos
) {
474 m_textPosition
= xssInfo
->m_textPosition
;
475 m_xssAuditorDelegate
.didBlockScript(*xssInfo
);
480 for (Vector
<CompactHTMLToken
>::const_iterator it
= tokens
->begin(); it
!= tokens
->end(); ++it
) {
481 ASSERT(!isWaitingForScripts());
483 if (!chunk
->startingScript
&& (it
->type() == HTMLToken::StartTag
|| it
->type() == HTMLToken::EndTag
))
486 if (document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) {
488 // To match main-thread parser behavior (which never checks locationChangePending on the EOF path)
489 // we peek to see if this chunk has an EOF and process it anyway.
490 if (tokens
->last().type() == HTMLToken::EndOfFile
) {
491 ASSERT(m_speculations
.isEmpty()); // There should never be any chunks after the EOF.
492 prepareToStopParsing();
497 m_textPosition
= it
->textPosition();
499 constructTreeFromCompactHTMLToken(*it
);
504 if (!m_queuedPreloads
.isEmpty() && document()->documentElement())
505 m_preloader
->takeAndPreload(m_queuedPreloads
);
507 if (isWaitingForScripts()) {
508 ASSERT(it
+ 1 == tokens
->end()); // The </script> is assumed to be the last token of this bunch.
509 runScriptsForPausedTreeBuilder();
510 validateSpeculations(chunk
.release());
514 if (it
->type() == HTMLToken::EndOfFile
) {
515 ASSERT(it
+ 1 == tokens
->end()); // The EOF is assumed to be the last token of this bunch.
516 ASSERT(m_speculations
.isEmpty()); // There should never be any chunks after the EOF.
517 prepareToStopParsing();
521 ASSERT(!m_tokenizer
);
525 // Make sure all required pending text nodes are emitted before returning.
526 // This leaves the text nodes of "script", "style" and "svg" nodes intact.
528 m_treeBuilder
->flush(FlushIfAtTextLimit
);
530 m_isParsingAtLineNumber
= false;
532 return elementTokenCount
;
535 void HTMLDocumentParser::pumpPendingSpeculations()
538 // ASSERT that this object is both attached to the Document and protected.
539 ASSERT(refCount() >= 2);
541 // If this assert fails, you need to call validateSpeculations to make sure
542 // m_tokenizer and m_token don't have state that invalidates m_speculations.
543 ASSERT(!m_tokenizer
);
545 ASSERT(!m_lastChunkBeforeScript
);
546 ASSERT(!isWaitingForScripts());
547 ASSERT(!isStopped());
548 ASSERT(!isScheduledForResume());
549 ASSERT(!inPumpSession());
551 // FIXME: This point should never be reached when there is a blocking script,
552 // but it happens in unknown scenarios. See https://crbug.com/440901
553 if (isWaitingForScripts()) {
554 m_parserScheduler
->scheduleForResume();
558 // Do not allow pumping speculations in nested event loops.
559 if (m_pumpSpeculationsSessionNestingLevel
) {
560 m_parserScheduler
->scheduleForResume();
564 // FIXME: Pass in current input length.
565 TRACE_EVENT_BEGIN1("devtools.timeline", "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), lineNumber().zeroBasedInt()));
567 SpeculationsPumpSession
session(m_pumpSpeculationsSessionNestingLevel
, contextForParsingSession());
568 while (!m_speculations
.isEmpty()) {
569 ASSERT(!isScheduledForResume());
570 size_t elementTokenCount
= processParsedChunkFromBackgroundParser(m_speculations
.takeFirst());
571 session
.addedElementTokens(elementTokenCount
);
573 // Always check isParsing first as m_document may be null.
574 // Surprisingly, isScheduledForResume() may be set here as a result of
575 // processParsedChunkFromBackgroundParser running arbitrary javascript
576 // which invokes nested event loops. (e.g. inspector breakpoints)
577 if (!isParsing() || isWaitingForScripts() || isScheduledForResume())
580 if (m_speculations
.isEmpty() || m_parserScheduler
->yieldIfNeeded(session
, m_speculations
.first()->startingScript
))
584 TRACE_EVENT_END1("devtools.timeline", "ParseHTML", "endData", InspectorParseHtmlEvent::endData(lineNumber().zeroBasedInt() - 1));
585 TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "UpdateCounters", TRACE_EVENT_SCOPE_THREAD
, "data", InspectorUpdateCountersEvent::data());
588 void HTMLDocumentParser::forcePlaintextForTextDocument()
590 if (shouldUseThreading()) {
591 // This method is called before any data is appended, so we have to start
592 // the background parser ourselves.
593 if (!m_haveBackgroundParser
)
594 startBackgroundParser();
596 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::forcePlaintextForTextDocument
, AllowCrossThreadAccess(m_backgroundParser
)));
598 m_tokenizer
->setState(HTMLTokenizer::PLAINTEXTState
);
601 Document
* HTMLDocumentParser::contextForParsingSession()
603 // The parsing session should interact with the document only when parsing
604 // non-fragments. Otherwise, we might delay the load event mistakenly.
605 if (isParsingFragment())
610 void HTMLDocumentParser::pumpTokenizer()
612 ASSERT(!isStopped());
614 // ASSERT that this object is both attached to the Document and protected.
615 ASSERT(refCount() >= 2);
620 PumpSession
session(m_pumpSessionNestingLevel
, contextForParsingSession());
622 // We tell the InspectorInstrumentation about every pump, even if we
623 // end up pumping nothing. It can filter out empty pumps itself.
624 // FIXME: m_input.current().length() is only accurate if we
625 // end up parsing the whole buffer in this pump. We should pass how
626 // much we parsed as part of didWriteHTML instead of willWriteHTML.
627 TRACE_EVENT_BEGIN1("devtools.timeline", "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), m_input
.current().currentLine().zeroBasedInt()));
629 if (!isParsingFragment())
630 m_xssAuditor
.init(document(), &m_xssAuditorDelegate
);
632 while (canTakeNextToken()) {
633 if (m_xssAuditor
.isEnabled())
634 m_sourceTracker
.start(m_input
.current(), m_tokenizer
.get(), token());
636 if (!m_tokenizer
->nextToken(m_input
.current(), token()))
639 if (m_xssAuditor
.isEnabled()) {
640 m_sourceTracker
.end(m_input
.current(), m_tokenizer
.get(), token());
642 // We do not XSS filter innerHTML, which means we (intentionally) fail
643 // http/tests/security/xssAuditor/dom-write-innerHTML.html
644 if (OwnPtr
<XSSInfo
> xssInfo
= m_xssAuditor
.filterToken(FilterTokenRequest(token(), m_sourceTracker
, m_tokenizer
->shouldAllowCDATA())))
645 m_xssAuditorDelegate
.didBlockScript(*xssInfo
);
648 constructTreeFromHTMLToken();
649 ASSERT(isStopped() || token().isUninitialized());
653 // Ensure we haven't been totally deref'ed after pumping. Any caller of this
654 // function should be holding a RefPtr to this to ensure we weren't deleted.
655 ASSERT(refCount() >= 1);
661 // There should only be PendingText left since the tree-builder always flushes
662 // the task queue before returning. In case that ever changes, crash.
663 m_treeBuilder
->flush(FlushAlways
);
664 RELEASE_ASSERT(!isStopped());
666 if (isWaitingForScripts()) {
667 ASSERT(m_tokenizer
->state() == HTMLTokenizer::DataState
);
670 // TODO(kouhei): m_preloader should be always available for synchronous parsing case,
671 // adding paranoia if for speculative crash fix for crbug.com/465478
673 if (!m_preloadScanner
) {
674 m_preloadScanner
= adoptPtr(new HTMLPreloadScanner(m_options
,
676 CachedDocumentParameters::create(document())));
677 m_preloadScanner
->appendToEnd(m_input
.current());
679 m_preloadScanner
->scan(m_preloader
.get(), document()->baseElementURL());
683 TRACE_EVENT_END1("devtools.timeline", "ParseHTML", "endData", InspectorParseHtmlEvent::endData(m_input
.current().currentLine().zeroBasedInt() - 1));
686 void HTMLDocumentParser::constructTreeFromHTMLToken()
688 AtomicHTMLToken
atomicToken(token());
690 // We clear the m_token in case constructTreeFromAtomicToken
691 // synchronously re-enters the parser. We don't clear the token immediately
692 // for Character tokens because the AtomicHTMLToken avoids copying the
693 // characters by keeping a pointer to the underlying buffer in the
694 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
697 // FIXME: Stop clearing the m_token once we start running the parser off
698 // the main thread or once we stop allowing synchronous JavaScript
699 // execution from parseAttribute.
700 if (token().type() != HTMLToken::Character
)
703 m_treeBuilder
->constructTree(&atomicToken
);
705 // FIXME: constructTree may synchronously cause Document to be detached.
709 if (!token().isUninitialized()) {
710 ASSERT(token().type() == HTMLToken::Character
);
715 void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken
& compactToken
)
717 AtomicHTMLToken
token(compactToken
);
718 m_treeBuilder
->constructTree(&token
);
721 bool HTMLDocumentParser::hasInsertionPoint()
723 // FIXME: The wasCreatedByScript() branch here might not be fully correct.
724 // Our model of the EOF character differs slightly from the one in
725 // the spec because our treatment is uniform between network-sourced
726 // and script-sourced input streams whereas the spec treats them
728 return m_input
.hasInsertionPoint() || (wasCreatedByScript() && !m_input
.haveSeenEndOfFile());
731 void HTMLDocumentParser::insert(const SegmentedString
& source
)
736 TRACE_EVENT1("blink", "HTMLDocumentParser::insert", "source_length", source
.length());
738 // pumpTokenizer can cause this parser to be detached from the Document,
739 // but we need to ensure it isn't deleted yet.
740 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
743 ASSERT(!inPumpSession());
744 ASSERT(m_haveBackgroundParser
|| wasCreatedByScript());
745 m_token
= adoptPtr(new HTMLToken
);
746 m_tokenizer
= HTMLTokenizer::create(m_options
);
749 SegmentedString
excludedLineNumberSource(source
);
750 excludedLineNumberSource
.setExcludeLineNumbers();
751 m_input
.insertAtCurrentInsertionPoint(excludedLineNumberSource
);
752 pumpTokenizerIfPossible();
754 if (isWaitingForScripts()) {
755 // Check the document.write() output with a separate preload scanner as
756 // the main scanner can't deal with insertions.
757 if (!m_insertionPreloadScanner
) {
758 m_insertionPreloadScanner
= adoptPtr(new HTMLPreloadScanner(m_options
,
760 CachedDocumentParameters::create(document())));
763 m_insertionPreloadScanner
->appendToEnd(source
);
764 m_insertionPreloadScanner
->scan(m_preloader
.get(), document()->baseElementURL());
770 void HTMLDocumentParser::startBackgroundParser()
772 ASSERT(!isStopped());
773 ASSERT(shouldUseThreading());
774 ASSERT(!m_haveBackgroundParser
);
776 m_haveBackgroundParser
= true;
778 RefPtr
<WeakReference
<BackgroundHTMLParser
>> reference
= WeakReference
<BackgroundHTMLParser
>::createUnbound();
779 m_backgroundParser
= WeakPtr
<BackgroundHTMLParser
>(reference
);
781 // FIXME(oysteine): Disabled due to crbug.com/398076 until a full fix can be implemented.
782 if (RuntimeEnabledFeatures::threadedParserDataReceiverEnabled()) {
783 if (DocumentLoader
* loader
= document()->loader())
784 loader
->attachThreadedDataReceiver(ParserDataReceiver::create(m_backgroundParser
, document()->contextDocument().get()));
787 OwnPtr
<BackgroundHTMLParser::Configuration
> config
= adoptPtr(new BackgroundHTMLParser::Configuration
);
788 config
->options
= m_options
;
789 config
->parser
= m_weakFactory
.createWeakPtr();
790 config
->xssAuditor
= adoptPtr(new XSSAuditor
);
791 config
->xssAuditor
->init(document(), &m_xssAuditorDelegate
);
792 config
->preloadScanner
= adoptPtr(new TokenPreloadScanner(document()->url().copy(), CachedDocumentParameters::create(document())));
793 config
->decoder
= takeDecoder();
794 if (document()->settings()) {
795 if (document()->settings()->backgroundHtmlParserOutstandingTokenLimit())
796 config
->outstandingTokenLimit
= document()->settings()->backgroundHtmlParserOutstandingTokenLimit();
797 if (document()->settings()->backgroundHtmlParserPendingTokenLimit())
798 config
->pendingTokenLimit
= document()->settings()->backgroundHtmlParserPendingTokenLimit();
801 ASSERT(config
->xssAuditor
->isSafeToSendToAnotherThread());
802 ASSERT(config
->preloadScanner
->isSafeToSendToAnotherThread());
803 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::start
, reference
.release(), config
.release(),
804 AllowCrossThreadAccess(Platform::current()->currentThread()->scheduler())));
807 void HTMLDocumentParser::stopBackgroundParser()
809 ASSERT(shouldUseThreading());
810 ASSERT(m_haveBackgroundParser
);
811 m_haveBackgroundParser
= false;
813 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::stop
, AllowCrossThreadAccess(m_backgroundParser
)));
814 m_weakFactory
.revokeAll();
817 void HTMLDocumentParser::append(const String
& inputSource
)
822 // We should never reach this point if we're using a parser thread,
823 // as appendBytes() will directly ship the data to the thread.
824 ASSERT(!shouldUseThreading());
826 // pumpTokenizer can cause this parser to be detached from the Document,
827 // but we need to ensure it isn't deleted yet.
828 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
829 TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("blink.debug"), "HTMLDocumentParser::append", "size", inputSource
.length());
830 const SegmentedString
source(inputSource
);
832 if (m_preloadScanner
) {
833 if (m_input
.current().isEmpty() && !isWaitingForScripts()) {
834 // We have parsed until the end of the current input and so are now moving ahead of the preload scanner.
835 // Clear the scanner so we know to scan starting from the current input point if we block again.
836 m_preloadScanner
.clear();
838 m_preloadScanner
->appendToEnd(source
);
839 if (isWaitingForScripts())
840 m_preloadScanner
->scan(m_preloader
.get(), document()->baseElementURL());
844 m_input
.appendToEnd(source
);
846 if (inPumpSession()) {
847 // We've gotten data off the network in a nested write.
848 // We don't want to consume any more of the input stream now. Do
849 // not worry. We'll consume this data in a less-nested write().
853 pumpTokenizerIfPossible();
858 void HTMLDocumentParser::end()
860 ASSERT(!isDetached());
861 ASSERT(!isScheduledForResume());
863 if (m_haveBackgroundParser
)
864 stopBackgroundParser();
866 // Informs the rest of WebCore that parsing is really finished (and deletes this).
867 m_treeBuilder
->finished();
869 DocumentParser::stopParsing();
// Called while the parser is stopping (asserted). If a script runner exists,
// asks it to execute the scripts that were waiting for parsing to finish and
// tests the boolean result of executeScriptsWaitingForParsing().
// NOTE(review): what follows the test is not shown here; by the function name
// it presumably bails out when scripts remain and otherwise calls end() --
// confirm.
872 void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
874 ASSERT(isStopping());
875 // FIXME: It may not be correct to disable this for the background parser.
876 // That means hasInsertionPoint() may not be correct in some cases.
877 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser
);
878 if (m_scriptRunner
&& !m_scriptRunner
->executeScriptsWaitingForParsing())
// Tries to end parsing. When shouldDelayEnd() reports true the request is
// remembered in m_endWasDelayed; prepareToStopParsing() is the normal path.
// NOTE(review): the delayed case presumably returns before
// prepareToStopParsing() -- confirm.
883 void HTMLDocumentParser::attemptToEnd()
885 // finish() indicates we will not receive any more data. If we are waiting on
886 // an external script to load, we can't finish parsing quite yet.
888 if (shouldDelayEnd()) {
889 m_endWasDelayed
= true;
892 prepareToStopParsing();
// Completes an end that was previously postponed (m_endWasDelayed set): once
// shouldDelayEnd() no longer holds, clears the flag and calls
// prepareToStopParsing().
// NOTE(review): the guard below presumably exits early when no end was delayed
// or the delay still applies -- confirm.
895 void HTMLDocumentParser::endIfDelayed()
897 // If we've already been detached, don't bother ending.
901 if (!m_endWasDelayed
|| shouldDelayEnd())
904 m_endWasDelayed
= false;
905 prepareToStopParsing();
// Signals that no further data will arrive. Keeps this parser alive for the
// duration of the call. With a background parser running, closes m_input
// without marking end of file (if end of file has not been seen yet) and posts
// BackgroundHTMLParser::finish to the parser thread. The remaining statements
// create m_token and m_tokenizer for the "finished before any data" case and
// mark end of file on m_input if that has not happened yet.
// NOTE(review): the background-parser branch presumably returns before the
// non-threaded statements, and the token/tokenizer creation is presumably
// guarded -- confirm both.
908 void HTMLDocumentParser::finish()
910 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
911 // make sense to call any methods on DocumentParser once it's been stopped.
912 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
914 // flush may end up executing arbitrary script, and possibly detach the parser.
915 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
920 // Empty documents never got an append() call, and thus have never started
921 // a background parser. In those cases, we ignore shouldUseThreading()
922 // and fall through to the non-threading case.
923 if (m_haveBackgroundParser
) {
924 if (!m_input
.haveSeenEndOfFile())
925 m_input
.closeWithoutMarkingEndOfFile();
926 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::finish
, AllowCrossThreadAccess(m_backgroundParser
)));
932 // We're finishing before receiving any data. Rather than booting up
933 // the background parser just to spin it down, we finish parsing
935 m_token
= adoptPtr(new HTMLToken
);
936 m_tokenizer
= HTMLTokenizer::create(m_options
);
939 // We're not going to get any more data off the network, so we tell the
940 // input stream we've reached the end of file. finish() can be called more
941 // than once, if the first time does not call end().
942 if (!m_input
.haveSeenEndOfFile())
943 m_input
.markEndOfFile();
// Reports whether the script runner is currently executing a script, as
// answered by m_scriptRunner->isExecutingScript().
// NOTE(review): m_scriptRunner is dereferenced here; any null guard is not
// visible in this view -- confirm one exists.
948 bool HTMLDocumentParser::isExecutingScript() const
952 return m_scriptRunner
->isExecutingScript();
// True only when m_isParsingAtLineNumber is set and the
// ScriptableDocumentParser base implementation also returns true.
955 bool HTMLDocumentParser::isParsingAtLineNumber() const
957 return m_isParsingAtLineNumber
&& ScriptableDocumentParser::isParsingAtLineNumber();
// Returns the current line. With a background parser the value comes from the
// cached m_textPosition; otherwise it is read from the current segment of
// m_input.
960 OrdinalNumber
HTMLDocumentParser::lineNumber() const
962 if (m_haveBackgroundParser
)
963 return m_textPosition
.m_line
;
965 return m_input
.current().currentLine();
// Returns the current line/column position. With a background parser the
// cached m_textPosition is returned as-is; otherwise a TextPosition is built
// from the current line and column of m_input's current segment.
968 TextPosition
HTMLDocumentParser::textPosition() const
970 if (m_haveBackgroundParser
)
971 return m_textPosition
;
973 const SegmentedString
& currentString
= m_input
.current();
974 OrdinalNumber line
= currentString
.currentLine();
975 OrdinalNumber column
= currentString
.currentColumn();
977 return TextPosition(line
, column
);
// Returns true when either the tree builder or the script runner (if one
// exists) holds a parser-blocking script. Asserts that both never hold one at
// the same time.
980 bool HTMLDocumentParser::isWaitingForScripts() const
982 // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser
983 // where the script is transferred from the treebuilder to the script runner.
984 // The script runner will hold the script until it's loaded and run. During
985 // any of this time, we want to count ourselves as "waiting for a script" and thus
986 // run the preload scanner, as well as delay completion of parsing.
987 bool treeBuilderHasBlockingScript
= m_treeBuilder
->hasParserBlockingScript();
// m_scriptRunner may be null, hence the guard before querying it.
988 bool scriptRunnerHasBlockingScript
= m_scriptRunner
&& m_scriptRunner
->hasParserBlockingScript();
989 // Since the parser is paused while a script runner has a blocking script, it should
990 // never be possible to end up with both objects holding a blocking script.
991 ASSERT(!(treeBuilderHasBlockingScript
&& scriptRunnerHasBlockingScript
));
992 // If either object has a blocking script, the parser should be paused.
993 return treeBuilderHasBlockingScript
|| scriptRunnerHasBlockingScript
;
// Resumes parsing once no script is executing or blocking (both asserted).
// With a background parser: validates speculations against the chunk saved in
// m_lastChunkBeforeScript (ownership is released to the callee), then pumps
// the pending speculations while keeping this parser alive. The remaining
// statements clear m_insertionPreloadScanner and pump the tokenizer.
// NOTE(review): the background-parser branch presumably returns before the
// last two statements -- confirm.
996 void HTMLDocumentParser::resumeParsingAfterScriptExecution()
998 ASSERT(!isExecutingScript());
999 ASSERT(!isWaitingForScripts());
1001 if (m_haveBackgroundParser
) {
1002 validateSpeculations(m_lastChunkBeforeScript
.release());
1003 ASSERT(!m_lastChunkBeforeScript
);
1004 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
1005 // but we need to ensure it isn't deleted yet.
1006 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
1007 pumpPendingSpeculations();
1011 m_insertionPreloadScanner
.clear();
1012 pumpTokenizerIfPossible();
// Feeds the current segment of m_input to the preload scanner and runs a scan
// using m_preloader and the document's base element URL. Requires an existing
// preload scanner (asserted).
1016 void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan()
1018 ASSERT(m_preloadScanner
);
1019 m_preloadScanner
->appendToEnd(m_input
.current());
1020 m_preloadScanner
->scan(m_preloader
.get(), document()->baseElementURL());
// Notification that a script resource finished loading. Keeps this parser
// alive for the duration of the call, asserts a script runner exists and no
// script is executing, lets the script runner execute scripts that were waiting
// for this load, and resumes parsing if nothing is blocking any more.
// NOTE(review): the attemptToRunDeferredScriptsAndEnd() call is presumably
// guarded by a "parser is stopping" check followed by an early return; that
// guard is not visible here -- confirm.
1023 void HTMLDocumentParser::notifyScriptLoaded(Resource
* cachedResource
)
1025 // pumpTokenizer can cause this parser to be detached from the Document,
1026 // but we need to ensure it isn't deleted yet.
1027 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
1029 ASSERT(m_scriptRunner
);
1030 ASSERT(!isExecutingScript());
1037 attemptToRunDeferredScriptsAndEnd();
1041 m_scriptRunner
->executeScriptsWaitingForLoad(cachedResource
);
1042 if (!isWaitingForScripts())
1043 resumeParsingAfterScriptExecution();
// Runs scripts that were blocked on pending resources. Checks
// hasScriptsWaitingForResources() first, then keeps this parser alive, asks the
// script runner to execute those scripts, and resumes parsing when no script is
// blocking afterwards.
// NOTE(review): the hasScriptsWaitingForResources() check presumably returns
// early when nothing is waiting -- confirm.
1046 void HTMLDocumentParser::executeScriptsWaitingForResources()
1048 // Document only calls this when the Document owns the DocumentParser
1049 // so this will not be called in the DocumentFragment case.
1050 ASSERT(m_scriptRunner
);
1051 // Ignore calls unless we have a script blocking the parser waiting on a
1052 // stylesheet load. Otherwise we are currently parsing and this
1053 // is a re-entrant call from encountering a </ style> tag.
1054 if (!m_scriptRunner
->hasScriptsWaitingForResources())
1057 // pumpTokenizer can cause this parser to be detached from the Document,
1058 // but we need to ensure it isn't deleted yet.
1059 RefPtrWillBeRawPtr
<HTMLDocumentParser
> protect(this);
1060 m_scriptRunner
->executeScriptsWaitingForResources();
1061 if (!isWaitingForScripts())
1062 resumeParsingAfterScriptExecution();
// Parses `source` into `fragment` using a temporary parser created for the
// given context element and content policy. The whole source is appended in
// one call; afterwards the parser must no longer be processing data (asserted)
// and is detached before it goes out of scope.
1065 void HTMLDocumentParser::parseDocumentFragment(const String
& source
, DocumentFragment
* fragment
, Element
* contextElement
, ParserContentPolicy parserContentPolicy
)
1067 RefPtrWillBeRawPtr
<HTMLDocumentParser
> parser
= HTMLDocumentParser::create(fragment
, contextElement
, parserContentPolicy
);
1068 parser
->append(source
);
1070 ASSERT(!parser
->processingData()); // Make sure we're done. <rdar://problem/3963151>
1071 parser
->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
// Marks scheduled tasks as suspended and forwards the suspension to the parser
// scheduler when one exists. Asserts the tasks are not already suspended, so
// calls must be paired with resumeScheduledTasks().
1074 void HTMLDocumentParser::suspendScheduledTasks()
1076 ASSERT(!m_tasksWereSuspended
);
1077 m_tasksWereSuspended
= true;
1078 if (m_parserScheduler
)
1079 m_parserScheduler
->suspend();
// Clears the suspended flag and resumes the parser scheduler when one exists.
// Asserts the tasks are currently suspended, i.e. that
// suspendScheduledTasks() was called first.
1082 void HTMLDocumentParser::resumeScheduledTasks()
1084 ASSERT(m_tasksWereSuspended
);
1085 m_tasksWereSuspended
= false;
1086 if (m_parserScheduler
)
1087 m_parserScheduler
->resume();
// Receives raw (undecoded) bytes. With threading enabled, starts the background
// parser on first use, copies the bytes into a newly allocated Vector<char>
// and posts that buffer to BackgroundHTMLParser::appendRawBytesFromMainThread
// on the parser thread. The final statement forwards the bytes to
// DecodedDataDocumentParser::appendBytes.
// NOTE(review): the empty-input/stopped check and the threaded branch
// presumably both return early -- confirm.
1090 void HTMLDocumentParser::appendBytes(const char* data
, size_t length
)
1092 if (!length
|| isStopped())
1095 if (shouldUseThreading()) {
1096 if (!m_haveBackgroundParser
)
1097 startBackgroundParser();
// The bytes are copied into a separate buffer; buffer.release() below passes
// that buffer to the posted task.
1099 OwnPtr
<Vector
<char>> buffer
= adoptPtr(new Vector
<char>(length
));
1100 memcpy(buffer
->data(), data
, length
);
1101 TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("blink.debug"), "HTMLDocumentParser::appendBytes", "size", (unsigned)length
);
1103 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::appendRawBytesFromMainThread
, AllowCrossThreadAccess(m_backgroundParser
), buffer
.release()));
1107 DecodedDataDocumentParser::appendBytes(data
, length
);
// Flushes buffered input. Checks for a detached parser or a missing decoder
// first; with a background parser the flush is posted to the parser thread as
// BackgroundHTMLParser::flush, and DecodedDataDocumentParser::flush() covers
// the other case.
// NOTE(review): the first check presumably returns early and the two flush
// paths are presumably an if/else -- confirm.
1110 void HTMLDocumentParser::flush()
1112 // If we've got no decoder, we never received any data.
1113 if (isDetached() || needsDecoder())
1116 if (m_haveBackgroundParser
)
1117 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::flush
, AllowCrossThreadAccess(m_backgroundParser
)));
1119 DecodedDataDocumentParser::flush();
// Installs `decoder` through DecodedDataDocumentParser::setDecoder. When a
// background parser exists, the result of takeDecoder() is then posted to
// BackgroundHTMLParser::setDecoder on the parser thread.
// NOTE(review): takeDecoder() presumably moves the decoder out of this object
// so that only the background parser uses it -- confirm.
1122 void HTMLDocumentParser::setDecoder(PassOwnPtr
<TextResourceDecoder
> decoder
)
1125 DecodedDataDocumentParser::setDecoder(decoder
);
1127 if (m_haveBackgroundParser
)
1128 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::setDecoder
, AllowCrossThreadAccess(m_backgroundParser
), takeDecoder()));