Move parseFontFaceDescriptor to CSSPropertyParser.cpp
[chromium-blink-merge.git] / third_party / WebKit / Source / core / html / parser / HTMLDocumentParser.cpp
blob868e71e8c160c3c41f126250859126989ad402f7
1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "config.h"
27 #include "core/html/parser/HTMLDocumentParser.h"
29 #include "core/HTMLNames.h"
30 #include "core/css/MediaValuesCached.h"
31 #include "core/dom/DocumentFragment.h"
32 #include "core/dom/DocumentLifecycleObserver.h"
33 #include "core/dom/Element.h"
34 #include "core/frame/LocalFrame.h"
35 #include "core/frame/Settings.h"
36 #include "core/html/HTMLDocument.h"
37 #include "core/html/parser/AtomicHTMLToken.h"
38 #include "core/html/parser/BackgroundHTMLParser.h"
39 #include "core/html/parser/HTMLParserScheduler.h"
40 #include "core/html/parser/HTMLParserThread.h"
41 #include "core/html/parser/HTMLScriptRunner.h"
42 #include "core/html/parser/HTMLTreeBuilder.h"
43 #include "core/inspector/InspectorInstrumentation.h"
44 #include "core/inspector/InspectorTraceEvents.h"
45 #include "core/loader/DocumentLoader.h"
46 #include "core/loader/NavigationScheduler.h"
47 #include "platform/SharedBuffer.h"
48 #include "platform/ThreadSafeFunctional.h"
49 #include "platform/ThreadedDataReceiver.h"
50 #include "platform/TraceEvent.h"
51 #include "platform/heap/Handle.h"
52 #include "public/platform/Platform.h"
53 #include "public/platform/WebScheduler.h"
54 #include "public/platform/WebThread.h"
55 #include "wtf/RefCounted.h"
56 #include "wtf/TemporaryChange.h"
58 namespace blink {
60 using namespace HTMLNames;
62 // This is a direct transcription of step 4 from:
63 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
64 static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options)
66 if (!contextElement)
67 return HTMLTokenizer::DataState;
69 const QualifiedName& contextTag = contextElement->tagQName();
71 if (contextTag.matches(titleTag) || contextTag.matches(textareaTag))
72 return HTMLTokenizer::RCDATAState;
73 if (contextTag.matches(styleTag)
74 || contextTag.matches(xmpTag)
75 || contextTag.matches(iframeTag)
76 || (contextTag.matches(noembedTag) && options.pluginsEnabled)
77 || (contextTag.matches(noscriptTag) && options.scriptEnabled)
78 || contextTag.matches(noframesTag))
79 return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
80 if (contextTag.matches(scriptTag))
81 return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
82 if (contextTag.matches(plaintextTag))
83 return HTMLTokenizer::PLAINTEXTState;
84 return HTMLTokenizer::DataState;
87 class ParserDataReceiver final : public RefCountedWillBeGarbageCollectedFinalized<ParserDataReceiver>, public ThreadedDataReceiver, public DocumentLifecycleObserver {
88 WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(ParserDataReceiver);
89 public:
90 static PassRefPtrWillBeRawPtr<ParserDataReceiver> create(WeakPtr<BackgroundHTMLParser> backgroundParser, Document* document)
92 return adoptRefWillBeNoop(new ParserDataReceiver(backgroundParser, document));
95 #if !ENABLE(OILPAN)
96 void ref() override { RefCounted<ParserDataReceiver>::ref(); }
97 void deref() override { RefCounted<ParserDataReceiver>::deref(); }
98 #endif
100 // ThreadedDataReceiver
101 void acceptData(const char* data, int dataLength) override
103 ASSERT(backgroundThread() && backgroundThread()->isCurrentThread());
104 if (m_backgroundParser.get())
105 m_backgroundParser.get()->appendRawBytesFromParserThread(data, dataLength);
108 WebThread* backgroundThread() override
110 if (HTMLParserThread::shared())
111 return &HTMLParserThread::shared()->platformThread();
113 return nullptr;
116 bool needsMainthreadDataCopy() override { return InspectorInstrumentation::hasFrontends(); }
117 void acceptMainthreadDataNotification(const char* data, int dataLength, int encodedDataLength) override
119 ASSERT(!data || needsMainthreadDataCopy());
120 if (lifecycleContext())
121 lifecycleContext()->loader()->acceptDataFromThreadedReceiver(data, dataLength, encodedDataLength);
124 DEFINE_INLINE_VIRTUAL_TRACE()
126 DocumentLifecycleObserver::trace(visitor);
129 private:
130 ParserDataReceiver(WeakPtr<BackgroundHTMLParser> backgroundParser, Document* document)
131 : DocumentLifecycleObserver(document)
132 , m_backgroundParser(backgroundParser)
136 WeakPtr<BackgroundHTMLParser> m_backgroundParser;
139 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document, bool reportErrors, ParserSynchronizationPolicy syncPolicy)
140 : ScriptableDocumentParser(document)
141 , m_options(&document)
142 , m_token(syncPolicy == ForceSynchronousParsing ? adoptPtr(new HTMLToken) : nullptr)
143 , m_tokenizer(syncPolicy == ForceSynchronousParsing ? HTMLTokenizer::create(m_options) : nullptr)
144 , m_scriptRunner(HTMLScriptRunner::create(&document, this))
145 , m_treeBuilder(HTMLTreeBuilder::create(this, &document, parserContentPolicy(), reportErrors, m_options))
146 , m_parserScheduler(HTMLParserScheduler::create(this))
147 , m_xssAuditorDelegate(&document)
148 , m_weakFactory(this)
149 , m_preloader(HTMLResourcePreloader::create(document))
150 , m_shouldUseThreading(syncPolicy == AllowAsynchronousParsing)
151 , m_endWasDelayed(false)
152 , m_haveBackgroundParser(false)
153 , m_tasksWereSuspended(false)
154 , m_pumpSessionNestingLevel(0)
155 , m_pumpSpeculationsSessionNestingLevel(0)
156 , m_isParsingAtLineNumber(false)
158 ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
161 // FIXME: Member variables should be grouped into self-initializing structs to
162 // minimize code duplication between these constructors.
163 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
164 : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
165 , m_options(&fragment->document())
166 , m_token(adoptPtr(new HTMLToken))
167 , m_tokenizer(HTMLTokenizer::create(m_options))
168 , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, this->parserContentPolicy(), m_options))
169 , m_xssAuditorDelegate(&fragment->document())
170 , m_weakFactory(this)
171 , m_shouldUseThreading(false)
172 , m_endWasDelayed(false)
173 , m_haveBackgroundParser(false)
174 , m_tasksWereSuspended(false)
175 , m_pumpSessionNestingLevel(0)
176 , m_pumpSpeculationsSessionNestingLevel(0)
178 bool reportErrors = false; // For now document fragment parsing never reports errors.
179 m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
180 m_xssAuditor.initForFragment();
183 HTMLDocumentParser::~HTMLDocumentParser()
185 #if ENABLE(OILPAN)
186 if (m_haveBackgroundParser)
187 stopBackgroundParser();
188 // In Oilpan, HTMLDocumentParser can die together with Document, and
189 // detach() is not called in this case.
190 #else
191 ASSERT(!m_parserScheduler);
192 ASSERT(!m_pumpSessionNestingLevel);
193 ASSERT(!m_preloadScanner);
194 ASSERT(!m_insertionPreloadScanner);
195 ASSERT(!m_haveBackgroundParser);
196 // FIXME: We should be able to ASSERT(m_speculations.isEmpty()),
197 // but there are cases where that's not true currently. For example,
198 // we we're told to stop parsing before we've consumed all the input.
199 #endif
202 DEFINE_TRACE(HTMLDocumentParser)
204 visitor->trace(m_treeBuilder);
205 visitor->trace(m_xssAuditorDelegate);
206 visitor->trace(m_scriptRunner);
207 visitor->trace(m_preloader);
208 ScriptableDocumentParser::trace(visitor);
209 HTMLScriptRunnerHost::trace(visitor);
212 void HTMLDocumentParser::detach()
214 if (m_haveBackgroundParser)
215 stopBackgroundParser();
216 DocumentParser::detach();
217 if (m_scriptRunner)
218 m_scriptRunner->detach();
219 m_treeBuilder->detach();
220 // FIXME: It seems wrong that we would have a preload scanner here.
221 // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
222 m_preloadScanner.clear();
223 m_insertionPreloadScanner.clear();
224 if (m_parserScheduler) {
225 m_parserScheduler->detach();
226 m_parserScheduler.clear();
228 // Oilpan: It is important to clear m_token to deallocate backing memory of
229 // HTMLToken::m_data and let the allocator reuse the memory for
230 // HTMLToken::m_data of a next HTMLDocumentParser. We need to clear
231 // m_tokenizer first because m_tokenizer has a raw pointer to m_token.
232 m_tokenizer.clear();
233 m_token.clear();
236 void HTMLDocumentParser::stopParsing()
238 DocumentParser::stopParsing();
239 if (m_parserScheduler) {
240 m_parserScheduler->detach();
241 m_parserScheduler.clear();
243 if (m_haveBackgroundParser)
244 stopBackgroundParser();
247 // This kicks off "Once the user agent stops parsing" as described by:
248 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
249 void HTMLDocumentParser::prepareToStopParsing()
251 // FIXME: It may not be correct to disable this for the background parser.
252 // That means hasInsertionPoint() may not be correct in some cases.
253 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);
255 // pumpTokenizer can cause this parser to be detached from the Document,
256 // but we need to ensure it isn't deleted yet.
257 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
259 // NOTE: This pump should only ever emit buffered character tokens.
260 if (m_tokenizer) {
261 ASSERT(!m_haveBackgroundParser);
262 pumpTokenizerIfPossible();
265 if (isStopped())
266 return;
268 DocumentParser::prepareToStopParsing();
270 // We will not have a scriptRunner when parsing a DocumentFragment.
271 if (m_scriptRunner)
272 document()->setReadyState(Document::Interactive);
274 // Setting the ready state above can fire mutation event and detach us
275 // from underneath. In that case, just bail out.
276 if (isDetached())
277 return;
279 attemptToRunDeferredScriptsAndEnd();
282 bool HTMLDocumentParser::isParsingFragment() const
284 return m_treeBuilder->isParsingFragment();
287 bool HTMLDocumentParser::processingData() const
289 return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser;
292 void HTMLDocumentParser::pumpTokenizerIfPossible()
294 if (isStopped() || isWaitingForScripts())
295 return;
297 pumpTokenizer();
300 bool HTMLDocumentParser::isScheduledForResume() const
302 return m_parserScheduler && m_parserScheduler->isScheduledForResume();
305 // Used by HTMLParserScheduler
306 void HTMLDocumentParser::resumeParsingAfterYield()
308 ASSERT(shouldUseThreading());
309 ASSERT(m_haveBackgroundParser);
311 // pumpPendingSpeculations can cause this parser to be detached from the Document,
312 // but we need to ensure it isn't deleted yet.
313 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
314 pumpPendingSpeculations();
317 void HTMLDocumentParser::runScriptsForPausedTreeBuilder()
319 ASSERT(scriptingContentIsAllowed(parserContentPolicy()));
321 TextPosition scriptStartPosition = TextPosition::belowRangePosition();
322 RefPtrWillBeRawPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition);
323 // We will not have a scriptRunner when parsing a DocumentFragment.
324 if (m_scriptRunner)
325 m_scriptRunner->execute(scriptElement.release(), scriptStartPosition);
328 bool HTMLDocumentParser::canTakeNextToken()
330 if (isStopped())
331 return false;
333 if (isWaitingForScripts()) {
334 // If we're paused waiting for a script, we try to execute scripts before continuing.
335 runScriptsForPausedTreeBuilder();
336 if (isStopped())
337 return false;
338 if (isWaitingForScripts())
339 return false;
342 // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
343 // LocalFrame, but this approach is how the old parser handled
344 // stopping when the page assigns window.location. What really
345 // should happen is that assigning window.location causes the
346 // parser to stop parsing cleanly. The problem is we're not
347 // perpared to do that at every point where we run JavaScript.
348 if (!isParsingFragment()
349 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending())
350 return false;
352 return true;
355 void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> chunk)
357 TRACE_EVENT0("blink", "HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser");
359 if (!isParsing())
360 return;
362 // ApplicationCache needs to be initialized before issuing preloads.
363 // We suspend preload until HTMLHTMLElement is inserted and
364 // ApplicationCache is initialized.
365 if (!document()->documentElement()) {
366 for (auto& request : chunk->preloads)
367 m_queuedPreloads.append(request.release());
368 } else {
369 // We can safely assume that there are no queued preloads request after
370 // the document element is available, as we empty the queue immediately
371 // after the document element is created in pumpPendingSpeculations().
372 ASSERT(m_queuedPreloads.isEmpty());
373 m_preloader->takeAndPreload(chunk->preloads);
376 m_speculations.append(chunk);
378 if (!isWaitingForScripts() && !isScheduledForResume()) {
379 if (m_tasksWereSuspended)
380 m_parserScheduler->forceResumeAfterYield();
381 else
382 m_parserScheduler->scheduleForResume();
386 void HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData& data)
388 document()->setEncodingData(data);
391 void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk)
393 ASSERT(chunk);
394 if (isWaitingForScripts()) {
395 // We're waiting on a network script, just save the chunk, we'll get
396 // a second validateSpeculations call after the script completes.
397 // This call should have been made immediately after runScriptsForPausedTreeBuilder
398 // which may have started a network load and left us waiting.
399 ASSERT(!m_lastChunkBeforeScript);
400 m_lastChunkBeforeScript = chunk;
401 return;
404 ASSERT(!m_lastChunkBeforeScript);
405 OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release();
406 OwnPtr<HTMLToken> token = m_token.release();
408 if (!tokenizer) {
409 // There must not have been any changes to the HTMLTokenizer state on
410 // the main thread, which means the speculation buffer is correct.
411 return;
414 // Currently we're only smart enough to reuse the speculation buffer if the tokenizer
415 // both starts and ends in the DataState. That state is simplest because the HTMLToken
416 // is always in the Uninitialized state. We should consider whether we can reuse the
417 // speculation buffer in other states, but we'd likely need to do something more
418 // sophisticated with the HTMLToken.
419 if (chunk->tokenizerState == HTMLTokenizer::DataState
420 && tokenizer->state() == HTMLTokenizer::DataState
421 && m_input.current().isEmpty()
422 && chunk->treeBuilderState == HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get())) {
423 ASSERT(token->isUninitialized());
424 return;
427 discardSpeculationsAndResumeFrom(chunk, token.release(), tokenizer.release());
430 void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunkBeforeScript, PassOwnPtr<HTMLToken> token, PassOwnPtr<HTMLTokenizer> tokenizer)
432 m_weakFactory.revokeAll();
433 m_speculations.clear();
435 OwnPtr<BackgroundHTMLParser::Checkpoint> checkpoint = adoptPtr(new BackgroundHTMLParser::Checkpoint);
436 checkpoint->parser = m_weakFactory.createWeakPtr();
437 checkpoint->token = token;
438 checkpoint->tokenizer = tokenizer;
439 checkpoint->treeBuilderState = HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get());
440 checkpoint->inputCheckpoint = lastChunkBeforeScript->inputCheckpoint;
441 checkpoint->preloadScannerCheckpoint = lastChunkBeforeScript->preloadScannerCheckpoint;
442 checkpoint->unparsedInput = m_input.current().toString().isolatedCopy();
443 m_input.current().clear(); // FIXME: This should be passed in instead of cleared.
445 ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread());
446 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::resumeFrom, AllowCrossThreadAccess(m_backgroundParser), checkpoint.release()));
449 size_t HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> popChunk)
451 TRACE_EVENT0("blink", "HTMLDocumentParser::processParsedChunkFromBackgroundParser");
452 TemporaryChange<bool> hasLineNumber(m_isParsingAtLineNumber, true);
454 ASSERT_WITH_SECURITY_IMPLICATION(document()->activeParserCount() == 1);
455 ASSERT(!isParsingFragment());
456 ASSERT(!isWaitingForScripts());
457 ASSERT(!isStopped());
458 #if !ENABLE(OILPAN)
459 // ASSERT that this object is both attached to the Document and protected.
460 ASSERT(refCount() >= 2);
461 #endif
462 ASSERT(shouldUseThreading());
463 ASSERT(!m_tokenizer);
464 ASSERT(!m_token);
465 ASSERT(!m_lastChunkBeforeScript);
467 OwnPtr<ParsedChunk> chunk(popChunk);
468 OwnPtr<CompactHTMLTokenStream> tokens = chunk->tokens.release();
469 size_t elementTokenCount = 0;
471 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::startedChunkWithCheckpoint, AllowCrossThreadAccess(m_backgroundParser), chunk->inputCheckpoint));
473 for (const auto& xssInfo : chunk->xssInfos) {
474 m_textPosition = xssInfo->m_textPosition;
475 m_xssAuditorDelegate.didBlockScript(*xssInfo);
476 if (isStopped())
477 break;
480 for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) {
481 ASSERT(!isWaitingForScripts());
483 if (!chunk->startingScript && (it->type() == HTMLToken::StartTag || it->type() == HTMLToken::EndTag))
484 elementTokenCount++;
486 if (document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) {
488 // To match main-thread parser behavior (which never checks locationChangePending on the EOF path)
489 // we peek to see if this chunk has an EOF and process it anyway.
490 if (tokens->last().type() == HTMLToken::EndOfFile) {
491 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
492 prepareToStopParsing();
494 break;
497 m_textPosition = it->textPosition();
499 constructTreeFromCompactHTMLToken(*it);
501 if (isStopped())
502 break;
504 if (!m_queuedPreloads.isEmpty() && document()->documentElement())
505 m_preloader->takeAndPreload(m_queuedPreloads);
507 if (isWaitingForScripts()) {
508 ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch.
509 runScriptsForPausedTreeBuilder();
510 validateSpeculations(chunk.release());
511 break;
514 if (it->type() == HTMLToken::EndOfFile) {
515 ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch.
516 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
517 prepareToStopParsing();
518 break;
521 ASSERT(!m_tokenizer);
522 ASSERT(!m_token);
525 // Make sure all required pending text nodes are emitted before returning.
526 // This leaves "script", "style" and "svg" nodes text nodes intact.
527 if (!isStopped())
528 m_treeBuilder->flush(FlushIfAtTextLimit);
530 m_isParsingAtLineNumber = false;
532 return elementTokenCount;
535 void HTMLDocumentParser::pumpPendingSpeculations()
537 #if !ENABLE(OILPAN)
538 // ASSERT that this object is both attached to the Document and protected.
539 ASSERT(refCount() >= 2);
540 #endif
541 // If this assert fails, you need to call validateSpeculations to make sure
542 // m_tokenizer and m_token don't have state that invalidates m_speculations.
543 ASSERT(!m_tokenizer);
544 ASSERT(!m_token);
545 ASSERT(!m_lastChunkBeforeScript);
546 ASSERT(!isWaitingForScripts());
547 ASSERT(!isStopped());
548 ASSERT(!isScheduledForResume());
549 ASSERT(!inPumpSession());
551 // FIXME: Here should never be reached when there is a blocking script,
552 // but it happens in unknown scenarios. See https://crbug.com/440901
553 if (isWaitingForScripts()) {
554 m_parserScheduler->scheduleForResume();
555 return;
558 // Do not allow pumping speculations in nested event loops.
559 if (m_pumpSpeculationsSessionNestingLevel) {
560 m_parserScheduler->scheduleForResume();
561 return;
564 // FIXME: Pass in current input length.
565 TRACE_EVENT_BEGIN1("devtools.timeline", "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), lineNumber().zeroBasedInt()));
567 SpeculationsPumpSession session(m_pumpSpeculationsSessionNestingLevel, contextForParsingSession());
568 while (!m_speculations.isEmpty()) {
569 ASSERT(!isScheduledForResume());
570 size_t elementTokenCount = processParsedChunkFromBackgroundParser(m_speculations.takeFirst());
571 session.addedElementTokens(elementTokenCount);
573 // Always check isParsing first as m_document may be null.
574 // Surprisingly, isScheduledForResume() may be set here as a result of
575 // processParsedChunkFromBackgroundParser running arbitrary javascript
576 // which invokes nested event loops. (e.g. inspector breakpoints)
577 if (!isParsing() || isWaitingForScripts() || isScheduledForResume())
578 break;
580 if (m_speculations.isEmpty() || m_parserScheduler->yieldIfNeeded(session, m_speculations.first()->startingScript))
581 break;
584 TRACE_EVENT_END1("devtools.timeline", "ParseHTML", "endData", InspectorParseHtmlEvent::endData(lineNumber().zeroBasedInt() - 1));
585 TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"), "UpdateCounters", TRACE_EVENT_SCOPE_THREAD, "data", InspectorUpdateCountersEvent::data());
588 void HTMLDocumentParser::forcePlaintextForTextDocument()
590 if (shouldUseThreading()) {
591 // This method is called before any data is appended, so we have to start
592 // the background parser ourselves.
593 if (!m_haveBackgroundParser)
594 startBackgroundParser();
596 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::forcePlaintextForTextDocument, AllowCrossThreadAccess(m_backgroundParser)));
597 } else
598 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
601 Document* HTMLDocumentParser::contextForParsingSession()
603 // The parsing session should interact with the document only when parsing
604 // non-fragments. Otherwise, we might delay the load event mistakenly.
605 if (isParsingFragment())
606 return nullptr;
607 return document();
610 void HTMLDocumentParser::pumpTokenizer()
612 ASSERT(!isStopped());
613 #if !ENABLE(OILPAN)
614 // ASSERT that this object is both attached to the Document and protected.
615 ASSERT(refCount() >= 2);
616 #endif
617 ASSERT(m_tokenizer);
618 ASSERT(m_token);
620 PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession());
622 // We tell the InspectorInstrumentation about every pump, even if we
623 // end up pumping nothing. It can filter out empty pumps itself.
624 // FIXME: m_input.current().length() is only accurate if we
625 // end up parsing the whole buffer in this pump. We should pass how
626 // much we parsed as part of didWriteHTML instead of willWriteHTML.
627 TRACE_EVENT_BEGIN1("devtools.timeline", "ParseHTML", "beginData", InspectorParseHtmlEvent::beginData(document(), m_input.current().currentLine().zeroBasedInt()));
629 if (!isParsingFragment())
630 m_xssAuditor.init(document(), &m_xssAuditorDelegate);
632 while (canTakeNextToken()) {
633 if (m_xssAuditor.isEnabled())
634 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token());
636 if (!m_tokenizer->nextToken(m_input.current(), token()))
637 break;
639 if (m_xssAuditor.isEnabled()) {
640 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token());
642 // We do not XSS filter innerHTML, which means we (intentionally) fail
643 // http/tests/security/xssAuditor/dom-write-innerHTML.html
644 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA())))
645 m_xssAuditorDelegate.didBlockScript(*xssInfo);
648 constructTreeFromHTMLToken();
649 ASSERT(isStopped() || token().isUninitialized());
652 #if !ENABLE(OILPAN)
653 // Ensure we haven't been totally deref'ed after pumping. Any caller of this
654 // function should be holding a RefPtr to this to ensure we weren't deleted.
655 ASSERT(refCount() >= 1);
656 #endif
658 if (isStopped())
659 return;
661 // There should only be PendingText left since the tree-builder always flushes
662 // the task queue before returning. In case that ever changes, crash.
663 m_treeBuilder->flush(FlushAlways);
664 RELEASE_ASSERT(!isStopped());
666 if (isWaitingForScripts()) {
667 ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
669 ASSERT(m_preloader);
670 // TODO(kouhei): m_preloader should be always available for synchronous parsing case,
671 // adding paranoia if for speculative crash fix for crbug.com/465478
672 if (m_preloader) {
673 if (!m_preloadScanner) {
674 m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options,
675 document()->url(),
676 CachedDocumentParameters::create(document())));
677 m_preloadScanner->appendToEnd(m_input.current());
679 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
683 TRACE_EVENT_END1("devtools.timeline", "ParseHTML", "endData", InspectorParseHtmlEvent::endData(m_input.current().currentLine().zeroBasedInt() - 1));
686 void HTMLDocumentParser::constructTreeFromHTMLToken()
688 AtomicHTMLToken atomicToken(token());
690 // We clear the m_token in case constructTreeFromAtomicToken
691 // synchronously re-enters the parser. We don't clear the token immedately
692 // for Character tokens because the AtomicHTMLToken avoids copying the
693 // characters by keeping a pointer to the underlying buffer in the
694 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
695 // the parser.
697 // FIXME: Stop clearing the m_token once we start running the parser off
698 // the main thread or once we stop allowing synchronous JavaScript
699 // execution from parseAttribute.
700 if (token().type() != HTMLToken::Character)
701 token().clear();
703 m_treeBuilder->constructTree(&atomicToken);
705 // FIXME: constructTree may synchronously cause Document to be detached.
706 if (!m_token)
707 return;
709 if (!token().isUninitialized()) {
710 ASSERT(token().type() == HTMLToken::Character);
711 token().clear();
715 void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken& compactToken)
717 AtomicHTMLToken token(compactToken);
718 m_treeBuilder->constructTree(&token);
721 bool HTMLDocumentParser::hasInsertionPoint()
723 // FIXME: The wasCreatedByScript() branch here might not be fully correct.
724 // Our model of the EOF character differs slightly from the one in
725 // the spec because our treatment is uniform between network-sourced
726 // and script-sourced input streams whereas the spec treats them
727 // differently.
728 return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
731 void HTMLDocumentParser::insert(const SegmentedString& source)
733 if (isStopped())
734 return;
736 TRACE_EVENT1("blink", "HTMLDocumentParser::insert", "source_length", source.length());
738 // pumpTokenizer can cause this parser to be detached from the Document,
739 // but we need to ensure it isn't deleted yet.
740 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
742 if (!m_tokenizer) {
743 ASSERT(!inPumpSession());
744 ASSERT(m_haveBackgroundParser || wasCreatedByScript());
745 m_token = adoptPtr(new HTMLToken);
746 m_tokenizer = HTMLTokenizer::create(m_options);
749 SegmentedString excludedLineNumberSource(source);
750 excludedLineNumberSource.setExcludeLineNumbers();
751 m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
752 pumpTokenizerIfPossible();
754 if (isWaitingForScripts()) {
755 // Check the document.write() output with a separate preload scanner as
756 // the main scanner can't deal with insertions.
757 if (!m_insertionPreloadScanner) {
758 m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options,
759 document()->url(),
760 CachedDocumentParameters::create(document())));
763 m_insertionPreloadScanner->appendToEnd(source);
764 m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL());
767 endIfDelayed();
770 void HTMLDocumentParser::startBackgroundParser()
772 ASSERT(!isStopped());
773 ASSERT(shouldUseThreading());
774 ASSERT(!m_haveBackgroundParser);
775 ASSERT(document());
776 m_haveBackgroundParser = true;
778 RefPtr<WeakReference<BackgroundHTMLParser>> reference = WeakReference<BackgroundHTMLParser>::createUnbound();
779 m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference);
781 // FIXME(oysteine): Disabled due to crbug.com/398076 until a full fix can be implemented.
782 if (RuntimeEnabledFeatures::threadedParserDataReceiverEnabled()) {
783 if (DocumentLoader* loader = document()->loader())
784 loader->attachThreadedDataReceiver(ParserDataReceiver::create(m_backgroundParser, document()->contextDocument().get()));
787 OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration);
788 config->options = m_options;
789 config->parser = m_weakFactory.createWeakPtr();
790 config->xssAuditor = adoptPtr(new XSSAuditor);
791 config->xssAuditor->init(document(), &m_xssAuditorDelegate);
792 config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy(), CachedDocumentParameters::create(document())));
793 config->decoder = takeDecoder();
794 if (document()->settings()) {
795 if (document()->settings()->backgroundHtmlParserOutstandingTokenLimit())
796 config->outstandingTokenLimit = document()->settings()->backgroundHtmlParserOutstandingTokenLimit();
797 if (document()->settings()->backgroundHtmlParserPendingTokenLimit())
798 config->pendingTokenLimit = document()->settings()->backgroundHtmlParserPendingTokenLimit();
801 ASSERT(config->xssAuditor->isSafeToSendToAnotherThread());
802 ASSERT(config->preloadScanner->isSafeToSendToAnotherThread());
803 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::start, reference.release(), config.release(),
804 AllowCrossThreadAccess(Platform::current()->currentThread()->scheduler())));
807 void HTMLDocumentParser::stopBackgroundParser()
809 ASSERT(shouldUseThreading());
810 ASSERT(m_haveBackgroundParser);
811 m_haveBackgroundParser = false;
813 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::stop, AllowCrossThreadAccess(m_backgroundParser)));
814 m_weakFactory.revokeAll();
817 void HTMLDocumentParser::append(const String& inputSource)
819 if (isStopped())
820 return;
822 // We should never reach this point if we're using a parser thread,
823 // as appendBytes() will directly ship the data to the thread.
824 ASSERT(!shouldUseThreading());
826 // pumpTokenizer can cause this parser to be detached from the Document,
827 // but we need to ensure it isn't deleted yet.
828 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
829 TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("blink.debug"), "HTMLDocumentParser::append", "size", inputSource.length());
830 const SegmentedString source(inputSource);
832 if (m_preloadScanner) {
833 if (m_input.current().isEmpty() && !isWaitingForScripts()) {
834 // We have parsed until the end of the current input and so are now moving ahead of the preload scanner.
835 // Clear the scanner so we know to scan starting from the current input point if we block again.
836 m_preloadScanner.clear();
837 } else {
838 m_preloadScanner->appendToEnd(source);
839 if (isWaitingForScripts())
840 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
844 m_input.appendToEnd(source);
846 if (inPumpSession()) {
847 // We've gotten data off the network in a nested write.
848 // We don't want to consume any more of the input stream now. Do
849 // not worry. We'll consume this data in a less-nested write().
850 return;
853 pumpTokenizerIfPossible();
855 endIfDelayed();
858 void HTMLDocumentParser::end()
860 ASSERT(!isDetached());
861 ASSERT(!isScheduledForResume());
863 if (m_haveBackgroundParser)
864 stopBackgroundParser();
866 // Informs the the rest of WebCore that parsing is really finished (and deletes this).
867 m_treeBuilder->finished();
869 DocumentParser::stopParsing();
872 void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
874 ASSERT(isStopping());
875 // FIXME: It may not be correct to disable this for the background parser.
876 // That means hasInsertionPoint() may not be correct in some cases.
877 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);
878 if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing())
879 return;
880 end();
883 void HTMLDocumentParser::attemptToEnd()
885 // finish() indicates we will not receive any more data. If we are waiting on
886 // an external script to load, we can't finish parsing quite yet.
888 if (shouldDelayEnd()) {
889 m_endWasDelayed = true;
890 return;
892 prepareToStopParsing();
895 void HTMLDocumentParser::endIfDelayed()
897 // If we've already been detached, don't bother ending.
898 if (isDetached())
899 return;
901 if (!m_endWasDelayed || shouldDelayEnd())
902 return;
904 m_endWasDelayed = false;
905 prepareToStopParsing();
908 void HTMLDocumentParser::finish()
910 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
911 // makes sense to call any methods on DocumentParser once it's been stopped.
912 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
914 // flush may ending up executing arbitrary script, and possibly detach the parser.
915 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
916 flush();
917 if (isDetached())
918 return;
920 // Empty documents never got an append() call, and thus have never started
921 // a background parser. In those cases, we ignore shouldUseThreading()
922 // and fall through to the non-threading case.
923 if (m_haveBackgroundParser) {
924 if (!m_input.haveSeenEndOfFile())
925 m_input.closeWithoutMarkingEndOfFile();
926 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::finish, AllowCrossThreadAccess(m_backgroundParser)));
927 return;
930 if (!m_tokenizer) {
931 ASSERT(!m_token);
932 // We're finishing before receiving any data. Rather than booting up
933 // the background parser just to spin it down, we finish parsing
934 // synchronously.
935 m_token = adoptPtr(new HTMLToken);
936 m_tokenizer = HTMLTokenizer::create(m_options);
939 // We're not going to get any more data off the network, so we tell the
940 // input stream we've reached the end of file. finish() can be called more
941 // than once, if the first time does not call end().
942 if (!m_input.haveSeenEndOfFile())
943 m_input.markEndOfFile();
945 attemptToEnd();
948 bool HTMLDocumentParser::isExecutingScript() const
950 if (!m_scriptRunner)
951 return false;
952 return m_scriptRunner->isExecutingScript();
955 bool HTMLDocumentParser::isParsingAtLineNumber() const
957 return m_isParsingAtLineNumber && ScriptableDocumentParser::isParsingAtLineNumber();
960 OrdinalNumber HTMLDocumentParser::lineNumber() const
962 if (m_haveBackgroundParser)
963 return m_textPosition.m_line;
965 return m_input.current().currentLine();
968 TextPosition HTMLDocumentParser::textPosition() const
970 if (m_haveBackgroundParser)
971 return m_textPosition;
973 const SegmentedString& currentString = m_input.current();
974 OrdinalNumber line = currentString.currentLine();
975 OrdinalNumber column = currentString.currentColumn();
977 return TextPosition(line, column);
980 bool HTMLDocumentParser::isWaitingForScripts() const
982 // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser
983 // where the script is transfered from the treebuilder to the script runner.
984 // The script runner will hold the script until its loaded and run. During
985 // any of this time, we want to count ourselves as "waiting for a script" and thus
986 // run the preload scanner, as well as delay completion of parsing.
987 bool treeBuilderHasBlockingScript = m_treeBuilder->hasParserBlockingScript();
988 bool scriptRunnerHasBlockingScript = m_scriptRunner && m_scriptRunner->hasParserBlockingScript();
989 // Since the parser is paused while a script runner has a blocking script, it should
990 // never be possible to end up with both objects holding a blocking script.
991 ASSERT(!(treeBuilderHasBlockingScript && scriptRunnerHasBlockingScript));
992 // If either object has a blocking script, the parser should be paused.
993 return treeBuilderHasBlockingScript || scriptRunnerHasBlockingScript;
996 void HTMLDocumentParser::resumeParsingAfterScriptExecution()
998 ASSERT(!isExecutingScript());
999 ASSERT(!isWaitingForScripts());
1001 if (m_haveBackgroundParser) {
1002 validateSpeculations(m_lastChunkBeforeScript.release());
1003 ASSERT(!m_lastChunkBeforeScript);
1004 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
1005 // but we need to ensure it isn't deleted yet.
1006 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
1007 pumpPendingSpeculations();
1008 return;
1011 m_insertionPreloadScanner.clear();
1012 pumpTokenizerIfPossible();
1013 endIfDelayed();
1016 void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan()
1018 ASSERT(m_preloadScanner);
1019 m_preloadScanner->appendToEnd(m_input.current());
1020 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
1023 void HTMLDocumentParser::notifyScriptLoaded(Resource* cachedResource)
1025 // pumpTokenizer can cause this parser to be detached from the Document,
1026 // but we need to ensure it isn't deleted yet.
1027 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
1029 ASSERT(m_scriptRunner);
1030 ASSERT(!isExecutingScript());
1032 if (isStopped()) {
1033 return;
1036 if (isStopping()) {
1037 attemptToRunDeferredScriptsAndEnd();
1038 return;
1041 m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
1042 if (!isWaitingForScripts())
1043 resumeParsingAfterScriptExecution();
1046 void HTMLDocumentParser::executeScriptsWaitingForResources()
1048 // Document only calls this when the Document owns the DocumentParser
1049 // so this will not be called in the DocumentFragment case.
1050 ASSERT(m_scriptRunner);
1051 // Ignore calls unless we have a script blocking the parser waiting on a
1052 // stylesheet load. Otherwise we are currently parsing and this
1053 // is a re-entrant call from encountering a </ style> tag.
1054 if (!m_scriptRunner->hasScriptsWaitingForResources())
1055 return;
1057 // pumpTokenizer can cause this parser to be detached from the Document,
1058 // but we need to ensure it isn't deleted yet.
1059 RefPtrWillBeRawPtr<HTMLDocumentParser> protect(this);
1060 m_scriptRunner->executeScriptsWaitingForResources();
1061 if (!isWaitingForScripts())
1062 resumeParsingAfterScriptExecution();
1065 void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
1067 RefPtrWillBeRawPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
1068 parser->append(source);
1069 parser->finish();
1070 ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
1071 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
1074 void HTMLDocumentParser::suspendScheduledTasks()
1076 ASSERT(!m_tasksWereSuspended);
1077 m_tasksWereSuspended = true;
1078 if (m_parserScheduler)
1079 m_parserScheduler->suspend();
1082 void HTMLDocumentParser::resumeScheduledTasks()
1084 ASSERT(m_tasksWereSuspended);
1085 m_tasksWereSuspended = false;
1086 if (m_parserScheduler)
1087 m_parserScheduler->resume();
1090 void HTMLDocumentParser::appendBytes(const char* data, size_t length)
1092 if (!length || isStopped())
1093 return;
1095 if (shouldUseThreading()) {
1096 if (!m_haveBackgroundParser)
1097 startBackgroundParser();
1099 OwnPtr<Vector<char>> buffer = adoptPtr(new Vector<char>(length));
1100 memcpy(buffer->data(), data, length);
1101 TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("blink.debug"), "HTMLDocumentParser::appendBytes", "size", (unsigned)length);
1103 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::appendRawBytesFromMainThread, AllowCrossThreadAccess(m_backgroundParser), buffer.release()));
1104 return;
1107 DecodedDataDocumentParser::appendBytes(data, length);
1110 void HTMLDocumentParser::flush()
1112 // If we've got no decoder, we never received any data.
1113 if (isDetached() || needsDecoder())
1114 return;
1116 if (m_haveBackgroundParser)
1117 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::flush, AllowCrossThreadAccess(m_backgroundParser)));
1118 else
1119 DecodedDataDocumentParser::flush();
1122 void HTMLDocumentParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)
1124 ASSERT(decoder);
1125 DecodedDataDocumentParser::setDecoder(decoder);
1127 if (m_haveBackgroundParser)
1128 HTMLParserThread::shared()->postTask(threadSafeBind(&BackgroundHTMLParser::setDecoder, AllowCrossThreadAccess(m_backgroundParser), takeDecoder()));