2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef HTMLDocumentParser_h
27 #define HTMLDocumentParser_h
29 #include "core/dom/ParserContentPolicy.h"
30 #include "core/dom/ScriptableDocumentParser.h"
31 #include "core/fetch/ResourceClient.h"
32 #include "core/frame/UseCounter.h"
33 #include "core/html/parser/BackgroundHTMLInputStream.h"
34 #include "core/html/parser/CompactHTMLToken.h"
35 #include "core/html/parser/HTMLInputStream.h"
36 #include "core/html/parser/HTMLParserOptions.h"
37 #include "core/html/parser/HTMLPreloadScanner.h"
38 #include "core/html/parser/HTMLScriptRunnerHost.h"
39 #include "core/html/parser/HTMLSourceTracker.h"
40 #include "core/html/parser/HTMLToken.h"
41 #include "core/html/parser/HTMLTokenizer.h"
42 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
43 #include "core/html/parser/ParserSynchronizationPolicy.h"
44 #include "core/html/parser/TextResourceDecoder.h"
45 #include "core/html/parser/XSSAuditor.h"
46 #include "core/html/parser/XSSAuditorDelegate.h"
47 #include "platform/text/SegmentedString.h"
48 #include "wtf/Deque.h"
49 #include "wtf/OwnPtr.h"
50 #include "wtf/WeakPtr.h"
51 #include "wtf/text/TextPosition.h"
// Forward declarations. In the visible part of this header these types are
// only named as pointer/reference parameters or return types, or as template
// arguments of smart-pointer members (OwnPtr, OwnPtrWillBeMember, WeakPtr).
55 class BackgroundHTMLParser
;
56 class CompactHTMLToken
;
58 class DocumentEncodingData
;
59 class DocumentFragment
;
62 class HTMLParserScheduler
;
63 class HTMLScriptRunner
;
64 class HTMLTreeBuilder
;
65 class HTMLResourcePreloader
;
69 class HTMLDocumentParser
: public ScriptableDocumentParser
, private HTMLScriptRunnerHost
{
70 WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED(HTMLDocumentParser
);
71 WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser
);
// Static factory taking an HTMLDocument. Constructs an HTMLDocumentParser with
// `new`, forwarding document, reportErrors and backgroundParsingPolicy to the
// three-argument constructor, and returns that pointer wrapped by
// adoptRefWillBeNoop().
73 static PassRefPtrWillBeRawPtr
<HTMLDocumentParser
> create(HTMLDocument
& document
, bool reportErrors
, ParserSynchronizationPolicy backgroundParsingPolicy
)
75 return adoptRefWillBeNoop(new HTMLDocumentParser(document
, reportErrors
, backgroundParsingPolicy
));
77 ~HTMLDocumentParser() override
;
78 DECLARE_VIRTUAL_TRACE();
80 // Exposed for HTMLParserScheduler
81 void resumeParsingAfterYield();
83 static void parseDocumentFragment(const String
&, DocumentFragment
*, Element
* contextElement
, ParserContentPolicy
= AllowScriptingContent
);
85 HTMLTokenizer
* tokenizer() const { return m_tokenizer
.get(); }
87 TextPosition
textPosition() const final
;
88 bool isParsingAtLineNumber() const final
;
89 OrdinalNumber
lineNumber() const final
;
91 void suspendScheduledTasks() final
;
92 void resumeScheduledTasks() final
;
95 WTF_MAKE_FAST_ALLOCATED(ParsedChunk
);
97 OwnPtr
<CompactHTMLTokenStream
> tokens
;
98 PreloadRequestStream preloads
;
99 XSSInfoStream xssInfos
;
100 HTMLTokenizer::State tokenizerState
;
101 HTMLTreeBuilderSimulator::State treeBuilderState
;
102 HTMLInputCheckpoint inputCheckpoint
;
103 TokenPreloadScannerCheckpoint preloadScannerCheckpoint
;
106 void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr
<ParsedChunk
>);
107 void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData
&);
109 void appendBytes(const char* bytes
, size_t length
) override
;
111 void setDecoder(PassOwnPtr
<TextResourceDecoder
>) final
;
113 UseCounter
* useCounter() { return UseCounter::getFrom(contextForParsingSession()); }
116 void insert(const SegmentedString
&) final
;
117 void append(const String
&) override
;
120 HTMLDocumentParser(HTMLDocument
&, bool reportErrors
, ParserSynchronizationPolicy
);
121 HTMLDocumentParser(DocumentFragment
*, Element
* contextElement
, ParserContentPolicy
);
123 HTMLTreeBuilder
* treeBuilder() const { return m_treeBuilder
.get(); }
125 void forcePlaintextForTextDocument();
// Static factory taking a DocumentFragment. Constructs an HTMLDocumentParser
// with `new`, forwarding fragment, contextElement and parserContentPolicy to
// the fragment constructor, and returns that pointer wrapped by
// adoptRefWillBeNoop().
128 static PassRefPtrWillBeRawPtr
<HTMLDocumentParser
> create(DocumentFragment
* fragment
, Element
* contextElement
, ParserContentPolicy parserContentPolicy
)
130 return adoptRefWillBeNoop(new HTMLDocumentParser(fragment
, contextElement
, parserContentPolicy
));
135 bool hasInsertionPoint() final
;
136 bool processingData() const final
;
137 void prepareToStopParsing() final
;
138 void stopParsing() final
;
139 bool isWaitingForScripts() const final
;
140 bool isExecutingScript() const final
;
141 void executeScriptsWaitingForResources() final
;
143 // HTMLScriptRunnerHost
144 void notifyScriptLoaded(Resource
*) final
;
145 HTMLInputStream
& inputStream() final
{ return m_input
; }
146 bool hasPreloadScanner() const final
{ return m_preloadScanner
.get() && !shouldUseThreading(); }
147 void appendCurrentInputStreamToPreloadScannerAndScan() final
;
149 void startBackgroundParser();
150 void stopBackgroundParser();
151 void validateSpeculations(PassOwnPtr
<ParsedChunk
> lastChunk
);
152 void discardSpeculationsAndResumeFrom(PassOwnPtr
<ParsedChunk
> lastChunk
, PassOwnPtr
<HTMLToken
>, PassOwnPtr
<HTMLTokenizer
>);
153 size_t processParsedChunkFromBackgroundParser(PassOwnPtr
<ParsedChunk
>);
154 void pumpPendingSpeculations();
156 Document
* contextForParsingSession();
158 bool canTakeNextToken();
159 void pumpTokenizer();
160 void pumpTokenizerIfPossible();
161 void constructTreeFromHTMLToken();
162 void constructTreeFromCompactHTMLToken(const CompactHTMLToken
&);
164 void runScriptsForPausedTreeBuilder();
165 void resumeParsingAfterScriptExecution();
169 void attemptToRunDeferredScriptsAndEnd();
172 bool shouldUseThreading() const { return m_shouldUseThreading
; }
174 bool isParsingFragment() const;
175 bool isScheduledForResume() const;
176 bool inPumpSession() const { return m_pumpSessionNestingLevel
> 0; }
177 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
179 HTMLToken
& token() { return *m_token
; }
181 HTMLParserOptions m_options
;
182 HTMLInputStream m_input
;
184 OwnPtr
<HTMLToken
> m_token
;
185 OwnPtr
<HTMLTokenizer
> m_tokenizer
;
186 OwnPtrWillBeMember
<HTMLScriptRunner
> m_scriptRunner
;
187 OwnPtrWillBeMember
<HTMLTreeBuilder
> m_treeBuilder
;
188 OwnPtr
<HTMLPreloadScanner
> m_preloadScanner
;
189 OwnPtr
<HTMLPreloadScanner
> m_insertionPreloadScanner
;
190 OwnPtr
<HTMLParserScheduler
> m_parserScheduler
;
191 HTMLSourceTracker m_sourceTracker
;
192 TextPosition m_textPosition
;
193 XSSAuditor m_xssAuditor
;
194 XSSAuditorDelegate m_xssAuditorDelegate
;
196 // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
197 // so they can be set and cleared together and passed between threads together.
198 OwnPtr
<ParsedChunk
> m_lastChunkBeforeScript
;
199 Deque
<OwnPtr
<ParsedChunk
>> m_speculations
;
200 WeakPtrFactory
<HTMLDocumentParser
> m_weakFactory
;
201 WeakPtr
<BackgroundHTMLParser
> m_backgroundParser
;
202 OwnPtrWillBeMember
<HTMLResourcePreloader
> m_preloader
;
203 PreloadRequestStream m_queuedPreloads
;
205 bool m_shouldUseThreading
;
206 bool m_endWasDelayed
;
207 bool m_haveBackgroundParser
;
208 bool m_tasksWereSuspended
;
209 unsigned m_pumpSessionNestingLevel
;
210 unsigned m_pumpSpeculationsSessionNestingLevel
;
211 bool m_isParsingAtLineNumber
;