2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #ifndef HTMLTokenizer_h
28 #define HTMLTokenizer_h
30 #include "ios/third_party/blink/src/html_input_stream_preprocessor.h"
31 #include "ios/third_party/blink/src/html_token.h"
// Invokes the WTF_MAKE_NONCOPYABLE macro for HTMLTokenizer. The macro is
// defined outside this file; going by its name it disables copying of
// HTMLTokenizer instances -- confirm against the macro definition.
36 WTF_MAKE_NONCOPYABLE(HTMLTokenizer
);
// Tokenizer state names. These appear to be enumerators of the State type
// used by state()/setState(); the enum's opening and closing lines and
// several enumerators are not visible in this listing.
// NOTE(review): enumerator order determines the underlying values, so keep
// the order intact when restoring the missing entries.
49 BeforeAttributeNameState
,
51 AfterAttributeNameState
,
52 BeforeAttributeValueState
,
53 AttributeValueDoubleQuotedState
,
54 AttributeValueSingleQuotedState
,
55 AttributeValueUnquotedState
,
56 AfterAttributeValueQuotedState
,
57 SelfClosingStartTagState
,
59 // The ContinueBogusCommentState is not in the HTML5 spec, but we use
60 // it internally to keep track of whether we've started the bogus
// comment token yet. NOTE(review): the end of this sentence was missing
// from this listing and has been restored by inference -- confirm against
// the upstream header.
62 ContinueBogusCommentState
,
63 MarkupDeclarationOpenState
,
65 CommentStartDashState
,
71 BeforeDOCTYPENameState
,
73 AfterDOCTYPENameState
,
74 AfterDOCTYPEPublicKeywordState
,
75 BeforeDOCTYPEPublicIdentifierState
,
76 DOCTYPEPublicIdentifierDoubleQuotedState
,
77 DOCTYPEPublicIdentifierSingleQuotedState
,
78 AfterDOCTYPEPublicIdentifierState
,
79 BetweenDOCTYPEPublicAndSystemIdentifiersState
,
80 AfterDOCTYPESystemKeywordState
,
81 BeforeDOCTYPESystemIdentifierState
,
82 DOCTYPESystemIdentifierDoubleQuotedState
,
83 DOCTYPESystemIdentifierSingleQuotedState
,
84 AfterDOCTYPESystemIdentifierState
,
87 // These CDATA states are not in the HTML5 spec, but we use them internally.
88 CDATASectionRightSquareBracketState
,
89 CDATASectionDoubleRightSquareBracketState
,
// Tokenizer entry point. Takes a CharacterProvider (the input) and an
// HTMLToken (the token being built), both by non-const reference, and
// reports via its bool result whether a token was emitted.
92 // This function returns true if it emits a token. Otherwise, callers
93 // must provide the same (in progress) token on the next call (unless
94 // they call reset() first).
95 bool nextToken(CharacterProvider
&, HTMLToken
&);
// Accessor: returns the tokenizer's current state (the value of m_state).
// Const; does not modify the tokenizer.
97 State
state() const { return m_state
; }
// Mutator: overwrites m_state with the given state. No validation or other
// bookkeeping is performed here.
98 void setState(State state
) { m_state
= state
; }
// Returns true exactly when the tokenizer is currently in
// HTMLTokenizer::DataState (i.e. m_state == DataState).
// NOTE(review): the enclosing braces of this body are missing from this
// listing.
100 inline bool shouldSkipNullCharacters() const
102 return m_state
== HTMLTokenizer::DataState
;
// Declaration only; the definition is not visible in this chunk.
// NOTE(review): presumably the hook invoked when the tokenizer encounters a
// parse error -- confirm against the implementation.
106 inline void parseError();
// Takes the character source and the State to continue in. The only body
// statement visible here asserts that m_token has been initialized (its
// type is not HTMLToken::Uninitialized).
// NOTE(review): the braces and the remaining statements (including the
// return) are missing from this listing; by its name the function
// presumably emits the current token and switches to `state` -- confirm
// against the upstream header.
108 inline bool emitAndResumeIn(CharacterProvider
& source
, State state
)
110 ASSERT(m_token
->type() != HTMLToken::Uninitialized
);
// Takes an unnamed (unused by name) CharacterProvider reference and the
// State to continue in. The only body statement visible here asserts that
// m_token has been initialized (its type is not HTMLToken::Uninitialized).
// NOTE(review): the braces and the remaining statements (including the
// return) are missing from this listing; by its name the function
// presumably emits the current token and switches to `state` without
// consuming input -- confirm against the upstream header.
116 inline bool emitAndReconsumeIn(CharacterProvider
&, State state
)
118 ASSERT(m_token
->type() != HTMLToken::Uninitialized
);
// End-of-input handling. Visible steps, in order:
//   1. tests haveBufferedCharacterToken();
//   2. sets m_state to HTMLTokenizer::DataState;
//   3. calls m_token->makeEndOfFile().
// NOTE(review): several original lines are missing from this listing,
// including the statement controlled by the `if`, the braces, and the
// return statement(s). As shown, the `if` would wrongly appear to guard the
// m_state assignment; restore the missing lines from the upstream header
// before relying on this text.
123 inline bool emitEndOfFile(CharacterProvider
& source
)
125 if (haveBufferedCharacterToken())
127 m_state
= HTMLTokenizer::DataState
;
130 m_token
->makeEndOfFile();
// Declaration only; the definition is not visible in this chunk. Takes a
// CharacterProvider by non-const reference and returns bool.
134 // Return whether we need to emit a character token before dealing with
135 // the buffered end tag.
136 inline bool flushBufferedEndTag(CharacterProvider
&);
// Returns true exactly when the token m_token points at currently has type
// HTMLToken::Character. Dereferences m_token, so m_token must be valid when
// this is called.
// NOTE(review): the enclosing braces of this body are missing from this
// listing.
138 inline bool haveBufferedCharacterToken()
140 return m_token
->type() == HTMLToken::Character
;
145 // m_token is owned by the caller. If nextToken is not on the stack,
146 // this member might be pointing to unallocated memory.
// Holds a single LChar: the value the spec section linked below calls the
// "additional allowed character". Where it is set and consumed is not
// visible in this chunk.
149 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
150 LChar m_additionalAllowedCharacter
;
// InputStreamPreprocessor instantiated for HTMLTokenizer; the template comes
// from html_input_stream_preprocessor.h. It corresponds to the spec's
// "preprocessing the input stream" step linked below; how the tokenizer
// drives it is not visible in this chunk.
152 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
153 InputStreamPreprocessor
<HTMLTokenizer
> m_inputStreamPreprocessor
;