Roll src/third_party/WebKit 9f7fb92:f103b33 (svn 202621:202622)
[chromium-blink-merge.git] / ios / third_party / blink / src / html_tokenizer.h
blob70623aef6ca7a064f5ede4240a0cab328cda5a35
1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
27 #ifndef HTMLTokenizer_h
28 #define HTMLTokenizer_h
30 #include "ios/third_party/blink/src/html_input_stream_preprocessor.h"
31 #include "ios/third_party/blink/src/html_token.h"
33 namespace WebCore {
35 class HTMLTokenizer {
36 WTF_MAKE_NONCOPYABLE(HTMLTokenizer);
38 public:
39 HTMLTokenizer();
40 ~HTMLTokenizer();
42 void reset();
44 enum State {
45 DataState,
46 TagOpenState,
47 EndTagOpenState,
48 TagNameState,
49 BeforeAttributeNameState,
50 AttributeNameState,
51 AfterAttributeNameState,
52 BeforeAttributeValueState,
53 AttributeValueDoubleQuotedState,
54 AttributeValueSingleQuotedState,
55 AttributeValueUnquotedState,
56 AfterAttributeValueQuotedState,
57 SelfClosingStartTagState,
58 BogusCommentState,
59 // The ContinueBogusCommentState is not in the HTML5 spec, but we use
60 // it internally to keep track of whether we've started the bogus
61 // comment token yet.
62 ContinueBogusCommentState,
63 MarkupDeclarationOpenState,
64 CommentStartState,
65 CommentStartDashState,
66 CommentState,
67 CommentEndDashState,
68 CommentEndState,
69 CommentEndBangState,
70 DOCTYPEState,
71 BeforeDOCTYPENameState,
72 DOCTYPENameState,
73 AfterDOCTYPENameState,
74 AfterDOCTYPEPublicKeywordState,
75 BeforeDOCTYPEPublicIdentifierState,
76 DOCTYPEPublicIdentifierDoubleQuotedState,
77 DOCTYPEPublicIdentifierSingleQuotedState,
78 AfterDOCTYPEPublicIdentifierState,
79 BetweenDOCTYPEPublicAndSystemIdentifiersState,
80 AfterDOCTYPESystemKeywordState,
81 BeforeDOCTYPESystemIdentifierState,
82 DOCTYPESystemIdentifierDoubleQuotedState,
83 DOCTYPESystemIdentifierSingleQuotedState,
84 AfterDOCTYPESystemIdentifierState,
85 BogusDOCTYPEState,
86 CDATASectionState,
87 // These CDATA states are not in the HTML5 spec, but we use them internally.
88 CDATASectionRightSquareBracketState,
89 CDATASectionDoubleRightSquareBracketState,
92 // This function returns true if it emits a token. Otherwise, callers
93 // must provide the same (in progress) token on the next call (unless
94 // they call reset() first).
95 bool nextToken(CharacterProvider&, HTMLToken&);
97 State state() const { return m_state; }
98 void setState(State state) { m_state = state; }
100 inline bool shouldSkipNullCharacters() const
102 return m_state == HTMLTokenizer::DataState;
105 private:
106 inline void parseError();
108 inline bool emitAndResumeIn(CharacterProvider& source, State state)
110 ASSERT(m_token->type() != HTMLToken::Uninitialized);
111 m_state = state;
112 source.next();
113 return true;
116 inline bool emitAndReconsumeIn(CharacterProvider&, State state)
118 ASSERT(m_token->type() != HTMLToken::Uninitialized);
119 m_state = state;
120 return true;
123 inline bool emitEndOfFile(CharacterProvider& source)
125 if (haveBufferedCharacterToken())
126 return true;
127 m_state = HTMLTokenizer::DataState;
128 source.next();
129 m_token->clear();
130 m_token->makeEndOfFile();
131 return true;
134 // Return whether we need to emit a character token before dealing with
135 // the buffered end tag.
136 inline bool flushBufferedEndTag(CharacterProvider&);
138 inline bool haveBufferedCharacterToken()
140 return m_token->type() == HTMLToken::Character;
143 State m_state;
145 // m_token is owned by the caller. If nextToken is not on the stack,
146 // this member might be pointing to unallocated memory.
147 HTMLToken* m_token;
149 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
150 LChar m_additionalAllowedCharacter;
152 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
153 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
157 #endif