2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "ios/third_party/blink/src/html_tokenizer.h"
30 #include "html_markup_tokenizer_inlines.h"
// Shorthands that bind the generic state-machine macros (BEGIN_STATE,
// RECONSUME_IN, ADVANCE_TO, SWITCH_TO) to the HTMLTokenizer class, so the
// state bodies only have to name the state. The generic macros are not
// defined in this file; presumably they come from
// html_markup_tokenizer_inlines.h -- confirm there for exact semantics.
34 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
35 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
36 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
37 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
// Constructs a tokenizer that starts in DataState with the additional
// allowed character cleared to '\0'. The input stream preprocessor is
// handed a pointer to this tokenizer.
// NOTE(review): the embedded line numbering has gaps here, so other member
// initializers and the constructor body are not visible in this listing.
39 HTMLTokenizer::HTMLTokenizer()
40 : m_state(HTMLTokenizer::DataState)
42 , m_additionalAllowedCharacter('\0')
43 , m_inputStreamPreprocessor(this)
// Destructor. Its body is not visible in this listing.
47 HTMLTokenizer::~HTMLTokenizer()
// Puts the tokenizer back into its starting configuration: the state
// becomes DataState and the additional allowed character is cleared to
// '\0'.
// NOTE(review): the embedded numbering skips a line between the two
// assignments, so any other field reset here is not visible.
51 void HTMLTokenizer::reset()
53 m_state = HTMLTokenizer::DataState;
55 m_additionalAllowedCharacter = '\0';
// Helper called by the FLUSH_AND_ADVANCE_TO macro. Takes the character
// source and returns a bool.
// NOTE(review): the return statements and at least one other statement are
// missing from this listing, so the meaning of the returned value cannot be
// stated from here.
58 bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
// Precondition: the in-progress token is either character data or has not
// been initialized yet.
60 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
// Branches on whether character data is pending; the guarded statement is
// not visible here.
62 if (m_token->type() == HTMLToken::Character)
// FLUSH_AND_ADVANCE_TO(stateName): records stateName in m_state, calls
// flushBufferedEndTag(source), and then -- unless the source is empty or the
// preprocessor cannot peek, in which case it returns
// haveBufferedCharacterToken() -- loads the next input character into cc.
// The expansion site must therefore have `source` and `cc` in scope and be
// inside a function returning bool.
// NOTE(review): some continuation lines of this macro (including whatever
// follows a true result from flushBufferedEndTag and the macro's tail) are
// not visible in this listing.
68 #define FLUSH_AND_ADVANCE_TO(stateName) \
70 m_state = HTMLTokenizer::stateName; \
71 if (flushBufferedEndTag(source)) \
73 if (source.isEmpty() \
74 || !m_inputStreamPreprocessor.peek(source)) \
75 return haveBufferedCharacterToken(); \
76 cc = m_inputStreamPreprocessor.nextInputCharacter(); \
// Runs the tokenizer state machine over `source`, working on `token`.
// Each HTML_BEGIN_STATE(...) section below handles one tokenizer state and
// examines the current input character `cc`. Every visible exit returns the
// bool produced by a helper (haveBufferedCharacterToken, emitEndOfFile,
// emitAndResumeIn, emitAndReconsumeIn); those helpers are defined outside
// this listing.
// NOTE(review): the embedded line numbers have gaps, so a number of branch
// conditions and statements of this function are not visible here. The
// per-state comments describe only what the visible lines show.
80 bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
82 // If we have a token in progress, then we're supposed to be called back
83 // with the same token so we can finish it.
84 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
// No input available (or the preprocessor cannot peek): return whatever
// haveBufferedCharacterToken() reports instead of entering the state machine.
87 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
88 return haveBufferedCharacterToken();
// cc is the current input character inspected by the state handlers below.
89 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
91 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
// Data state. Visible exits: advance to TagOpenState, return
// emitEndOfFile(source) on the end-of-file marker, or make sure the token is
// a character token and stay in DataState.
93 HTML_BEGIN_STATE(DataState) {
95 if (m_token->type() == HTMLToken::Character) {
96 // We have a bunch of character tokens queued up that we
97 // are emitting lazily here.
100 HTML_ADVANCE_TO(TagOpenState);
101 } else if (cc == kEndOfFileMarker)
102 return emitEndOfFile(source);
104 m_token->ensureIsCharacterToken();
105 HTML_ADVANCE_TO(DataState);
// Tag open state. ASCII letters begin a start tag (upper case is lowered
// first) and move to TagNameState; '?' is reconsumed in BogusCommentState;
// the last visible branch marks the token as character data and reconsumes
// in DataState. The conditions guarding the MarkupDeclarationOpenState and
// EndTagOpenState transitions are not visible.
110 HTML_BEGIN_STATE(TagOpenState) {
112 HTML_ADVANCE_TO(MarkupDeclarationOpenState);
114 HTML_ADVANCE_TO(EndTagOpenState);
115 else if (isASCIIUpper(cc)) {
116 m_token->beginStartTag(toLowerCase(cc));
117 HTML_ADVANCE_TO(TagNameState);
118 } else if (isASCIILower(cc)) {
119 m_token->beginStartTag(cc);
120 HTML_ADVANCE_TO(TagNameState);
121 } else if (cc == '?') {
123 // The spec consumes the current character before switching
124 // to the bogus comment state, but it's easier to implement
125 // if we reconsume the current character.
126 HTML_RECONSUME_IN(BogusCommentState);
129 m_token->ensureIsCharacterToken();
130 HTML_RECONSUME_IN(DataState);
// End tag open state. ASCII letters begin an end tag (lowered if upper case)
// and move to TagNameState; '>' returns to DataState; the end-of-file marker
// is reconsumed in DataState as character data; the last visible branch
// reconsumes in BogusCommentState.
135 HTML_BEGIN_STATE(EndTagOpenState) {
136 if (isASCIIUpper(cc)) {
137 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
138 HTML_ADVANCE_TO(TagNameState);
139 } else if (isASCIILower(cc)) {
140 m_token->beginEndTag(static_cast<LChar>(cc));
141 HTML_ADVANCE_TO(TagNameState);
142 } else if (cc == '>') {
144 HTML_ADVANCE_TO(DataState);
145 } else if (cc == kEndOfFileMarker) {
147 m_token->ensureIsCharacterToken();
148 HTML_RECONSUME_IN(DataState);
151 HTML_RECONSUME_IN(BogusCommentState);
// Tag name state. Whitespace ends the name (BeforeAttributeNameState);
// upper-case ASCII is appended to the name in lower case; the last visible
// branch appends cc unchanged. Other visible exits go to
// SelfClosingStartTagState, emit and resume in DataState, or reconsume the
// end-of-file marker in DataState (some guarding conditions are not visible).
156 HTML_BEGIN_STATE(TagNameState) {
157 if (isTokenizerWhitespace(cc))
158 HTML_ADVANCE_TO(BeforeAttributeNameState);
160 HTML_ADVANCE_TO(SelfClosingStartTagState);
162 return emitAndResumeIn(source, HTMLTokenizer::DataState);
163 else if (isASCIIUpper(cc)) {
164 m_token->appendToName(toLowerCase(cc));
165 HTML_ADVANCE_TO(TagNameState);
166 } else if (cc == kEndOfFileMarker) {
168 HTML_RECONSUME_IN(DataState);
170 m_token->appendToName(cc);
171 HTML_ADVANCE_TO(TagNameState);
// Before attribute name state. Whitespace is skipped; upper-case ASCII and
// the final branch both move to AttributeNameState. No attribute-recording
// call is visible in this listing.
176 HTML_BEGIN_STATE(BeforeAttributeNameState) {
177 if (isTokenizerWhitespace(cc))
178 HTML_ADVANCE_TO(BeforeAttributeNameState);
180 HTML_ADVANCE_TO(SelfClosingStartTagState);
182 return emitAndResumeIn(source, HTMLTokenizer::DataState);
183 else if (isASCIIUpper(cc)) {
184 HTML_ADVANCE_TO(AttributeNameState);
185 } else if (cc == kEndOfFileMarker) {
187 HTML_RECONSUME_IN(DataState);
// The statement guarded by this check is not visible in this listing.
189 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
191 HTML_ADVANCE_TO(AttributeNameState);
// Attribute name state. Whitespace -> AfterAttributeNameState, '/' ->
// SelfClosingStartTagState, '=' -> BeforeAttributeValueState, '>' emits and
// resumes in DataState, end-of-file is reconsumed in DataState; any other
// visible branch stays in AttributeNameState.
196 HTML_BEGIN_STATE(AttributeNameState) {
197 if (isTokenizerWhitespace(cc)) {
198 HTML_ADVANCE_TO(AfterAttributeNameState);
199 } else if (cc == '/') {
200 HTML_ADVANCE_TO(SelfClosingStartTagState);
201 } else if (cc == '=') {
202 HTML_ADVANCE_TO(BeforeAttributeValueState);
203 } else if (cc == '>') {
204 return emitAndResumeIn(source, HTMLTokenizer::DataState);
205 } else if (isASCIIUpper(cc)) {
206 HTML_ADVANCE_TO(AttributeNameState);
207 } else if (cc == kEndOfFileMarker) {
209 HTML_RECONSUME_IN(DataState);
// The statement guarded by this check is not visible in this listing.
211 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
213 HTML_ADVANCE_TO(AttributeNameState);
// After attribute name state. Whitespace is skipped; upper-case ASCII and
// the final branch start another attribute name (AttributeNameState). Other
// visible exits: SelfClosingStartTagState, BeforeAttributeValueState, emit
// and resume in DataState, or reconsume end-of-file in DataState.
218 HTML_BEGIN_STATE(AfterAttributeNameState) {
219 if (isTokenizerWhitespace(cc))
220 HTML_ADVANCE_TO(AfterAttributeNameState);
222 HTML_ADVANCE_TO(SelfClosingStartTagState);
224 HTML_ADVANCE_TO(BeforeAttributeValueState);
226 return emitAndResumeIn(source, HTMLTokenizer::DataState);
227 else if (isASCIIUpper(cc)) {
228 HTML_ADVANCE_TO(AttributeNameState);
229 } else if (cc == kEndOfFileMarker) {
231 HTML_RECONSUME_IN(DataState);
// The statement guarded by this check is not visible in this listing.
233 if (cc == '"' || cc == '\'' || cc == '<')
235 HTML_ADVANCE_TO(AttributeNameState);
// Before attribute value state. Whitespace is skipped; '"' and '\'' select
// the matching quoted-value state; '&' is reconsumed in the unquoted-value
// state; '>' emits and resumes in DataState; end-of-file is reconsumed in
// DataState; the final branch advances to the unquoted-value state.
240 HTML_BEGIN_STATE(BeforeAttributeValueState) {
241 if (isTokenizerWhitespace(cc))
242 HTML_ADVANCE_TO(BeforeAttributeValueState);
243 else if (cc == '"') {
244 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
245 } else if (cc == '&') {
246 HTML_RECONSUME_IN(AttributeValueUnquotedState);
247 } else if (cc == '\'') {
248 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
249 } else if (cc == '>') {
251 return emitAndResumeIn(source, HTMLTokenizer::DataState);
252 } else if (cc == kEndOfFileMarker) {
254 HTML_RECONSUME_IN(DataState);
// The statement guarded by this check is not visible in this listing.
256 if (cc == '<' || cc == '=' || cc == '`')
258 HTML_ADVANCE_TO(AttributeValueUnquotedState);
// Double-quoted attribute value state. Visible exits: advance to
// AfterAttributeValueQuotedState (condition not visible), reconsume
// end-of-file in DataState, or keep consuming in this state.
263 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
265 HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
266 } else if (cc == kEndOfFileMarker) {
268 HTML_RECONSUME_IN(DataState);
270 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
// Single-quoted attribute value state. Same visible shape as the
// double-quoted state above.
275 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
277 HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
278 } else if (cc == kEndOfFileMarker) {
280 HTML_RECONSUME_IN(DataState);
282 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
// Unquoted attribute value state. Whitespace ends the value
// (BeforeAttributeNameState); '>' emits and resumes in DataState;
// end-of-file is reconsumed in DataState; otherwise the value continues.
287 HTML_BEGIN_STATE(AttributeValueUnquotedState) {
288 if (isTokenizerWhitespace(cc)) {
289 HTML_ADVANCE_TO(BeforeAttributeNameState);
290 } else if (cc == '>') {
291 return emitAndResumeIn(source, HTMLTokenizer::DataState);
292 } else if (cc == kEndOfFileMarker) {
294 HTML_RECONSUME_IN(DataState);
// The statement guarded by this check is not visible in this listing.
296 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
298 HTML_ADVANCE_TO(AttributeValueUnquotedState);
// After quoted attribute value state. Whitespace -> BeforeAttributeNameState;
// end-of-file is reconsumed in DataState; the final branch reconsumes the
// character in BeforeAttributeNameState. The conditions for the
// SelfClosingStartTagState and emit exits are not visible.
303 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
304 if (isTokenizerWhitespace(cc))
305 HTML_ADVANCE_TO(BeforeAttributeNameState);
307 HTML_ADVANCE_TO(SelfClosingStartTagState);
309 return emitAndResumeIn(source, HTMLTokenizer::DataState);
310 else if (cc == kEndOfFileMarker) {
312 HTML_RECONSUME_IN(DataState);
315 HTML_RECONSUME_IN(BeforeAttributeNameState);
// Self-closing start tag state. Visible exits: emit and resume in DataState
// (condition not visible), reconsume end-of-file in DataState, or reconsume
// the character in BeforeAttributeNameState.
320 HTML_BEGIN_STATE(SelfClosingStartTagState) {
322 return emitAndResumeIn(source, HTMLTokenizer::DataState);
323 } else if (cc == kEndOfFileMarker) {
325 HTML_RECONSUME_IN(DataState);
328 HTML_RECONSUME_IN(BeforeAttributeNameState);
// Bogus comment state. Starts a comment token, then hands the same character
// to ContinueBogusCommentState.
333 HTML_BEGIN_STATE(BogusCommentState) {
334 m_token->beginComment();
335 HTML_RECONSUME_IN(ContinueBogusCommentState);
// Continue bogus comment state. Stays here until one of the two emit exits
// is taken; end-of-file emits and is reconsumed in DataState.
339 HTML_BEGIN_STATE(ContinueBogusCommentState) {
341 return emitAndResumeIn(source, HTMLTokenizer::DataState);
342 else if (cc == kEndOfFileMarker)
343 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
345 HTML_ADVANCE_TO(ContinueBogusCommentState);
// Markup declaration open state. Looks ahead in `source` for "--" (begins a
// comment token and switches to CommentStartState) or, when cc is 'D'/'d',
// for "doctype" (apparently matched ignoring case; switches to
// DOCTYPEState). If fewer bytes remain than the keyword being matched, it
// returns haveBufferedCharacterToken() -- presumably so the caller can retry
// with more input; confirm against the caller. Otherwise the character is
// reconsumed in BogusCommentState.
350 HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
351 DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
352 DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
354 if (source.startsWith(dashDashString, dashDashStringLength)) {
355 advanceAndASSERT(source, '-');
356 advanceAndASSERT(source, '-');
357 m_token->beginComment();
358 HTML_SWITCH_TO(CommentStartState);
359 } else if (source.remainingBytes() < dashDashStringLength)
360 return haveBufferedCharacterToken();
361 } else if (cc == 'D' || cc == 'd') {
362 if (source.startsWith(doctypeString, doctypeStringLength, true)) {
363 advanceStringAndASSERTIgnoringCase(source, doctypeString);
364 HTML_SWITCH_TO(DOCTYPEState);
365 } else if (source.remainingBytes() < doctypeStringLength)
366 return haveBufferedCharacterToken();
369 HTML_RECONSUME_IN(BogusCommentState);
// Comment start state. Visible exits: CommentStartDashState (condition not
// visible), '>' emits and resumes in DataState, end-of-file emits and is
// reconsumed in DataState, anything else moves to CommentState.
373 HTML_BEGIN_STATE(CommentStartState) {
375 HTML_ADVANCE_TO(CommentStartDashState);
376 else if (cc == '>') {
378 return emitAndResumeIn(source, HTMLTokenizer::DataState);
379 } else if (cc == kEndOfFileMarker) {
381 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
383 HTML_ADVANCE_TO(CommentState);
// Comment start dash state. Same visible shape as CommentStartState, except
// the first exit goes to CommentEndState.
388 HTML_BEGIN_STATE(CommentStartDashState) {
390 HTML_ADVANCE_TO(CommentEndState);
391 else if (cc == '>') {
393 return emitAndResumeIn(source, HTMLTokenizer::DataState);
394 } else if (cc == kEndOfFileMarker) {
396 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
398 HTML_ADVANCE_TO(CommentState);
// Comment state. Visible exits: CommentEndDashState (condition not visible),
// emit on end-of-file, or stay in CommentState.
403 HTML_BEGIN_STATE(CommentState) {
405 HTML_ADVANCE_TO(CommentEndDashState);
406 else if (cc == kEndOfFileMarker) {
408 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
410 HTML_ADVANCE_TO(CommentState);
// Comment end dash state. Visible exits: CommentEndState (condition not
// visible), emit on end-of-file, or fall back to CommentState.
415 HTML_BEGIN_STATE(CommentEndDashState) {
417 HTML_ADVANCE_TO(CommentEndState);
418 else if (cc == kEndOfFileMarker) {
420 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
422 HTML_ADVANCE_TO(CommentState);
// Comment end state. Visible exits: emit and resume in DataState (condition
// not visible), '!' -> CommentEndBangState, '-' stays here, end-of-file
// emits and is reconsumed in DataState, anything else returns to
// CommentState.
427 HTML_BEGIN_STATE(CommentEndState) {
429 return emitAndResumeIn(source, HTMLTokenizer::DataState);
430 else if (cc == '!') {
432 HTML_ADVANCE_TO(CommentEndBangState);
433 } else if (cc == '-') {
435 HTML_ADVANCE_TO(CommentEndState);
436 } else if (cc == kEndOfFileMarker) {
438 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
441 HTML_ADVANCE_TO(CommentState);
// Comment end bang state. Visible exits: CommentEndDashState (condition not
// visible), '>' emits and resumes in DataState, end-of-file emits and is
// reconsumed in DataState, anything else returns to CommentState.
446 HTML_BEGIN_STATE(CommentEndBangState) {
448 HTML_ADVANCE_TO(CommentEndDashState);
449 } else if (cc == '>')
450 return emitAndResumeIn(source, HTMLTokenizer::DataState);
451 else if (cc == kEndOfFileMarker) {
453 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
455 HTML_ADVANCE_TO(CommentState);
// DOCTYPE state. Whitespace -> BeforeDOCTYPENameState; end-of-file begins a
// DOCTYPE token and emits it; the final branch reconsumes the character in
// BeforeDOCTYPENameState.
460 HTML_BEGIN_STATE(DOCTYPEState) {
461 if (isTokenizerWhitespace(cc))
462 HTML_ADVANCE_TO(BeforeDOCTYPENameState);
463 else if (cc == kEndOfFileMarker) {
465 m_token->beginDOCTYPE();
466 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
469 HTML_RECONSUME_IN(BeforeDOCTYPENameState);
// Before DOCTYPE name state. Whitespace is skipped. Every other visible
// branch begins the DOCTYPE token: '>' and end-of-file then emit it, and the
// final branch advances to DOCTYPENameState.
474 HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
475 if (isTokenizerWhitespace(cc))
476 HTML_ADVANCE_TO(BeforeDOCTYPENameState);
477 else if (cc == '>') {
479 m_token->beginDOCTYPE();
480 return emitAndResumeIn(source, HTMLTokenizer::DataState);
481 } else if (cc == kEndOfFileMarker) {
483 m_token->beginDOCTYPE();
484 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
486 m_token->beginDOCTYPE();
487 HTML_ADVANCE_TO(DOCTYPENameState);
// DOCTYPE name state. Whitespace -> AfterDOCTYPENameState; the two emit
// exits end the token; otherwise the name continues in this state.
492 HTML_BEGIN_STATE(DOCTYPENameState) {
493 if (isTokenizerWhitespace(cc))
494 HTML_ADVANCE_TO(AfterDOCTYPENameState);
496 return emitAndResumeIn(source, HTMLTokenizer::DataState);
497 else if (cc == kEndOfFileMarker) {
499 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
501 HTML_ADVANCE_TO(DOCTYPENameState);
// After DOCTYPE name state. Whitespace is skipped and the two emit exits end
// the token. Otherwise it looks ahead for the "public" / "system" keywords
// (apparently matched ignoring case) and switches to the corresponding
// keyword state. If fewer bytes remain than the keyword being matched, it
// returns haveBufferedCharacterToken(). The final branch advances to
// BogusDOCTYPEState.
506 HTML_BEGIN_STATE(AfterDOCTYPENameState) {
507 if (isTokenizerWhitespace(cc))
508 HTML_ADVANCE_TO(AfterDOCTYPENameState);
510 return emitAndResumeIn(source, HTMLTokenizer::DataState);
511 else if (cc == kEndOfFileMarker) {
513 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
515 DEFINE_STATIC_LOCAL_STRING(publicString, "public");
516 DEFINE_STATIC_LOCAL_STRING(systemString, "system");
517 if (cc == 'P' || cc == 'p') {
518 if (source.startsWith(publicString, publicStringLength, true)) {
519 advanceStringAndASSERTIgnoringCase(source, publicString);
520 HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
521 } else if (source.remainingBytes() < publicStringLength)
522 return haveBufferedCharacterToken();
523 } else if (cc == 'S' || cc == 's') {
524 if (source.startsWith(systemString, systemStringLength, true)) {
525 advanceStringAndASSERTIgnoringCase(source, systemString);
526 HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
527 } else if (source.remainingBytes() < systemStringLength)
528 return haveBufferedCharacterToken();
531 HTML_ADVANCE_TO(BogusDOCTYPEState);
// After DOCTYPE public keyword state. Whitespace ->
// BeforeDOCTYPEPublicIdentifierState; a quote character selects the matching
// quoted public-identifier state; '>' and end-of-file emit; anything else
// goes to BogusDOCTYPEState.
536 HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
537 if (isTokenizerWhitespace(cc))
538 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
539 else if (cc == '"') {
541 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
542 } else if (cc == '\'') {
544 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
545 } else if (cc == '>') {
547 return emitAndResumeIn(source, HTMLTokenizer::DataState);
548 } else if (cc == kEndOfFileMarker) {
550 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
553 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Before DOCTYPE public identifier state. Same visible transitions as the
// state above, except that whitespace keeps the tokenizer in this state.
558 HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
559 if (isTokenizerWhitespace(cc))
560 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
561 else if (cc == '"') {
562 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
563 } else if (cc == '\'') {
564 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
565 } else if (cc == '>') {
567 return emitAndResumeIn(source, HTMLTokenizer::DataState);
568 } else if (cc == kEndOfFileMarker) {
570 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
573 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Double-quoted DOCTYPE public identifier state. Visible exits:
// AfterDOCTYPEPublicIdentifierState (condition not visible), '>' and
// end-of-file emit, otherwise the identifier continues in this state.
578 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
580 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
581 else if (cc == '>') {
583 return emitAndResumeIn(source, HTMLTokenizer::DataState);
584 } else if (cc == kEndOfFileMarker) {
586 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
588 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
// Single-quoted DOCTYPE public identifier state. Same visible shape as the
// double-quoted state above.
593 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
595 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
596 else if (cc == '>') {
598 return emitAndResumeIn(source, HTMLTokenizer::DataState);
599 } else if (cc == kEndOfFileMarker) {
601 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
603 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
// After DOCTYPE public identifier state. Whitespace ->
// BetweenDOCTYPEPublicAndSystemIdentifiersState; a quote character starts
// the matching quoted system-identifier state; the emit exits end the token;
// anything else goes to BogusDOCTYPEState.
608 HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
609 if (isTokenizerWhitespace(cc))
610 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
612 return emitAndResumeIn(source, HTMLTokenizer::DataState);
613 else if (cc == '"') {
615 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
616 } else if (cc == '\'') {
618 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
619 } else if (cc == kEndOfFileMarker) {
621 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
624 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Between DOCTYPE public and system identifiers state. Same visible
// transitions as the state above, except that whitespace keeps the tokenizer
// in this state.
629 HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
630 if (isTokenizerWhitespace(cc))
631 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
633 return emitAndResumeIn(source, HTMLTokenizer::DataState);
634 else if (cc == '"') {
635 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
636 } else if (cc == '\'') {
637 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
638 } else if (cc == kEndOfFileMarker) {
640 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
643 HTML_ADVANCE_TO(BogusDOCTYPEState);
// After DOCTYPE system keyword state. Whitespace ->
// BeforeDOCTYPESystemIdentifierState; a quote character selects the matching
// quoted system-identifier state; '>' and end-of-file emit; anything else
// goes to BogusDOCTYPEState.
648 HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
649 if (isTokenizerWhitespace(cc))
650 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
651 else if (cc == '"') {
653 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
654 } else if (cc == '\'') {
656 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
657 } else if (cc == '>') {
659 return emitAndResumeIn(source, HTMLTokenizer::DataState);
660 } else if (cc == kEndOfFileMarker) {
662 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
665 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Before DOCTYPE system identifier state. Whitespace is skipped; the visible
// exits lead to the quoted system-identifier states, the emit helpers, or
// BogusDOCTYPEState (the condition for the double-quoted exit is not
// visible).
670 HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
671 if (isTokenizerWhitespace(cc))
672 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
674 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
675 } else if (cc == '\'') {
676 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
677 } else if (cc == '>') {
679 return emitAndResumeIn(source, HTMLTokenizer::DataState);
680 } else if (cc == kEndOfFileMarker) {
682 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
685 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Double-quoted DOCTYPE system identifier state. Visible exits:
// AfterDOCTYPESystemIdentifierState (condition not visible), '>' and
// end-of-file emit, otherwise the identifier continues in this state.
690 HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
692 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
693 else if (cc == '>') {
695 return emitAndResumeIn(source, HTMLTokenizer::DataState);
696 } else if (cc == kEndOfFileMarker) {
698 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
700 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
// Single-quoted DOCTYPE system identifier state. Same visible shape as the
// double-quoted state above.
705 HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
707 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
708 else if (cc == '>') {
710 return emitAndResumeIn(source, HTMLTokenizer::DataState);
711 } else if (cc == kEndOfFileMarker) {
713 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
715 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
// After DOCTYPE system identifier state. Whitespace is skipped; the emit
// exits end the token; anything else goes to BogusDOCTYPEState.
720 HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
721 if (isTokenizerWhitespace(cc))
722 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
724 return emitAndResumeIn(source, HTMLTokenizer::DataState);
725 else if (cc == kEndOfFileMarker) {
727 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
730 HTML_ADVANCE_TO(BogusDOCTYPEState);
// Bogus DOCTYPE state. Keeps consuming in this state until one of the two
// emit exits is taken.
735 HTML_BEGIN_STATE(BogusDOCTYPEState) {
737 return emitAndResumeIn(source, HTMLTokenizer::DataState);
738 else if (cc == kEndOfFileMarker)
739 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
740 HTML_ADVANCE_TO(BogusDOCTYPEState);
// CDATA section state. Visible exits: CDATASectionRightSquareBracketState
// (condition not visible), reconsume end-of-file in DataState, or treat the
// input as character data and stay in this state.
744 HTML_BEGIN_STATE(CDATASectionState) {
746 HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
747 else if (cc == kEndOfFileMarker)
748 HTML_RECONSUME_IN(DataState);
750 m_token->ensureIsCharacterToken();
751 HTML_ADVANCE_TO(CDATASectionState);
// CDATA section right-square-bracket state. Either advances to the
// double-bracket state (condition not visible) or falls back to
// CDATASectionState as character data.
756 HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
758 HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
760 m_token->ensureIsCharacterToken();
761 HTML_RECONSUME_IN(CDATASectionState);
// CDATA section double-right-square-bracket state. Either returns to
// DataState (condition not visible) or falls back to CDATASectionState as
// character data.
766 HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
768 HTML_ADVANCE_TO(DataState);
770 m_token->ensureIsCharacterToken();
771 HTML_RECONSUME_IN(CDATASectionState);
// Control is not expected to reach this point.
778 ASSERT_NOT_REACHED();
782 inline void HTMLTokenizer::parseError()