Revert of Roll src/third_party/WebKit e0eac24:489c548 (svn 193311:193320) (patchset...
[chromium-blink-merge.git] / ios / third_party / blink / src / html_tokenizer.mm
blob709f4d13cc11abfd7b3e5f7d4f365f4a6f2c7649
1 /*
2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
28 #include "ios/third_party/blink/src/html_tokenizer.h"
30 #include "html_markup_tokenizer_inlines.h"
32 namespace WebCore {
// Convenience wrappers around the generic state-machine macros (BEGIN_STATE,
// RECONSUME_IN, ADVANCE_TO, SWITCH_TO). Each one supplies HTMLTokenizer as the
// first argument so the state sections below only have to name the state.
// The wrapped macros are not defined in this file -- presumably they come from
// html_markup_tokenizer_inlines.h; confirm there for their exact behavior.
34 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
35 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
36 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
37 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
// Constructs a tokenizer that starts in DataState with no current token, a NUL
// "additional allowed character", and an input-stream preprocessor that is
// handed a pointer to this tokenizer.
39 HTMLTokenizer::HTMLTokenizer()
40     : m_state(HTMLTokenizer::DataState)
41     , m_token(nullptr)
42     , m_additionalAllowedCharacter('\0')
43     , m_inputStreamPreprocessor(this)
// Destructor. No body statements appear for it in this listing.
47 HTMLTokenizer::~HTMLTokenizer()
// Returns the tokenizer to the same configuration the constructor sets up for
// these three members: DataState, no current token, and a NUL additional
// allowed character. m_inputStreamPreprocessor is not touched here.
51 void HTMLTokenizer::reset()
53     m_state = HTMLTokenizer::DataState;
// Use nullptr (not the integer literal 0) to match the constructor's
// m_token(nullptr) initializer.
54     m_token = nullptr;
55     m_additionalAllowedCharacter = '\0';
// Steps `source` forward once via source.next() and then reports whether the
// current token is a Character token.
//
// Returns true when m_token's type is HTMLToken::Character, false otherwise.
//
// Precondition (checked by ASSERT): m_token's type is Character or
// Uninitialized. m_token is dereferenced without a null check, so it must
// already point at a token (nextToken() assigns it on entry).
//
// NOTE(review): despite the name, no end-tag token is built in the code
// visible here.
58 bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
60     ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
61     source.next();
62     if (m_token->type() == HTMLToken::Character)
63         return true;
65     return false;
// Statement-like macro (do { ... } while (false)) for use inside nextToken().
// It expects `source` and `cc` to be in scope at the expansion site and a
// label named `stateName` to exist. In order, it:
//   1. records HTMLTokenizer::stateName in m_state;
//   2. calls flushBufferedEndTag(source) and returns true from the enclosing
//      function if that call returns true;
//   3. returns haveBufferedCharacterToken() from the enclosing function if the
//      source is empty or the input-stream preprocessor's peek() fails;
//   4. otherwise loads the next input character into `cc` and jumps to the
//      `stateName` label.
// NOTE(review): no use of this macro appears in the visible part of this file.
68 #define FLUSH_AND_ADVANCE_TO(stateName)                                    \
69     do {                                                                   \
70         m_state = HTMLTokenizer::stateName;                           \
71         if (flushBufferedEndTag(source))                                   \
72             return true;                                                   \
73         if (source.isEmpty()                                               \
74             || !m_inputStreamPreprocessor.peek(source))                    \
75             return haveBufferedCharacterToken();                           \
76         cc = m_inputStreamPreprocessor.nextInputCharacter();               \
77         goto stateName;                                                    \
78     } while (false)
// Drives the tokenizer state machine: starting from m_state, it examines input
// characters from `source` one at a time and builds the result into `token`.
//
// Each HTML_BEGIN_STATE(X) { ... } END_STATE() section below handles one
// tokenizer state. The transition macros are defined outside this file;
// presumably ADVANCE_TO consumes the current character before jumping to the
// named state, RECONSUME_IN jumps without consuming, and SWITCH_TO is used
// after the code has already advanced `source` itself -- confirm against
// html_markup_tokenizer_inlines.h.
//
// Return value: `true` is returned directly only in DataState, when a pending
// Character token has to be handed back before a '<' is processed. Every other
// exit returns whatever emitAndResumeIn(), emitAndReconsumeIn(),
// emitEndOfFile() or haveBufferedCharacterToken() return; those helpers are
// defined elsewhere.
//
// NOTE(review): in this function the attribute, comment and DOCTYPE states only
// change state. No call that stores attribute names/values, comment text or
// DOCTYPE names/identifiers on m_token is visible here; only tag names are
// accumulated (beginStartTag / beginEndTag / appendToName).
80 bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
82     // If we have a token in progress, then we're supposed to be called back
83     // with the same token so we can finish it.
84     ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
85     m_token = &token;
    // Bail out early when there is no input, or the preprocessor cannot
    // produce a character from it.
87     if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
88         return haveBufferedCharacterToken();
89     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
91     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
92     switch (m_state) {
    // DataState: '<' opens a tag, the end-of-file marker ends the stream, and
    // anything else marks the token as character data.
93     HTML_BEGIN_STATE(DataState) {
94         if (cc == '<') {
95             if (m_token->type() == HTMLToken::Character) {
96                 // We have a bunch of character tokens queued up that we
97                 // are emitting lazily here.
98                 return true;
99             }
100             HTML_ADVANCE_TO(TagOpenState);
101         } else if (cc == kEndOfFileMarker)
102             return emitEndOfFile(source);
103         else {
104             m_token->ensureIsCharacterToken();
105             HTML_ADVANCE_TO(DataState);
106         }
107     }
108     END_STATE()
    // TagOpenState: entered from DataState after '<'. Letters start a start
    // tag (upper-case letters are lower-cased first); '!' and '/' lead to
    // markup declarations and end tags.
110     HTML_BEGIN_STATE(TagOpenState) {
111         if (cc == '!')
112             HTML_ADVANCE_TO(MarkupDeclarationOpenState);
113         else if (cc == '/')
114             HTML_ADVANCE_TO(EndTagOpenState);
115         else if (isASCIIUpper(cc)) {
116             m_token->beginStartTag(toLowerCase(cc));
117             HTML_ADVANCE_TO(TagNameState);
118         } else if (isASCIILower(cc)) {
119             m_token->beginStartTag(cc);
120             HTML_ADVANCE_TO(TagNameState);
121         } else if (cc == '?') {
122             parseError();
123             // The spec consumes the current character before switching
124             // to the bogus comment state, but it's easier to implement
125             // if we reconsume the current character.
126             HTML_RECONSUME_IN(BogusCommentState);
127         } else {
128             parseError();
129             m_token->ensureIsCharacterToken();
130             HTML_RECONSUME_IN(DataState);
131         }
132     }
133     END_STATE()
    // EndTagOpenState: entered from TagOpenState after '/'.
135     HTML_BEGIN_STATE(EndTagOpenState) {
136         if (isASCIIUpper(cc)) {
137             m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
138             HTML_ADVANCE_TO(TagNameState);
139         } else if (isASCIILower(cc)) {
140             m_token->beginEndTag(static_cast<LChar>(cc));
141             HTML_ADVANCE_TO(TagNameState);
142         } else if (cc == '>') {
143             parseError();
144             HTML_ADVANCE_TO(DataState);
145         } else if (cc == kEndOfFileMarker) {
146             parseError();
147             m_token->ensureIsCharacterToken();
148             HTML_RECONSUME_IN(DataState);
149         } else {
150             parseError();
151             HTML_RECONSUME_IN(BogusCommentState);
152         }
153     }
154     END_STATE()
    // TagNameState: accumulates the tag name (lower-casing ASCII upper-case
    // letters) until whitespace, '/', '>' or end of file.
156     HTML_BEGIN_STATE(TagNameState) {
157         if (isTokenizerWhitespace(cc))
158             HTML_ADVANCE_TO(BeforeAttributeNameState);
159         else if (cc == '/')
160             HTML_ADVANCE_TO(SelfClosingStartTagState);
161         else if (cc == '>')
162             return emitAndResumeIn(source, HTMLTokenizer::DataState);
163         else if (isASCIIUpper(cc)) {
164             m_token->appendToName(toLowerCase(cc));
165             HTML_ADVANCE_TO(TagNameState);
166         } else if (cc == kEndOfFileMarker) {
167             parseError();
168             HTML_RECONSUME_IN(DataState);
169         } else {
170             m_token->appendToName(cc);
171             HTML_ADVANCE_TO(TagNameState);
172         }
173     }
174     END_STATE()
    // NOTE(review): in the three attribute-name states below, the
    // isASCIIUpper branch does the same thing as the final else branch
    // (advance to AttributeNameState) minus the parse-error check, which can
    // never match an upper-case letter anyway.
176     HTML_BEGIN_STATE(BeforeAttributeNameState) {
177         if (isTokenizerWhitespace(cc))
178             HTML_ADVANCE_TO(BeforeAttributeNameState);
179         else if (cc == '/')
180             HTML_ADVANCE_TO(SelfClosingStartTagState);
181         else if (cc == '>')
182             return emitAndResumeIn(source, HTMLTokenizer::DataState);
183         else if (isASCIIUpper(cc)) {
184             HTML_ADVANCE_TO(AttributeNameState);
185         } else if (cc == kEndOfFileMarker) {
186             parseError();
187             HTML_RECONSUME_IN(DataState);
188         } else {
189             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
190                 parseError();
191             HTML_ADVANCE_TO(AttributeNameState);
192         }
193     }
194     END_STATE()
196     HTML_BEGIN_STATE(AttributeNameState) {
197         if (isTokenizerWhitespace(cc)) {
198             HTML_ADVANCE_TO(AfterAttributeNameState);
199         } else if (cc == '/') {
200             HTML_ADVANCE_TO(SelfClosingStartTagState);
201         } else if (cc == '=') {
202             HTML_ADVANCE_TO(BeforeAttributeValueState);
203         } else if (cc == '>') {
204             return emitAndResumeIn(source, HTMLTokenizer::DataState);
205         } else if (isASCIIUpper(cc)) {
206             HTML_ADVANCE_TO(AttributeNameState);
207         } else if (cc == kEndOfFileMarker) {
208             parseError();
209             HTML_RECONSUME_IN(DataState);
210         } else {
            // NOTE(review): the cc == '=' test below can never be true here,
            // because '=' is already handled by an earlier branch of this
            // chain.
211             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
212                 parseError();
213             HTML_ADVANCE_TO(AttributeNameState);
214         }
215     }
216     END_STATE()
218     HTML_BEGIN_STATE(AfterAttributeNameState) {
219         if (isTokenizerWhitespace(cc))
220             HTML_ADVANCE_TO(AfterAttributeNameState);
221         else if (cc == '/')
222             HTML_ADVANCE_TO(SelfClosingStartTagState);
223         else if (cc == '=')
224             HTML_ADVANCE_TO(BeforeAttributeValueState);
225         else if (cc == '>')
226             return emitAndResumeIn(source, HTMLTokenizer::DataState);
227         else if (isASCIIUpper(cc)) {
228             HTML_ADVANCE_TO(AttributeNameState);
229         } else if (cc == kEndOfFileMarker) {
230             parseError();
231             HTML_RECONSUME_IN(DataState);
232         } else {
233             if (cc == '"' || cc == '\'' || cc == '<')
234                 parseError();
235             HTML_ADVANCE_TO(AttributeNameState);
236         }
237     }
238     END_STATE()
    // BeforeAttributeValueState: picks the quoting style of the value. '&' is
    // reconsumed (not advanced past) in the unquoted-value state.
240     HTML_BEGIN_STATE(BeforeAttributeValueState) {
241         if (isTokenizerWhitespace(cc))
242             HTML_ADVANCE_TO(BeforeAttributeValueState);
243         else if (cc == '"') {
244             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
245         } else if (cc == '&') {
246             HTML_RECONSUME_IN(AttributeValueUnquotedState);
247         } else if (cc == '\'') {
248             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
249         } else if (cc == '>') {
250             parseError();
251             return emitAndResumeIn(source, HTMLTokenizer::DataState);
252         } else if (cc == kEndOfFileMarker) {
253             parseError();
254             HTML_RECONSUME_IN(DataState);
255         } else {
256             if (cc == '<' || cc == '=' || cc == '`')
257                 parseError();
258             HTML_ADVANCE_TO(AttributeValueUnquotedState);
259         }
260     }
261     END_STATE()
    // The quoted-value states skip characters until the matching quote (or
    // end of file); the skipped characters are not recorded here.
263     HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
264         if (cc == '"') {
265             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
266         } else if (cc == kEndOfFileMarker) {
267             parseError();
268             HTML_RECONSUME_IN(DataState);
269         } else {
270             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
271         }
272     }
273     END_STATE()
275     HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
276         if (cc == '\'') {
277             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
278         } else if (cc == kEndOfFileMarker) {
279             parseError();
280             HTML_RECONSUME_IN(DataState);
281         } else {
282             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
283         }
284     }
285     END_STATE()
287     HTML_BEGIN_STATE(AttributeValueUnquotedState) {
288         if (isTokenizerWhitespace(cc)) {
289             HTML_ADVANCE_TO(BeforeAttributeNameState);
290         } else if (cc == '>') {
291             return emitAndResumeIn(source, HTMLTokenizer::DataState);
292         } else if (cc == kEndOfFileMarker) {
293             parseError();
294             HTML_RECONSUME_IN(DataState);
295         } else {
296             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
297                 parseError();
298             HTML_ADVANCE_TO(AttributeValueUnquotedState);
299         }
300     }
301     END_STATE()
303     HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
304         if (isTokenizerWhitespace(cc))
305             HTML_ADVANCE_TO(BeforeAttributeNameState);
306         else if (cc == '/')
307             HTML_ADVANCE_TO(SelfClosingStartTagState);
308         else if (cc == '>')
309             return emitAndResumeIn(source, HTMLTokenizer::DataState);
310         else if (cc == kEndOfFileMarker) {
311             parseError();
312             HTML_RECONSUME_IN(DataState);
313         } else {
314             parseError();
315             HTML_RECONSUME_IN(BeforeAttributeNameState);
316         }
317     }
318     END_STATE()
    // SelfClosingStartTagState: entered after '/' inside a tag. Only '>'
    // completes the tag; no self-closing flag is set on the token in the code
    // visible here.
320     HTML_BEGIN_STATE(SelfClosingStartTagState) {
321         if (cc == '>') {
322             return emitAndResumeIn(source, HTMLTokenizer::DataState);
323         } else if (cc == kEndOfFileMarker) {
324             parseError();
325             HTML_RECONSUME_IN(DataState);
326         } else {
327             parseError();
328             HTML_RECONSUME_IN(BeforeAttributeNameState);
329         }
330     }
331     END_STATE()
    // BogusCommentState: starts a comment token, then hands the same
    // character to ContinueBogusCommentState, which skips to '>' or end of
    // file.
333     HTML_BEGIN_STATE(BogusCommentState) {
334         m_token->beginComment();
335         HTML_RECONSUME_IN(ContinueBogusCommentState);
336     }
337     END_STATE()
339     HTML_BEGIN_STATE(ContinueBogusCommentState) {
340         if (cc == '>')
341             return emitAndResumeIn(source, HTMLTokenizer::DataState);
342         else if (cc == kEndOfFileMarker)
343             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
344         else {
345             HTML_ADVANCE_TO(ContinueBogusCommentState);
346         }
347     }
348     END_STATE()
    // MarkupDeclarationOpenState: entered from TagOpenState after '!'.
    // Recognizes "--" (comment) and a case-insensitive "doctype". When the
    // lookahead does not match but fewer bytes remain than the keyword needs,
    // it returns haveBufferedCharacterToken() instead of deciding --
    // presumably so the caller can retry once more input is available.
    // Anything else falls through to the parse error + bogus comment path.
    // NOTE(review): that fall-through relies on HTML_SWITCH_TO never returning
    // control to the statement after it; confirm in the macro definition.
350     HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
351         DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
352         DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
353         if (cc == '-') {
354             if (source.startsWith(dashDashString, dashDashStringLength)) {
355                 advanceAndASSERT(source, '-');
356                 advanceAndASSERT(source, '-');
357                 m_token->beginComment();
358                 HTML_SWITCH_TO(CommentStartState);
359             } else if (source.remainingBytes() < dashDashStringLength)
360                 return haveBufferedCharacterToken();
361         } else if (cc == 'D' || cc == 'd') {
362             if (source.startsWith(doctypeString, doctypeStringLength, true)) {
363                 advanceStringAndASSERTIgnoringCase(source, doctypeString);
364                 HTML_SWITCH_TO(DOCTYPEState);
365             } else if (source.remainingBytes() < doctypeStringLength)
366                 return haveBufferedCharacterToken();
367         }
368         parseError();
369         HTML_RECONSUME_IN(BogusCommentState);
370     }
371     END_STATE()
    // Comment states: track only the '-' / '!' / '>' structure needed to find
    // the end of the comment.
373     HTML_BEGIN_STATE(CommentStartState) {
374         if (cc == '-')
375             HTML_ADVANCE_TO(CommentStartDashState);
376         else if (cc == '>') {
377             parseError();
378             return emitAndResumeIn(source, HTMLTokenizer::DataState);
379         } else if (cc == kEndOfFileMarker) {
380             parseError();
381             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
382         } else {
383             HTML_ADVANCE_TO(CommentState);
384         }
385     }
386     END_STATE()
388     HTML_BEGIN_STATE(CommentStartDashState) {
389         if (cc == '-')
390             HTML_ADVANCE_TO(CommentEndState);
391         else if (cc == '>') {
392             parseError();
393             return emitAndResumeIn(source, HTMLTokenizer::DataState);
394         } else if (cc == kEndOfFileMarker) {
395             parseError();
396             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
397         } else {
398             HTML_ADVANCE_TO(CommentState);
399         }
400     }
401     END_STATE()
403     HTML_BEGIN_STATE(CommentState) {
404         if (cc == '-')
405             HTML_ADVANCE_TO(CommentEndDashState);
406         else if (cc == kEndOfFileMarker) {
407             parseError();
408             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
409         } else {
410             HTML_ADVANCE_TO(CommentState);
411         }
412     }
413     END_STATE()
415     HTML_BEGIN_STATE(CommentEndDashState) {
416         if (cc == '-')
417             HTML_ADVANCE_TO(CommentEndState);
418         else if (cc == kEndOfFileMarker) {
419             parseError();
420             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
421         } else {
422             HTML_ADVANCE_TO(CommentState);
423         }
424     }
425     END_STATE()
427     HTML_BEGIN_STATE(CommentEndState) {
428         if (cc == '>')
429             return emitAndResumeIn(source, HTMLTokenizer::DataState);
430         else if (cc == '!') {
431             parseError();
432             HTML_ADVANCE_TO(CommentEndBangState);
433         } else if (cc == '-') {
434             parseError();
435             HTML_ADVANCE_TO(CommentEndState);
436         } else if (cc == kEndOfFileMarker) {
437             parseError();
438             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
439         } else {
440             parseError();
441             HTML_ADVANCE_TO(CommentState);
442         }
443     }
444     END_STATE()
446     HTML_BEGIN_STATE(CommentEndBangState) {
447         if (cc == '-') {
448             HTML_ADVANCE_TO(CommentEndDashState);
449         } else if (cc == '>')
450             return emitAndResumeIn(source, HTMLTokenizer::DataState);
451         else if (cc == kEndOfFileMarker) {
452             parseError();
453             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
454         } else {
455             HTML_ADVANCE_TO(CommentState);
456         }
457     }
458     END_STATE()
    // DOCTYPE states: entered from MarkupDeclarationOpenState once "doctype"
    // has been consumed. beginDOCTYPE() is called on every path that emits or
    // moves on to the name.
460     HTML_BEGIN_STATE(DOCTYPEState) {
461         if (isTokenizerWhitespace(cc))
462             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
463         else if (cc == kEndOfFileMarker) {
464             parseError();
465             m_token->beginDOCTYPE();
466             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
467         } else {
468             parseError();
469             HTML_RECONSUME_IN(BeforeDOCTYPENameState);
470         }
471     }
472     END_STATE()
474     HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
475         if (isTokenizerWhitespace(cc))
476             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
477         else if (cc == '>') {
478             parseError();
479             m_token->beginDOCTYPE();
480             return emitAndResumeIn(source, HTMLTokenizer::DataState);
481         } else if (cc == kEndOfFileMarker) {
482             parseError();
483             m_token->beginDOCTYPE();
484             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
485         } else {
486             m_token->beginDOCTYPE();
487             HTML_ADVANCE_TO(DOCTYPENameState);
488         }
489     }
490     END_STATE()
492     HTML_BEGIN_STATE(DOCTYPENameState) {
493         if (isTokenizerWhitespace(cc))
494             HTML_ADVANCE_TO(AfterDOCTYPENameState);
495         else if (cc == '>')
496             return emitAndResumeIn(source, HTMLTokenizer::DataState);
497         else if (cc == kEndOfFileMarker) {
498             parseError();
499             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
500         } else {
501             HTML_ADVANCE_TO(DOCTYPENameState);
502         }
503     }
504     END_STATE()
    // AfterDOCTYPENameState: looks for the case-insensitive keywords "public"
    // or "system", using the same "not enough bytes yet" early return as
    // MarkupDeclarationOpenState.
506     HTML_BEGIN_STATE(AfterDOCTYPENameState) {
507         if (isTokenizerWhitespace(cc))
508             HTML_ADVANCE_TO(AfterDOCTYPENameState);
        // NOTE(review): this is a plain `if`, not `else if`, unlike the
        // sibling states. That is harmless only if HTML_ADVANCE_TO never falls
        // through to the next statement (presumably it ends in a goto/return,
        // as FLUSH_AND_ADVANCE_TO does); consider `else if` for consistency.
509         if (cc == '>')
510             return emitAndResumeIn(source, HTMLTokenizer::DataState);
511         else if (cc == kEndOfFileMarker) {
512             parseError();
513             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
514         } else {
515             DEFINE_STATIC_LOCAL_STRING(publicString, "public");
516             DEFINE_STATIC_LOCAL_STRING(systemString, "system");
517             if (cc == 'P' || cc == 'p') {
518                 if (source.startsWith(publicString, publicStringLength, true)) {
519                     advanceStringAndASSERTIgnoringCase(source, publicString);
520                     HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
521                 } else if (source.remainingBytes() < publicStringLength)
522                     return haveBufferedCharacterToken();
523             } else if (cc == 'S' || cc == 's') {
524                 if (source.startsWith(systemString, systemStringLength, true)) {
525                     advanceStringAndASSERTIgnoringCase(source, systemString);
526                     HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
527                 } else if (source.remainingBytes() < systemStringLength)
528                     return haveBufferedCharacterToken();
529             }
530             parseError();
531             HTML_ADVANCE_TO(BogusDOCTYPEState);
532         }
533     }
534     END_STATE()
536     HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
537         if (isTokenizerWhitespace(cc))
538             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
539         else if (cc == '"') {
540             parseError();
541             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
542         } else if (cc == '\'') {
543             parseError();
544             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
545         } else if (cc == '>') {
546             parseError();
547             return emitAndResumeIn(source, HTMLTokenizer::DataState);
548         } else if (cc == kEndOfFileMarker) {
549             parseError();
550             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
551         } else {
552             parseError();
553             HTML_ADVANCE_TO(BogusDOCTYPEState);
554         }
555     }
556     END_STATE()
558     HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
559         if (isTokenizerWhitespace(cc))
560             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
561         else if (cc == '"') {
562             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
563         } else if (cc == '\'') {
564             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
565         } else if (cc == '>') {
566             parseError();
567             return emitAndResumeIn(source, HTMLTokenizer::DataState);
568         } else if (cc == kEndOfFileMarker) {
569             parseError();
570             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
571         } else {
572             parseError();
573             HTML_ADVANCE_TO(BogusDOCTYPEState);
574         }
575     }
576     END_STATE()
578     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
579         if (cc == '"')
580             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
581         else if (cc == '>') {
582             parseError();
583             return emitAndResumeIn(source, HTMLTokenizer::DataState);
584         } else if (cc == kEndOfFileMarker) {
585             parseError();
586             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
587         } else {
588             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
589         }
590     }
591     END_STATE()
593     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
594         if (cc == '\'')
595             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
596         else if (cc == '>') {
597             parseError();
598             return emitAndResumeIn(source, HTMLTokenizer::DataState);
599         } else if (cc == kEndOfFileMarker) {
600             parseError();
601             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
602         } else {
603             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
604         }
605     }
606     END_STATE()
608     HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
609         if (isTokenizerWhitespace(cc))
610             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
611         else if (cc == '>')
612             return emitAndResumeIn(source, HTMLTokenizer::DataState);
613         else if (cc == '"') {
614             parseError();
615             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
616         } else if (cc == '\'') {
617             parseError();
618             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
619         } else if (cc == kEndOfFileMarker) {
620             parseError();
621             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
622         } else {
623             parseError();
624             HTML_ADVANCE_TO(BogusDOCTYPEState);
625         }
626     }
627     END_STATE()
629     HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
630         if (isTokenizerWhitespace(cc))
631             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
632         else if (cc == '>')
633             return emitAndResumeIn(source, HTMLTokenizer::DataState);
634         else if (cc == '"') {
635             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
636         } else if (cc == '\'') {
637             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
638         } else if (cc == kEndOfFileMarker) {
639             parseError();
640             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
641         } else {
642             parseError();
643             HTML_ADVANCE_TO(BogusDOCTYPEState);
644         }
645     }
646     END_STATE()
648     HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
649         if (isTokenizerWhitespace(cc))
650             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
651         else if (cc == '"') {
652             parseError();
653             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
654         } else if (cc == '\'') {
655             parseError();
656             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
657         } else if (cc == '>') {
658             parseError();
659             return emitAndResumeIn(source, HTMLTokenizer::DataState);
660         } else if (cc == kEndOfFileMarker) {
661             parseError();
662             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
663         } else {
664             parseError();
665             HTML_ADVANCE_TO(BogusDOCTYPEState);
666         }
667     }
668     END_STATE()
670     HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
671         if (isTokenizerWhitespace(cc))
672             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        // NOTE(review): plain `if` rather than `else if` again; see the same
        // note in AfterDOCTYPENameState. Safe only if HTML_ADVANCE_TO never
        // falls through.
673         if (cc == '"') {
674             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
675         } else if (cc == '\'') {
676             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
677         } else if (cc == '>') {
678             parseError();
679             return emitAndResumeIn(source, HTMLTokenizer::DataState);
680         } else if (cc == kEndOfFileMarker) {
681             parseError();
682             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
683         } else {
684             parseError();
685             HTML_ADVANCE_TO(BogusDOCTYPEState);
686         }
687     }
688     END_STATE()
690     HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
691         if (cc == '"')
692             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
693         else if (cc == '>') {
694             parseError();
695             return emitAndResumeIn(source, HTMLTokenizer::DataState);
696         } else if (cc == kEndOfFileMarker) {
697             parseError();
698             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
699         } else {
700             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
701         }
702     }
703     END_STATE()
705     HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
706         if (cc == '\'')
707             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
708         else if (cc == '>') {
709             parseError();
710             return emitAndResumeIn(source, HTMLTokenizer::DataState);
711         } else if (cc == kEndOfFileMarker) {
712             parseError();
713             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
714         } else {
715             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
716         }
717     }
718     END_STATE()
720     HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
721         if (isTokenizerWhitespace(cc))
722             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
723         else if (cc == '>')
724             return emitAndResumeIn(source, HTMLTokenizer::DataState);
725         else if (cc == kEndOfFileMarker) {
726             parseError();
727             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
728         } else {
729             parseError();
730             HTML_ADVANCE_TO(BogusDOCTYPEState);
731         }
732     }
733     END_STATE()
    // BogusDOCTYPEState: skips everything up to '>' or end of file, then
    // emits the token.
735     HTML_BEGIN_STATE(BogusDOCTYPEState) {
736         if (cc == '>')
737             return emitAndResumeIn(source, HTMLTokenizer::DataState);
738         else if (cc == kEndOfFileMarker)
739             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
740         HTML_ADVANCE_TO(BogusDOCTYPEState);
741     }
742     END_STATE()
    // CDATA section states: content is treated as character data until "]]>".
    // NOTE(review): no transition into CDATASectionState from any non-CDATA
    // state is visible in this function; presumably m_state is set to it from
    // outside -- confirm against the callers.
744     HTML_BEGIN_STATE(CDATASectionState) {
745         if (cc == ']')
746             HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
747         else if (cc == kEndOfFileMarker)
748             HTML_RECONSUME_IN(DataState);
749         else {
750             m_token->ensureIsCharacterToken();
751             HTML_ADVANCE_TO(CDATASectionState);
752         }
753     }
754     END_STATE()
756     HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
757         if (cc == ']')
758             HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
759         else {
760             m_token->ensureIsCharacterToken();
761             HTML_RECONSUME_IN(CDATASectionState);
762         }
763     }
764     END_STATE()
766     HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
767         if (cc == '>')
768             HTML_ADVANCE_TO(DataState);
769         else {
770             m_token->ensureIsCharacterToken();
771             HTML_RECONSUME_IN(CDATASectionState);
772         }
773     }
774     END_STATE()
776     }
    // Every state section above is expected to leave via a return or a
    // transition macro, so reaching this point indicates a logic error.
778     ASSERT_NOT_REACHED();
779     return false;
782 inline void HTMLTokenizer::parseError()
784     notImplemented();