// Merge Chromium + Blink git repositories
// [chromium-blink-merge.git] / ios / third_party / blink / src / html_input_stream_preprocessor.h
// blob d46ed643724b702cb21be009e074c25ab210bc4f
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

28 #ifndef InputStreamPreprocessor_h
29 #define InputStreamPreprocessor_h
31 #include "html_character_provider.h"
33 namespace WebCore {
35 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
36 template <typename Tokenizer>
37 class InputStreamPreprocessor {
38 WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
39 public:
40 InputStreamPreprocessor(Tokenizer* tokenizer)
41 : m_tokenizer(tokenizer)
43 reset();
46 ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
48 // Returns whether we succeeded in peeking at the next character.
49 // The only way we can fail to peek is if there are no more
50 // characters in |source| (after collapsing \r\n, etc).
51 ALWAYS_INLINE bool peek(CharacterProvider& source)
53 m_nextInputCharacter = source.currentCharacter();
55 // Every branch in this function is expensive, so we have a
56 // fast-reject branch for characters that don't require special
57 // handling. Please run the parser benchmark whenever you touch
58 // this function. It's very hot.
59 static const UChar specialCharacterMask = '\n' | '\r' | '\0';
60 if (m_nextInputCharacter & ~specialCharacterMask) {
61 m_skipNextNewLine = false;
62 return true;
64 return processNextInputCharacter(source);
67 // Returns whether there are more characters in |source| after advancing.
68 ALWAYS_INLINE bool advance(CharacterProvider& source)
70 source.next();
71 if (source.isEmpty())
72 return false;
73 return peek(source);
76 void reset(bool skipNextNewLine = false)
78 m_nextInputCharacter = '\0';
79 m_skipNextNewLine = skipNextNewLine;
82 private:
83 bool processNextInputCharacter(CharacterProvider& source)
85 ProcessAgain:
86 ASSERT(m_nextInputCharacter == source.currentCharacter());
88 if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
89 m_skipNextNewLine = false;
90 source.next();
91 if (source.isEmpty())
92 return false;
93 m_nextInputCharacter = source.currentCharacter();
95 if (m_nextInputCharacter == '\r') {
96 m_nextInputCharacter = '\n';
97 m_skipNextNewLine = true;
98 } else {
99 m_skipNextNewLine = false;
100 // FIXME: The spec indicates that the surrogate pair range as well as
101 // a number of specific character values are parse errors and should be replaced
102 // by the replacement character. We suspect this is a problem with the spec as doing
103 // that filtering breaks surrogate pair handling and causes us not to match Minefield.
104 if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
105 if (m_tokenizer->shouldSkipNullCharacters()) {
106 source.next();
107 if (source.isEmpty())
108 return false;
109 m_nextInputCharacter = source.currentCharacter();
110 goto ProcessAgain;
112 m_nextInputCharacter = 0xFFFD;
115 return true;
118 bool shouldTreatNullAsEndOfFileMarker(CharacterProvider& source) const
120 return source.remainingBytes() == 1;
123 Tokenizer* m_tokenizer;
125 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
126 UChar m_nextInputCharacter;
127 bool m_skipNextNewLine;
132 #endif // InputStreamPreprocessor_h