1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
10 #include "base/base_export.h"
11 #include "base/basictypes.h"
12 #include "base/compiler_specific.h"
13 #include "base/json/json_reader.h"
14 #include "base/strings/string_piece.h"
16 #if !defined(OS_CHROMEOS)
17 #include "base/gtest_prod_util.h"
24 #if defined(OS_CHROMEOS)
25 // Chromium and Chromium OS check out gtest to different places, so this is
26 // unable to compile on both if gtest_prod.h is included here. Instead, include
27 // its only contents -- this will need to be updated if the macro ever changes.
// FRIEND_TEST(test_case_name, test_name) expands to a friend declaration for
// the class named test_case_name##_##test_name##_Test, so that class can
// reach the non-public members of the class in which the macro is used.
28 #define FRIEND_TEST(test_case_name, test_name)\
29 friend class test_case_name##_##test_name##_Test
// Befriends a test under its plain name and under its DISABLED_ and FLAKY_
// prefixed names by invoking FRIEND_TEST once for each spelling. The last
// expansion has no trailing semicolon; the use site supplies it.
31 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
32 FRIEND_TEST(test_case_name, test_name); \
33 FRIEND_TEST(test_case_name, DISABLED_##test_name); \
34 FRIEND_TEST(test_case_name, FLAKY_##test_name)
42 // The implementation behind the JSONReader interface. This class is not meant
43 // to be used directly; it encapsulates logic that need not be exposed publicly.
45 // This parser guarantees O(n) time through the input string. It also optimizes
46 // base::StringValue by using StringPiece where possible when returning Value
47 // objects by using "hidden roots," discussed in the implementation.
49 // Iteration happens on the byte level, with the functions CanConsume and
50 // NextChar. The conversion from byte to JSON token happens without advancing
51 // the parser in GetNextToken/ParseToken; that is, tokenization operates on
52 // the current parser position without advancing.
54 // Built on top of these are a family of Consume functions that iterate
55 // internally. Invariant: on entry of a Consume function, the parser is wound
56 // to the first byte of a valid JSON token. On exit, it is on the last byte
57 // of a token, such that the next iteration of the parser will be at the byte
58 // immediately following the token, which would likely be the first byte of the
// next token.
60 class BASE_EXPORT_PRIVATE JSONParser
{
// Creates a parser configured by |options|. Explicit, so a plain int is never
// implicitly converted into a parser.
// NOTE(review): |options| is presumably a combination of the
// base::JSONParserOptions values mentioned in the member comments -- confirm.
62 explicit JSONParser(int options
);
65 // Parses the input string according to the set options and returns the
66 // result as a Value owned by the caller.
// |input| is the JSON text to parse. See error_code() and GetErrorMessage()
// for error reporting.
// NOTE(review): the value returned when parsing fails is not visible from
// this declaration (presumably NULL) -- confirm against the implementation.
67 Value
* Parse(const StringPiece
& input
);
69 // Returns the error code.
// The value is a JSONReader::JsonParseError; this accessor is const and does
// not modify the parser.
// NOTE(review): presumably reflects the most recent Parse() call -- confirm.
70 JSONReader::JsonParseError
error_code() const;
72 // Returns the human-friendly error message.
// The message is returned by value as a std::string; this accessor is const.
// NOTE(review): presumably produced via FormatErrorMessage() from the stored
// error information -- confirm against the implementation.
73 std::string
GetErrorMessage() const;
84 T_BOOL_FALSE
, // false
86 T_LIST_SEPARATOR
, // ,
87 T_OBJECT_PAIR_SEPARATOR
, // :
92 // A helper class used for parsing strings. One optimization performed is to
93 // create base::Value with a StringPiece to avoid unnecessary std::string
94 // copies. This is not possible if the input string needs to be decoded from
95 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
96 // This class centralizes that logic.
99 // Empty constructor. Used for creating a builder with which to Swap().
102 // |pos| is the beginning of an input string, excluding the |"|.
// Explicit, so a bare const char* is never implicitly converted into a
// builder.
103 explicit StringBuilder(const char* pos
);
107 // Swaps the contents of |other| with this.
// |other| is an in-out argument: after the call it holds what this builder
// held before, and vice versa.
108 void Swap(StringBuilder
* other
);
110 // Either increases the |length_| of the string or copies the character if
111 // the StringBuilder has been converted. |c| must be in the basic ASCII
112 // plane; all other characters need to be in UTF-8 units, appended with
113 // AppendString below.
// "Converted" refers to Convert(), which switches the builder from its
// default StringPiece form to a full std::string.
114 void Append(const char& c
);
116 // Appends a string to the std::string. The builder must already have been
// Convert()ed before this is called.
// |str| carries the characters that Append() cannot take, i.e. those that
// have to be expressed as UTF-8 units (see the comment on Append()).
117 void AppendString(const std::string
& str
);
119 // Converts the builder from its default StringPiece to a full std::string,
120 // performing a copy. Once a builder is converted, it cannot be made a
121 // StringPiece again.
124 // Returns whether the builder can be converted to a StringPiece.
// NOTE(review): presumably true only while the builder has not been
// Convert()ed, since a converted builder "cannot be made a StringPiece
// again" -- confirm against the implementation.
125 bool CanBeStringPiece() const;
127 // Returns the StringPiece representation. Returns an empty piece if it
128 // cannot be converted.
// See CanBeStringPiece() for whether a StringPiece representation exists.
129 StringPiece
AsStringPiece();
131 // Returns the builder as a std::string.
// The result is a const reference rather than a copy.
// NOTE(review): presumably refers to the builder's own |string_| storage and
// therefore stays valid only as long as the builder does -- confirm.
132 const std::string
& AsString();
135 // The beginning of the input string.
138 // Number of bytes in |pos_| that make up the string being built.
141 // The copied string representation. NULL until Convert() is called.
142 // Strong. scoped_ptr<T> has too much of an overhead here.
// NOTE(review): "Strong" presumably means this builder owns the pointee; as
// a raw pointer it is not released automatically -- confirm where it is
// deleted in the implementation.
143 std::string
* string_
;
146 // Quick check that the stream has capacity to consume |length| more bytes.
// Returns true when that many bytes are available and false otherwise.
147 bool CanConsume(int length
);
149 // The basic way to consume a single character in the stream. Consumes one
150 // byte of the input stream and returns a pointer to the rest of it.
// NOTE(review): no bounds check is documented here; callers presumably guard
// with CanConsume() first -- confirm against the implementation.
151 const char* NextChar();
153 // Performs the equivalent of NextChar N times.
// |n| is the number of bytes to consume. Unlike NextChar(), nothing is
// returned.
154 void NextNChars(int n
);
156 // Skips over whitespace and comments to find the next token in the stream.
157 // This does not advance the parser for non-whitespace or comment chars.
// In other words, only whitespace and comments are consumed; the bytes of
// the token that is returned are left unconsumed.
158 Token
GetNextToken();
160 // Consumes whitespace characters and comments until the next character that
// is neither whitespace nor part of a comment is encountered.
162 void EatWhitespaceAndComments();
163 // Helper function that consumes a comment, assuming that the parser is
164 // currently wound to a '/'.
167 // Calls GetNextToken() and then ParseToken(). Caller owns the result.
// NOTE(review): the result on a parse error is not visible from this
// declaration (presumably NULL) -- confirm against the implementation.
168 Value
* ParseNextToken();
170 // Takes a token that represents the start of a Value ("a structural token"
171 // in RFC terms) and consumes it, returning the result as an object the
// caller owns.
// |token| is the token type the parser is currently positioned on, as
// produced by GetNextToken().
173 Value
* ParseToken(Token token
);
175 // Assuming that the parser is currently wound to '{', this parses a JSON
176 // object into a DictionaryValue.
// The result is handed back through the base Value* type.
177 Value
* ConsumeDictionary();
179 // Assuming that the parser is wound to '[', this parses a JSON list into a
// ListValue. The result is handed back through the base Value* type.
181 Value
* ConsumeList();
183 // Calls through to ConsumeStringRaw() and wraps the resulting string in a
// Value.
184 Value
* ConsumeString();
186 // Assuming that the parser is wound to a double quote, this parses a string,
187 // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
188 // success and Swap()s the result into |out|. Returns false on failure with
189 // error information set.
// |out| is an output parameter; because the result is delivered with Swap(),
// whatever |out| held beforehand is replaced on success.
190 bool ConsumeStringRaw(StringBuilder
* out
);
191 // Helper function for ConsumeStringRaw() that consumes the next four or 10
192 // bytes (parser is wound to the first character of a HEX sequence, with the
193 // potential for consuming another \uXXXX for a surrogate). Returns true on
194 // success and places the UTF8 code units in |dest_string|, and false on
// failure.
// Four bytes is a single XXXX hex sequence; ten bytes is XXXX followed by a
// second, six-byte \uXXXX escape.
196 bool DecodeUTF16(std::string
* dest_string
);
197 // Helper function for ConsumeStringRaw() that takes a single code point,
198 // decodes it into UTF-8 units, and appends it to the given builder. The
199 // point must be valid.
// |point| is the code point to encode; |dest| is the builder that receives
// the resulting UTF-8 units. Nothing is returned.
200 void DecodeUTF8(const int32
& point
, StringBuilder
* dest
);
202 // Assuming that the parser is wound to the start of a valid JSON number,
203 // this parses and converts it to either an int or double value.
// The converted number is handed back as a Value*.
// NOTE(review): the rule for choosing int versus double is not visible from
// this declaration -- see the implementation.
204 Value
* ConsumeNumber();
205 // Helper that reads characters that are ints. Returns true if a number was
206 // read and false on error.
// NOTE(review): |allow_leading_zeros| presumably controls whether a digit
// run that starts with '0' is accepted (inferred from the parameter name) --
// confirm against the implementation.
207 bool ReadInt(bool allow_leading_zeros
);
209 // Consumes the literal values of |true|, |false|, and |null|, assuming the
210 // parser is wound to the first character of any of those.
// The consumed literal is handed back as a Value*.
211 Value
* ConsumeLiteral();
213 // Compares two string buffers of a given length.
// Static helper: it takes only its arguments and needs no parser instance.
// |left| and |right| are the buffers; |len| is the number of bytes compared.
// NOTE(review): presumably returns true when the first |len| bytes match and
// requires both buffers to be at least |len| bytes long -- confirm.
214 static bool StringsAreEqual(const char* left
, const char* right
, size_t len
);
216 // Sets the error information to |code| at the current column, based on
217 // |index_| and |index_last_line_|, with an optional positive/negative
218 // adjustment by |column_adjust|.
// |code| is the JSONReader::JsonParseError to record; |column_adjust| shifts
// the reported column forwards (positive) or backwards (negative).
219 void ReportError(JSONReader::JsonParseError code
, int column_adjust
);
221 // Given the line and column number of an error, formats one of the error
222 // message constants from json_reader.h for human display.
// |line| and |column| locate the error; |description| is the error message
// text to present. Static, so it can be called without a parser instance.
223 static std::string
FormatErrorMessage(int line
, int column
,
224 const std::string
& description
);
226 // base::JSONParserOptions that control parsing.
229 // Pointer to the start of the input data.
230 const char* start_pos_
;
232 // Pointer to the current position in the input data. Equivalent to
233 // |start_pos_ + index_|.
236 // Pointer to the last character of the input data.
237 const char* end_pos_
;
239 // The index in the input stream to which the parser is wound.
242 // The number of times the parser has recursed (current stack depth).
245 // The line number that the parser is at currently.
248 // The last value of |index_| on the previous line.
249 int index_last_line_
;
251 // Error information.
252 JSONReader::JsonParseError error_code_
;
// Test access: the JSONParserTest fixture and the individual test cases
// listed below are declared friends so they can exercise the parser's
// non-public members directly.
256 friend class JSONParserTest
;
257 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, NextChar
);
258 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ConsumeDictionary
);
259 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ConsumeList
);
260 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ConsumeString
);
261 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ConsumeLiterals
);
262 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ConsumeNumbers
);
263 FRIEND_TEST_ALL_PREFIXES(JSONParserTest
, ErrorMessages
);
265 DISALLOW_COPY_AND_ASSIGN(JSONParser
);
268 } // namespace internal
271 #endif // BASE_JSON_JSON_PARSER_H_