Roll src/third_party/WebKit eac3800:0237a66 (svn 202606:202607)
[chromium-blink-merge.git] / base / json / json_parser.cc
blob9be690a6287009e3d1983f5c43abcf4abb416d99
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/json/json_parser.h"
7 #include <cmath>
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string_number_conversions.h"
12 #include "base/strings/string_piece.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversion_utils.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/third_party/icu/icu_utf.h"
18 #include "base/values.h"
20 namespace base {
21 namespace internal {
23 namespace {
25 const int kStackMaxDepth = 100;
27 const int32 kExtendedASCIIStart = 0x80;
29 // This and the class below are used to own the JSON input string for when
30 // string tokens are stored as StringPiece instead of std::string. This
31 // optimization avoids about 2/3rds of string memory copies. The constructor
32 // takes ownership of the input string. The real root value is Swap()ed into
33 // the new instance.
34 class DictionaryHiddenRootValue : public DictionaryValue {
35 public:
36 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
37 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
38 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
41 void Swap(DictionaryValue* other) override {
42 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
44 // First deep copy to convert JSONStringValue to std::string and swap that
45 // copy with |other|, which contains the new contents of |this|.
46 scoped_ptr<DictionaryValue> copy(DeepCopy());
47 copy->Swap(other);
49 // Then erase the contents of the current dictionary and swap in the
50 // new contents, originally from |other|.
51 Clear();
52 json_.reset();
53 DictionaryValue::Swap(copy.get());
56 // Not overriding DictionaryValue::Remove because it just calls through to
57 // the method below.
59 bool RemoveWithoutPathExpansion(const std::string& key,
60 scoped_ptr<Value>* out) override {
61 // If the caller won't take ownership of the removed value, just call up.
62 if (!out)
63 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
65 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
67 // Otherwise, remove the value while its still "owned" by this and copy it
68 // to convert any JSONStringValues to std::string.
69 scoped_ptr<Value> out_owned;
70 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
71 return false;
73 out->reset(out_owned->DeepCopy());
75 return true;
78 private:
79 scoped_ptr<std::string> json_;
81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
84 class ListHiddenRootValue : public ListValue {
85 public:
86 ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
87 DCHECK(root->IsType(Value::TYPE_LIST));
88 ListValue::Swap(static_cast<ListValue*>(root));
91 void Swap(ListValue* other) override {
92 DVLOG(1) << "Swap()ing a ListValue inefficiently.";
94 // First deep copy to convert JSONStringValue to std::string and swap that
95 // copy with |other|, which contains the new contents of |this|.
96 scoped_ptr<ListValue> copy(DeepCopy());
97 copy->Swap(other);
99 // Then erase the contents of the current list and swap in the new contents,
100 // originally from |other|.
101 Clear();
102 json_.reset();
103 ListValue::Swap(copy.get());
106 bool Remove(size_t index, scoped_ptr<Value>* out) override {
107 // If the caller won't take ownership of the removed value, just call up.
108 if (!out)
109 return ListValue::Remove(index, out);
111 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
113 // Otherwise, remove the value while its still "owned" by this and copy it
114 // to convert any JSONStringValues to std::string.
115 scoped_ptr<Value> out_owned;
116 if (!ListValue::Remove(index, &out_owned))
117 return false;
119 out->reset(out_owned->DeepCopy());
121 return true;
124 private:
125 scoped_ptr<std::string> json_;
127 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
130 // A variant on StringValue that uses StringPiece instead of copying the string
131 // into the Value. This can only be stored in a child of hidden root (above),
132 // otherwise the referenced string will not be guaranteed to outlive it.
133 class JSONStringValue : public Value {
134 public:
135 explicit JSONStringValue(const StringPiece& piece)
136 : Value(TYPE_STRING),
137 string_piece_(piece) {
140 // Overridden from Value:
141 bool GetAsString(std::string* out_value) const override {
142 string_piece_.CopyToString(out_value);
143 return true;
145 bool GetAsString(string16* out_value) const override {
146 *out_value = UTF8ToUTF16(string_piece_);
147 return true;
149 Value* DeepCopy() const override {
150 return new StringValue(string_piece_.as_string());
152 bool Equals(const Value* other) const override {
153 std::string other_string;
154 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
155 StringPiece(other_string) == string_piece_;
158 private:
159 // The location in the original input stream.
160 StringPiece string_piece_;
162 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
165 // Simple class that checks for maximum recursion/"stack overflow."
166 class StackMarker {
167 public:
168 explicit StackMarker(int* depth) : depth_(depth) {
169 ++(*depth_);
170 DCHECK_LE(*depth_, kStackMaxDepth);
172 ~StackMarker() {
173 --(*depth_);
176 bool IsTooDeep() const {
177 return *depth_ >= kStackMaxDepth;
180 private:
181 int* const depth_;
183 DISALLOW_COPY_AND_ASSIGN(StackMarker);
186 } // namespace
188 JSONParser::JSONParser(int options)
189 : options_(options),
190 start_pos_(NULL),
191 pos_(NULL),
192 end_pos_(NULL),
193 index_(0),
194 stack_depth_(0),
195 line_number_(0),
196 index_last_line_(0),
197 error_code_(JSONReader::JSON_NO_ERROR),
198 error_line_(0),
199 error_column_(0) {
202 JSONParser::~JSONParser() {
205 Value* JSONParser::Parse(const StringPiece& input) {
206 scoped_ptr<std::string> input_copy;
207 // If the children of a JSON root can be detached, then hidden roots cannot
208 // be used, so do not bother copying the input because StringPiece will not
209 // be used anywhere.
210 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
211 input_copy.reset(new std::string(input.as_string()));
212 start_pos_ = input_copy->data();
213 } else {
214 start_pos_ = input.data();
216 pos_ = start_pos_;
217 end_pos_ = start_pos_ + input.length();
218 index_ = 0;
219 line_number_ = 1;
220 index_last_line_ = 0;
222 error_code_ = JSONReader::JSON_NO_ERROR;
223 error_line_ = 0;
224 error_column_ = 0;
226 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
227 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
228 // ParseNextToken function mis-treating a Unicode BOM as an invalid
229 // character and returning NULL.
230 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
231 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
232 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
233 NextNChars(3);
236 // Parse the first and any nested tokens.
237 scoped_ptr<Value> root(ParseNextToken());
238 if (!root.get())
239 return NULL;
241 // Make sure the input stream is at an end.
242 if (GetNextToken() != T_END_OF_INPUT) {
243 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
244 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
245 return NULL;
249 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
250 // hidden root.
251 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
252 if (root->IsType(Value::TYPE_DICTIONARY)) {
253 return new DictionaryHiddenRootValue(input_copy.release(), root.get());
254 } else if (root->IsType(Value::TYPE_LIST)) {
255 return new ListHiddenRootValue(input_copy.release(), root.get());
256 } else if (root->IsType(Value::TYPE_STRING)) {
257 // A string type could be a JSONStringValue, but because there's no
258 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
259 // preserve it.
260 return root->DeepCopy();
264 // All other values can be returned directly.
265 return root.release();
268 JSONReader::JsonParseError JSONParser::error_code() const {
269 return error_code_;
272 std::string JSONParser::GetErrorMessage() const {
273 return FormatErrorMessage(error_line_, error_column_,
274 JSONReader::ErrorCodeToString(error_code_));
277 // StringBuilder ///////////////////////////////////////////////////////////////
279 JSONParser::StringBuilder::StringBuilder()
280 : pos_(NULL),
281 length_(0),
282 string_(NULL) {
285 JSONParser::StringBuilder::StringBuilder(const char* pos)
286 : pos_(pos),
287 length_(0),
288 string_(NULL) {
291 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
292 std::swap(other->string_, string_);
293 std::swap(other->pos_, pos_);
294 std::swap(other->length_, length_);
297 JSONParser::StringBuilder::~StringBuilder() {
298 delete string_;
301 void JSONParser::StringBuilder::Append(const char& c) {
302 DCHECK_GE(c, 0);
303 DCHECK_LT(c, 128);
305 if (string_)
306 string_->push_back(c);
307 else
308 ++length_;
311 void JSONParser::StringBuilder::AppendString(const std::string& str) {
312 DCHECK(string_);
313 string_->append(str);
316 void JSONParser::StringBuilder::Convert() {
317 if (string_)
318 return;
319 string_ = new std::string(pos_, length_);
322 bool JSONParser::StringBuilder::CanBeStringPiece() const {
323 return !string_;
326 StringPiece JSONParser::StringBuilder::AsStringPiece() {
327 if (string_)
328 return StringPiece();
329 return StringPiece(pos_, length_);
332 const std::string& JSONParser::StringBuilder::AsString() {
333 if (!string_)
334 Convert();
335 return *string_;
338 // JSONParser private //////////////////////////////////////////////////////////
340 inline bool JSONParser::CanConsume(int length) {
341 return pos_ + length <= end_pos_;
344 const char* JSONParser::NextChar() {
345 DCHECK(CanConsume(1));
346 ++index_;
347 ++pos_;
348 return pos_;
351 void JSONParser::NextNChars(int n) {
352 DCHECK(CanConsume(n));
353 index_ += n;
354 pos_ += n;
357 JSONParser::Token JSONParser::GetNextToken() {
358 EatWhitespaceAndComments();
359 if (!CanConsume(1))
360 return T_END_OF_INPUT;
362 switch (*pos_) {
363 case '{':
364 return T_OBJECT_BEGIN;
365 case '}':
366 return T_OBJECT_END;
367 case '[':
368 return T_ARRAY_BEGIN;
369 case ']':
370 return T_ARRAY_END;
371 case '"':
372 return T_STRING;
373 case '0':
374 case '1':
375 case '2':
376 case '3':
377 case '4':
378 case '5':
379 case '6':
380 case '7':
381 case '8':
382 case '9':
383 case '-':
384 return T_NUMBER;
385 case 't':
386 return T_BOOL_TRUE;
387 case 'f':
388 return T_BOOL_FALSE;
389 case 'n':
390 return T_NULL;
391 case ',':
392 return T_LIST_SEPARATOR;
393 case ':':
394 return T_OBJECT_PAIR_SEPARATOR;
395 default:
396 return T_INVALID_TOKEN;
400 void JSONParser::EatWhitespaceAndComments() {
401 while (pos_ < end_pos_) {
402 switch (*pos_) {
403 case '\r':
404 case '\n':
405 index_last_line_ = index_;
406 // Don't increment line_number_ twice for "\r\n".
407 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
408 ++line_number_;
409 // Fall through.
410 case ' ':
411 case '\t':
412 NextChar();
413 break;
414 case '/':
415 if (!EatComment())
416 return;
417 break;
418 default:
419 return;
424 bool JSONParser::EatComment() {
425 if (*pos_ != '/' || !CanConsume(1))
426 return false;
428 char next_char = *NextChar();
429 if (next_char == '/') {
430 // Single line comment, read to newline.
431 while (CanConsume(1)) {
432 next_char = *NextChar();
433 if (next_char == '\n' || next_char == '\r')
434 return true;
436 } else if (next_char == '*') {
437 char previous_char = '\0';
438 // Block comment, read until end marker.
439 while (CanConsume(1)) {
440 next_char = *NextChar();
441 if (previous_char == '*' && next_char == '/') {
442 // EatWhitespaceAndComments will inspect pos_, which will still be on
443 // the last / of the comment, so advance once more (which may also be
444 // end of input).
445 NextChar();
446 return true;
448 previous_char = next_char;
451 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
454 return false;
457 Value* JSONParser::ParseNextToken() {
458 return ParseToken(GetNextToken());
461 Value* JSONParser::ParseToken(Token token) {
462 switch (token) {
463 case T_OBJECT_BEGIN:
464 return ConsumeDictionary();
465 case T_ARRAY_BEGIN:
466 return ConsumeList();
467 case T_STRING:
468 return ConsumeString();
469 case T_NUMBER:
470 return ConsumeNumber();
471 case T_BOOL_TRUE:
472 case T_BOOL_FALSE:
473 case T_NULL:
474 return ConsumeLiteral();
475 default:
476 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
477 return NULL;
481 Value* JSONParser::ConsumeDictionary() {
482 if (*pos_ != '{') {
483 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
484 return NULL;
487 StackMarker depth_check(&stack_depth_);
488 if (depth_check.IsTooDeep()) {
489 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
490 return NULL;
493 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
495 NextChar();
496 Token token = GetNextToken();
497 while (token != T_OBJECT_END) {
498 if (token != T_STRING) {
499 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
500 return NULL;
503 // First consume the key.
504 StringBuilder key;
505 if (!ConsumeStringRaw(&key)) {
506 return NULL;
509 // Read the separator.
510 NextChar();
511 token = GetNextToken();
512 if (token != T_OBJECT_PAIR_SEPARATOR) {
513 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
514 return NULL;
517 // The next token is the value. Ownership transfers to |dict|.
518 NextChar();
519 Value* value = ParseNextToken();
520 if (!value) {
521 // ReportError from deeper level.
522 return NULL;
525 dict->SetWithoutPathExpansion(key.AsString(), value);
527 NextChar();
528 token = GetNextToken();
529 if (token == T_LIST_SEPARATOR) {
530 NextChar();
531 token = GetNextToken();
532 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
533 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
534 return NULL;
536 } else if (token != T_OBJECT_END) {
537 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
538 return NULL;
542 return dict.release();
545 Value* JSONParser::ConsumeList() {
546 if (*pos_ != '[') {
547 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
548 return NULL;
551 StackMarker depth_check(&stack_depth_);
552 if (depth_check.IsTooDeep()) {
553 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
554 return NULL;
557 scoped_ptr<ListValue> list(new ListValue);
559 NextChar();
560 Token token = GetNextToken();
561 while (token != T_ARRAY_END) {
562 Value* item = ParseToken(token);
563 if (!item) {
564 // ReportError from deeper level.
565 return NULL;
568 list->Append(item);
570 NextChar();
571 token = GetNextToken();
572 if (token == T_LIST_SEPARATOR) {
573 NextChar();
574 token = GetNextToken();
575 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
576 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
577 return NULL;
579 } else if (token != T_ARRAY_END) {
580 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
581 return NULL;
585 return list.release();
588 Value* JSONParser::ConsumeString() {
589 StringBuilder string;
590 if (!ConsumeStringRaw(&string))
591 return NULL;
593 // Create the Value representation, using a hidden root, if configured
594 // to do so, and if the string can be represented by StringPiece.
595 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
596 return new JSONStringValue(string.AsStringPiece());
597 } else {
598 if (string.CanBeStringPiece())
599 string.Convert();
600 return new StringValue(string.AsString());
604 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
605 if (*pos_ != '"') {
606 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
607 return false;
610 // StringBuilder will internally build a StringPiece unless a UTF-16
611 // conversion occurs, at which point it will perform a copy into a
612 // std::string.
613 StringBuilder string(NextChar());
615 int length = end_pos_ - start_pos_;
616 int32 next_char = 0;
618 while (CanConsume(1)) {
619 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
620 CBU8_NEXT(start_pos_, index_, length, next_char);
621 if (next_char < 0 || !IsValidCharacter(next_char)) {
622 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
623 return false;
626 // If this character is an escape sequence...
627 if (next_char == '\\') {
628 // The input string will be adjusted (either by combining the two
629 // characters of an encoded escape sequence, or with a UTF conversion),
630 // so using StringPiece isn't possible -- force a conversion.
631 string.Convert();
633 if (!CanConsume(1)) {
634 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
635 return false;
638 switch (*NextChar()) {
639 // Allowed esape sequences:
640 case 'x': { // UTF-8 sequence.
641 // UTF-8 \x escape sequences are not allowed in the spec, but they
642 // are supported here for backwards-compatiblity with the old parser.
643 if (!CanConsume(2)) {
644 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
645 return false;
648 int hex_digit = 0;
649 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
650 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
651 return false;
653 NextChar();
655 if (hex_digit < kExtendedASCIIStart)
656 string.Append(static_cast<char>(hex_digit));
657 else
658 DecodeUTF8(hex_digit, &string);
659 break;
661 case 'u': { // UTF-16 sequence.
662 // UTF units are of the form \uXXXX.
663 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
664 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
665 return false;
668 // Skip the 'u'.
669 NextChar();
671 std::string utf8_units;
672 if (!DecodeUTF16(&utf8_units)) {
673 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
674 return false;
677 string.AppendString(utf8_units);
678 break;
680 case '"':
681 string.Append('"');
682 break;
683 case '\\':
684 string.Append('\\');
685 break;
686 case '/':
687 string.Append('/');
688 break;
689 case 'b':
690 string.Append('\b');
691 break;
692 case 'f':
693 string.Append('\f');
694 break;
695 case 'n':
696 string.Append('\n');
697 break;
698 case 'r':
699 string.Append('\r');
700 break;
701 case 't':
702 string.Append('\t');
703 break;
704 case 'v': // Not listed as valid escape sequence in the RFC.
705 string.Append('\v');
706 break;
707 // All other escape squences are illegal.
708 default:
709 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
710 return false;
712 } else if (next_char == '"') {
713 --index_; // Rewind by one because of CBU8_NEXT.
714 out->Swap(&string);
715 return true;
716 } else {
717 if (next_char < kExtendedASCIIStart)
718 string.Append(static_cast<char>(next_char));
719 else
720 DecodeUTF8(next_char, &string);
724 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
725 return false;
728 // Entry is at the first X in \uXXXX.
729 bool JSONParser::DecodeUTF16(std::string* dest_string) {
730 if (!CanConsume(4))
731 return false;
733 // This is a 32-bit field because the shift operations in the
734 // conversion process below cause MSVC to error about "data loss."
735 // This only stores UTF-16 code units, though.
736 // Consume the UTF-16 code unit, which may be a high surrogate.
737 int code_unit16_high = 0;
738 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
739 return false;
741 // Only add 3, not 4, because at the end of this iteration, the parser has
742 // finished working with the last digit of the UTF sequence, meaning that
743 // the next iteration will advance to the next byte.
744 NextNChars(3);
746 // Used to convert the UTF-16 code units to a code point and then to a UTF-8
747 // code unit sequence.
748 char code_unit8[8] = { 0 };
749 size_t offset = 0;
751 // If this is a high surrogate, consume the next code unit to get the
752 // low surrogate.
753 if (CBU16_IS_SURROGATE(code_unit16_high)) {
754 // Make sure this is the high surrogate. If not, it's an encoding
755 // error.
756 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
757 return false;
759 // Make sure that the token has more characters to consume the
760 // lower surrogate.
761 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
762 return false;
763 if (*NextChar() != '\\' || *NextChar() != 'u')
764 return false;
766 NextChar(); // Read past 'u'.
767 int code_unit16_low = 0;
768 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
769 return false;
771 NextNChars(3);
773 if (!CBU16_IS_TRAIL(code_unit16_low)) {
774 return false;
777 uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
778 code_unit16_low);
779 if (!IsValidCharacter(code_point))
780 return false;
782 offset = 0;
783 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
784 } else {
785 // Not a surrogate.
786 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
787 if (!IsValidCharacter(code_unit16_high))
788 return false;
790 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
793 dest_string->append(code_unit8);
794 return true;
797 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
798 DCHECK(IsValidCharacter(point));
800 // Anything outside of the basic ASCII plane will need to be decoded from
801 // int32 to a multi-byte sequence.
802 if (point < kExtendedASCIIStart) {
803 dest->Append(static_cast<char>(point));
804 } else {
805 char utf8_units[4] = { 0 };
806 int offset = 0;
807 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
808 dest->Convert();
809 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
810 // zero terminated at this point. |offset| contains the correct length.
811 dest->AppendString(std::string(utf8_units, offset));
815 Value* JSONParser::ConsumeNumber() {
816 const char* num_start = pos_;
817 const int start_index = index_;
818 int end_index = start_index;
820 if (*pos_ == '-')
821 NextChar();
823 if (!ReadInt(false)) {
824 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
825 return NULL;
827 end_index = index_;
829 // The optional fraction part.
830 if (*pos_ == '.') {
831 if (!CanConsume(1)) {
832 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
833 return NULL;
835 NextChar();
836 if (!ReadInt(true)) {
837 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
838 return NULL;
840 end_index = index_;
843 // Optional exponent part.
844 if (*pos_ == 'e' || *pos_ == 'E') {
845 NextChar();
846 if (*pos_ == '-' || *pos_ == '+')
847 NextChar();
848 if (!ReadInt(true)) {
849 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
850 return NULL;
852 end_index = index_;
855 // ReadInt is greedy because numbers have no easily detectable sentinel,
856 // so save off where the parser should be on exit (see Consume invariant at
857 // the top of the header), then make sure the next token is one which is
858 // valid.
859 const char* exit_pos = pos_ - 1;
860 int exit_index = index_ - 1;
862 switch (GetNextToken()) {
863 case T_OBJECT_END:
864 case T_ARRAY_END:
865 case T_LIST_SEPARATOR:
866 case T_END_OF_INPUT:
867 break;
868 default:
869 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
870 return NULL;
873 pos_ = exit_pos;
874 index_ = exit_index;
876 StringPiece num_string(num_start, end_index - start_index);
878 int num_int;
879 if (StringToInt(num_string, &num_int))
880 return new FundamentalValue(num_int);
882 double num_double;
883 if (StringToDouble(num_string.as_string(), &num_double) &&
884 std::isfinite(num_double)) {
885 return new FundamentalValue(num_double);
888 return NULL;
891 bool JSONParser::ReadInt(bool allow_leading_zeros) {
892 char first = *pos_;
893 int len = 0;
895 char c = first;
896 while (CanConsume(1) && IsAsciiDigit(c)) {
897 c = *NextChar();
898 ++len;
901 if (len == 0)
902 return false;
904 if (!allow_leading_zeros && len > 1 && first == '0')
905 return false;
907 return true;
910 Value* JSONParser::ConsumeLiteral() {
911 switch (*pos_) {
912 case 't': {
913 const char kTrueLiteral[] = "true";
914 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
915 if (!CanConsume(kTrueLen - 1) ||
916 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
917 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
918 return NULL;
920 NextNChars(kTrueLen - 1);
921 return new FundamentalValue(true);
923 case 'f': {
924 const char kFalseLiteral[] = "false";
925 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
926 if (!CanConsume(kFalseLen - 1) ||
927 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
928 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
929 return NULL;
931 NextNChars(kFalseLen - 1);
932 return new FundamentalValue(false);
934 case 'n': {
935 const char kNullLiteral[] = "null";
936 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
937 if (!CanConsume(kNullLen - 1) ||
938 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
939 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
940 return NULL;
942 NextNChars(kNullLen - 1);
943 return Value::CreateNullValue().release();
945 default:
946 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
947 return NULL;
951 // static
952 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
953 return strncmp(one, two, len) == 0;
956 void JSONParser::ReportError(JSONReader::JsonParseError code,
957 int column_adjust) {
958 error_code_ = code;
959 error_line_ = line_number_;
960 error_column_ = index_ - index_last_line_ + column_adjust;
963 // static
964 std::string JSONParser::FormatErrorMessage(int line, int column,
965 const std::string& description) {
966 if (line || column) {
967 return StringPrintf("Line: %i, column: %i, %s",
968 line, column, description.c_str());
970 return description;
973 } // namespace internal
974 } // namespace base