Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / jsoncpp / overrides / src / lib_json / json_reader.cpp
blobf8cfad70510c2e810746dc236eb4435279767dd3
1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 #if !defined(JSON_IS_AMALGAMATION)
7 # include <json/assertions.h>
8 # include <json/reader.h>
9 # include <json/value.h>
10 # include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstdio>
#include <cstring>
#include <istream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
20 #if _MSC_VER >= 1400 // VC++ 8.0
21 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
22 #endif
24 namespace Json {
26 // Implementation of class Features
27 // ////////////////////////////////
29 Features::Features()
30 : allowComments_( true )
31 , strictRoot_( false )
36 Features
37 Features::all()
39 return Features();
43 Features
44 Features::strictMode()
46 Features features;
47 features.allowComments_ = false;
48 features.strictRoot_ = true;
49 return features;
52 // Implementation of class Reader
53 // ////////////////////////////////
56 static inline bool
57 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
59 return c == c1 || c == c2 || c == c3 || c == c4;
62 static inline bool
63 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
65 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
69 static bool
70 containsNewLine( Reader::Location begin,
71 Reader::Location end )
73 for ( ;begin < end; ++begin )
74 if ( *begin == '\n' || *begin == '\r' )
75 return true;
76 return false;
80 // Class Reader
81 // //////////////////////////////////////////////////////////////////
83 Reader::Reader()
84 : errors_(),
85 document_(),
86 begin_(),
87 end_(),
88 current_(),
89 lastValueEnd_(),
90 lastValue_(),
91 commentsBefore_(),
92 features_( Features::all() ),
93 collectComments_()
98 Reader::Reader( const Features &features )
99 : errors_(),
100 document_(),
101 begin_(),
102 end_(),
103 current_(),
104 lastValueEnd_(),
105 lastValue_(),
106 commentsBefore_(),
107 features_( features ),
108 collectComments_()
113 bool
114 Reader::parse( const std::string &document,
115 Value &root,
116 bool collectComments )
118 document_ = document;
119 const char *begin = document_.c_str();
120 const char *end = begin + document_.length();
121 return parse( begin, end, root, collectComments );
125 bool
126 Reader::parse( std::istream& sin,
127 Value &root,
128 bool collectComments )
130 //std::istream_iterator<char> begin(sin);
131 //std::istream_iterator<char> end;
132 // Those would allow streamed input from a file, if parse() were a
133 // template function.
135 // Since std::string is reference-counted, this at least does not
136 // create an extra copy.
137 std::string doc;
138 std::getline(sin, doc, (char)EOF);
139 return parse( doc, root, collectComments );
142 bool
143 Reader::parse( const char *beginDoc, const char *endDoc,
144 Value &root,
145 bool collectComments )
147 if ( !features_.allowComments_ )
149 collectComments = false;
152 begin_ = beginDoc;
153 end_ = endDoc;
154 collectComments_ = collectComments;
155 current_ = begin_;
156 lastValueEnd_ = 0;
157 lastValue_ = 0;
158 commentsBefore_ = "";
159 errors_.clear();
160 while ( !nodes_.empty() )
161 nodes_.pop();
162 nodes_.push( &root );
164 bool successful = readValue();
165 Token token;
166 skipCommentTokens( token );
167 if ( collectComments_ && !commentsBefore_.empty() )
168 root.setComment( commentsBefore_, commentAfter );
169 if ( features_.strictRoot_ )
171 if ( !root.isArray() && !root.isObject() )
173 // Set error location to start of doc, ideally should be first token found in doc
174 token.type_ = tokenError;
175 token.start_ = beginDoc;
176 token.end_ = endDoc;
177 addError( "A valid JSON document must be either an array or an object value.",
178 token );
179 return false;
182 return successful;
186 bool
187 Reader::readValue()
189 Token token;
190 skipCommentTokens( token );
191 bool successful = true;
193 if ( collectComments_ && !commentsBefore_.empty() )
195 currentValue().setComment( commentsBefore_, commentBefore );
196 commentsBefore_ = "";
200 switch ( token.type_ )
202 case tokenObjectBegin:
203 successful = readObject( token );
204 break;
205 case tokenArrayBegin:
206 successful = readArray( token );
207 break;
208 case tokenNumber:
209 successful = decodeNumber( token );
210 break;
211 case tokenString:
212 successful = decodeString( token );
213 break;
214 case tokenTrue:
215 currentValue() = true;
216 break;
217 case tokenFalse:
218 currentValue() = false;
219 break;
220 case tokenNull:
221 currentValue() = Value();
222 break;
223 default:
224 return addError( "Syntax error: value, object or array expected.", token );
227 if ( collectComments_ )
229 lastValueEnd_ = current_;
230 lastValue_ = &currentValue();
233 return successful;
237 void
238 Reader::skipCommentTokens( Token &token )
240 if ( features_.allowComments_ )
244 readToken( token );
246 while ( token.type_ == tokenComment );
248 else
250 readToken( token );
255 bool
256 Reader::expectToken( TokenType type, Token &token, const char *message )
258 readToken( token );
259 if ( token.type_ != type )
260 return addError( message, token );
261 return true;
265 bool
266 Reader::readToken( Token &token )
268 skipSpaces();
269 token.start_ = current_;
270 Char c = getNextChar();
271 bool ok = true;
272 switch ( c )
274 case '{':
275 token.type_ = tokenObjectBegin;
276 break;
277 case '}':
278 token.type_ = tokenObjectEnd;
279 break;
280 case '[':
281 token.type_ = tokenArrayBegin;
282 break;
283 case ']':
284 token.type_ = tokenArrayEnd;
285 break;
286 case '"':
287 token.type_ = tokenString;
288 ok = readString();
289 break;
290 case '/':
291 token.type_ = tokenComment;
292 ok = readComment();
293 break;
294 case '0':
295 case '1':
296 case '2':
297 case '3':
298 case '4':
299 case '5':
300 case '6':
301 case '7':
302 case '8':
303 case '9':
304 case '-':
305 token.type_ = tokenNumber;
306 readNumber();
307 break;
308 case 't':
309 token.type_ = tokenTrue;
310 ok = match( "rue", 3 );
311 break;
312 case 'f':
313 token.type_ = tokenFalse;
314 ok = match( "alse", 4 );
315 break;
316 case 'n':
317 token.type_ = tokenNull;
318 ok = match( "ull", 3 );
319 break;
320 case ',':
321 token.type_ = tokenArraySeparator;
322 break;
323 case ':':
324 token.type_ = tokenMemberSeparator;
325 break;
326 case 0:
327 token.type_ = tokenEndOfStream;
328 break;
329 default:
330 ok = false;
331 break;
333 if ( !ok )
334 token.type_ = tokenError;
335 token.end_ = current_;
336 return true;
340 void
341 Reader::skipSpaces()
343 while ( current_ != end_ )
345 Char c = *current_;
346 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
347 ++current_;
348 else
349 break;
354 bool
355 Reader::match( Location pattern,
356 int patternLength )
358 if ( end_ - current_ < patternLength )
359 return false;
360 int index = patternLength;
361 while ( index-- )
362 if ( current_[index] != pattern[index] )
363 return false;
364 current_ += patternLength;
365 return true;
369 bool
370 Reader::readComment()
372 Location commentBegin = current_ - 1;
373 Char c = getNextChar();
374 bool successful = false;
375 if ( c == '*' )
376 successful = readCStyleComment();
377 else if ( c == '/' )
378 successful = readCppStyleComment();
379 if ( !successful )
380 return false;
382 if ( collectComments_ )
384 CommentPlacement placement = commentBefore;
385 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
387 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
388 placement = commentAfterOnSameLine;
391 addComment( commentBegin, current_, placement );
393 return true;
397 void
398 Reader::addComment( Location begin,
399 Location end,
400 CommentPlacement placement )
402 assert( collectComments_ );
403 if ( placement == commentAfterOnSameLine )
405 assert( lastValue_ != 0 );
406 lastValue_->setComment( std::string( begin, end ), placement );
408 else
410 if ( !commentsBefore_.empty() )
411 commentsBefore_ += "\n";
412 commentsBefore_ += std::string( begin, end );
417 bool
418 Reader::readCStyleComment()
420 while ( current_ != end_ )
422 Char c = getNextChar();
423 if ( c == '*' && *current_ == '/' )
424 break;
426 return getNextChar() == '/';
430 bool
431 Reader::readCppStyleComment()
433 while ( current_ != end_ )
435 Char c = getNextChar();
436 if ( c == '\r' || c == '\n' )
437 break;
439 return true;
443 void
444 Reader::readNumber()
446 while ( current_ != end_ )
448 if ( !(*current_ >= '0' && *current_ <= '9') &&
449 !in( *current_, '.', 'e', 'E', '+', '-' ) )
450 break;
451 ++current_;
455 bool
456 Reader::readString()
458 Char c = 0;
459 while ( current_ != end_ )
461 c = getNextChar();
462 if ( c == '\\' )
463 getNextChar();
464 else if ( c == '"' )
465 break;
467 return c == '"';
471 bool
472 Reader::readObject( Token &/*tokenStart*/ )
474 Token tokenName;
475 std::string name;
476 currentValue() = Value( objectValue );
477 while ( readToken( tokenName ) )
479 bool initialTokenOk = true;
480 while ( tokenName.type_ == tokenComment && initialTokenOk )
481 initialTokenOk = readToken( tokenName );
482 if ( !initialTokenOk )
483 break;
484 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
485 return true;
486 if ( tokenName.type_ != tokenString )
487 break;
489 name = "";
490 if ( !decodeString( tokenName, name ) )
491 return recoverFromError( tokenObjectEnd );
493 Token colon;
494 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
496 return addErrorAndRecover( "Missing ':' after object member name",
497 colon,
498 tokenObjectEnd );
500 Value &value = currentValue()[ name ];
501 nodes_.push( &value );
502 bool ok = readValue();
503 nodes_.pop();
504 if ( !ok ) // error already set
505 return recoverFromError( tokenObjectEnd );
507 Token comma;
508 if ( !readToken( comma )
509 || ( comma.type_ != tokenObjectEnd &&
510 comma.type_ != tokenArraySeparator &&
511 comma.type_ != tokenComment ) )
513 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
514 comma,
515 tokenObjectEnd );
517 bool finalizeTokenOk = true;
518 while ( comma.type_ == tokenComment &&
519 finalizeTokenOk )
520 finalizeTokenOk = readToken( comma );
521 if ( comma.type_ == tokenObjectEnd )
522 return true;
524 return addErrorAndRecover( "Missing '}' or object member name",
525 tokenName,
526 tokenObjectEnd );
530 bool
531 Reader::readArray( Token &/*tokenStart*/ )
533 currentValue() = Value( arrayValue );
534 skipSpaces();
535 if ( *current_ == ']' ) // empty array
537 Token endArray;
538 readToken( endArray );
539 return true;
541 int index = 0;
542 for (;;)
544 Value &value = currentValue()[ index++ ];
545 nodes_.push( &value );
546 bool ok = readValue();
547 nodes_.pop();
548 if ( !ok ) // error already set
549 return recoverFromError( tokenArrayEnd );
551 Token token;
552 // Accept Comment after last item in the array.
553 ok = readToken( token );
554 while ( token.type_ == tokenComment && ok )
556 ok = readToken( token );
558 bool badTokenType = ( token.type_ != tokenArraySeparator &&
559 token.type_ != tokenArrayEnd );
560 if ( !ok || badTokenType )
562 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
563 token,
564 tokenArrayEnd );
566 if ( token.type_ == tokenArrayEnd )
567 break;
569 return true;
573 bool
574 Reader::decodeNumber( Token &token )
576 bool isDouble = false;
577 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
579 isDouble = isDouble
580 || in( *inspect, '.', 'e', 'E', '+' )
581 || ( *inspect == '-' && inspect != token.start_ );
583 if ( isDouble )
584 return decodeDouble( token );
585 // Attempts to parse the number as an integer. If the number is
586 // larger than the maximum supported value of an integer then
587 // we decode the number as a double.
588 Location current = token.start_;
589 bool isNegative = *current == '-';
590 if ( isNegative )
591 ++current;
592 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
593 : Value::maxLargestUInt;
594 Value::LargestUInt threshold = maxIntegerValue / 10;
595 Value::LargestUInt value = 0;
596 while ( current < token.end_ )
598 Char c = *current++;
599 if ( c < '0' || c > '9' )
600 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
601 Value::UInt digit(c - '0');
602 if ( value >= threshold )
604 // We've hit or exceeded the max value divided by 10 (rounded down). If
605 // a) we've only just touched the limit, b) this is the last digit, and
606 // c) it's small enough to fit in that rounding delta, we're okay.
607 // Otherwise treat this number as a double to avoid overflow.
608 if (value > threshold ||
609 current != token.end_ ||
610 digit > maxIntegerValue % 10)
612 return decodeDouble( token );
615 value = value * 10 + digit;
617 if ( isNegative )
618 currentValue() = -Value::LargestInt( value );
619 else if ( value <= Value::LargestUInt(Value::maxInt) )
620 currentValue() = Value::LargestInt( value );
621 else
622 currentValue() = value;
623 return true;
627 bool
628 Reader::decodeDouble( Token &token )
630 double value = 0;
631 const int bufferSize = 32;
632 int count;
633 int length = int(token.end_ - token.start_);
635 // Sanity check to avoid buffer overflow exploits.
636 if (length < 0) {
637 return addError( "Unable to parse token length", token );
640 // Avoid using a string constant for the format control string given to
641 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
642 // info:
644 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
645 char format[] = "%lf";
647 if ( length <= bufferSize )
649 Char buffer[bufferSize+1];
650 memcpy( buffer, token.start_, length );
651 buffer[length] = 0;
652 count = sscanf( buffer, format, &value );
654 else
656 std::string buffer( token.start_, token.end_ );
657 count = sscanf( buffer.c_str(), format, &value );
660 if ( count != 1 )
661 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
662 currentValue() = value;
663 return true;
667 bool
668 Reader::decodeString( Token &token )
670 std::string decoded;
671 if ( !decodeString( token, decoded ) )
672 return false;
673 currentValue() = decoded;
674 return true;
678 bool
679 Reader::decodeString( Token &token, std::string &decoded )
681 decoded.reserve( token.end_ - token.start_ - 2 );
682 Location current = token.start_ + 1; // skip '"'
683 Location end = token.end_ - 1; // do not include '"'
684 while ( current != end )
686 Char c = *current++;
687 if ( c == '"' )
688 break;
689 else if ( c == '\\' )
691 if ( current == end )
692 return addError( "Empty escape sequence in string", token, current );
693 Char escape = *current++;
694 switch ( escape )
696 case '"': decoded += '"'; break;
697 case '/': decoded += '/'; break;
698 case '\\': decoded += '\\'; break;
699 case 'b': decoded += '\b'; break;
700 case 'f': decoded += '\f'; break;
701 case 'n': decoded += '\n'; break;
702 case 'r': decoded += '\r'; break;
703 case 't': decoded += '\t'; break;
704 case 'u':
706 unsigned int unicode;
707 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
708 return false;
709 decoded += codePointToUTF8(unicode);
711 break;
712 default:
713 return addError( "Bad escape sequence in string", token, current );
716 else
718 decoded += c;
721 return true;
724 bool
725 Reader::decodeUnicodeCodePoint( Token &token,
726 Location &current,
727 Location end,
728 unsigned int &unicode )
731 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
732 return false;
733 if (unicode >= 0xD800 && unicode <= 0xDBFF)
735 // surrogate pairs
736 if (end - current < 6)
737 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
738 unsigned int surrogatePair;
739 if (*(current++) == '\\' && *(current++)== 'u')
741 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
743 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
745 else
746 return false;
748 else
749 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
751 return true;
754 bool
755 Reader::decodeUnicodeEscapeSequence( Token &token,
756 Location &current,
757 Location end,
758 unsigned int &unicode )
760 if ( end - current < 4 )
761 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
762 unicode = 0;
763 for ( int index =0; index < 4; ++index )
765 Char c = *current++;
766 unicode *= 16;
767 if ( c >= '0' && c <= '9' )
768 unicode += c - '0';
769 else if ( c >= 'a' && c <= 'f' )
770 unicode += c - 'a' + 10;
771 else if ( c >= 'A' && c <= 'F' )
772 unicode += c - 'A' + 10;
773 else
774 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
776 return true;
780 bool
781 Reader::addError( const std::string &message,
782 Token &token,
783 Location extra )
785 ErrorInfo info;
786 info.token_ = token;
787 info.message_ = message;
788 info.extra_ = extra;
789 errors_.push_back( info );
790 return false;
794 bool
795 Reader::recoverFromError( TokenType skipUntilToken )
797 int errorCount = int(errors_.size());
798 Token skip;
799 for (;;)
801 if ( !readToken(skip) )
802 errors_.resize( errorCount ); // discard errors caused by recovery
803 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
804 break;
806 errors_.resize( errorCount );
807 return false;
811 bool
812 Reader::addErrorAndRecover( const std::string &message,
813 Token &token,
814 TokenType skipUntilToken )
816 addError( message, token );
817 return recoverFromError( skipUntilToken );
821 Value &
822 Reader::currentValue()
824 return *(nodes_.top());
828 Reader::Char
829 Reader::getNextChar()
831 if ( current_ == end_ )
832 return 0;
833 return *current_++;
837 void
838 Reader::getLocationLineAndColumn( Location location,
839 int &line,
840 int &column ) const
842 Location current = begin_;
843 Location lastLineStart = current;
844 line = 0;
845 while ( current < location && current != end_ )
847 Char c = *current++;
848 if ( c == '\r' )
850 if ( *current == '\n' )
851 ++current;
852 lastLineStart = current;
853 ++line;
855 else if ( c == '\n' )
857 lastLineStart = current;
858 ++line;
861 // column & line start at 1
862 column = int(location - lastLineStart) + 1;
863 ++line;
867 std::string
868 Reader::getLocationLineAndColumn( Location location ) const
870 int line, column;
871 getLocationLineAndColumn( location, line, column );
872 char buffer[18+16+16+1];
873 sprintf( buffer, "Line %d, Column %d", line, column );
874 return buffer;
878 // Deprecated. Preserved for backward compatibility
879 std::string
880 Reader::getFormatedErrorMessages() const
882 return getFormattedErrorMessages();
886 std::string
887 Reader::getFormattedErrorMessages() const
889 std::string formattedMessage;
890 for ( Errors::const_iterator itError = errors_.begin();
891 itError != errors_.end();
892 ++itError )
894 const ErrorInfo &error = *itError;
895 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
896 formattedMessage += " " + error.message_ + "\n";
897 if ( error.extra_ )
898 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
900 return formattedMessage;
904 std::istream& operator>>( std::istream &sin, Value &root )
906 Json::Reader reader;
907 bool ok = reader.parse(sin, root, true);
908 if (!ok) {
909 fprintf(
910 stderr,
911 "Error from reader: %s",
912 reader.getFormattedErrorMessages().c_str());
914 JSON_FAIL_MESSAGE("reader error");
916 return sin;
920 } // namespace Json