1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 #if !defined(JSON_IS_AMALGAMATION)
7 # include <json/assertions.h>
8 # include <json/reader.h>
9 # include <json/value.h>
10 # include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
20 #if _MSC_VER >= 1400 // VC++ 8.0
21 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
26 // Implementation of class Features
27 // ////////////////////////////////
30 : allowComments_( true )
31 , strictRoot_( false )
44 Features::strictMode()
47 features
.allowComments_
= false;
48 features
.strictRoot_
= true;
52 // Implementation of class Reader
53 // ////////////////////////////////
57 in( Reader::Char c
, Reader::Char c1
, Reader::Char c2
, Reader::Char c3
, Reader::Char c4
)
59 return c
== c1
|| c
== c2
|| c
== c3
|| c
== c4
;
63 in( Reader::Char c
, Reader::Char c1
, Reader::Char c2
, Reader::Char c3
, Reader::Char c4
, Reader::Char c5
)
65 return c
== c1
|| c
== c2
|| c
== c3
|| c
== c4
|| c
== c5
;
70 containsNewLine( Reader::Location begin
,
71 Reader::Location end
)
73 for ( ;begin
< end
; ++begin
)
74 if ( *begin
== '\n' || *begin
== '\r' )
81 // //////////////////////////////////////////////////////////////////
92 features_( Features::all() ),
98 Reader::Reader( const Features
&features
)
107 features_( features
),
114 Reader::parse( const std::string
&document
,
116 bool collectComments
)
118 document_
= document
;
119 const char *begin
= document_
.c_str();
120 const char *end
= begin
+ document_
.length();
121 return parse( begin
, end
, root
, collectComments
);
126 Reader::parse( std::istream
& sin
,
128 bool collectComments
)
130 //std::istream_iterator<char> begin(sin);
131 //std::istream_iterator<char> end;
132 // Those would allow streamed input from a file, if parse() were a
133 // template function.
135 // Since std::string is reference-counted, this at least does not
136 // create an extra copy.
138 std::getline(sin
, doc
, (char)EOF
);
139 return parse( doc
, root
, collectComments
);
143 Reader::parse( const char *beginDoc
, const char *endDoc
,
145 bool collectComments
)
147 if ( !features_
.allowComments_
)
149 collectComments
= false;
154 collectComments_
= collectComments
;
158 commentsBefore_
= "";
160 while ( !nodes_
.empty() )
162 nodes_
.push( &root
);
164 bool successful
= readValue();
166 skipCommentTokens( token
);
167 if ( collectComments_
&& !commentsBefore_
.empty() )
168 root
.setComment( commentsBefore_
, commentAfter
);
169 if ( features_
.strictRoot_
)
171 if ( !root
.isArray() && !root
.isObject() )
173 // Set the error location to the start of the document; ideally it should be the first token found in the document.
174 token
.type_
= tokenError
;
175 token
.start_
= beginDoc
;
177 addError( "A valid JSON document must be either an array or an object value.",
190 skipCommentTokens( token
);
191 bool successful
= true;
193 if ( collectComments_
&& !commentsBefore_
.empty() )
195 currentValue().setComment( commentsBefore_
, commentBefore
);
196 commentsBefore_
= "";
200 switch ( token
.type_
)
202 case tokenObjectBegin
:
203 successful
= readObject( token
);
205 case tokenArrayBegin
:
206 successful
= readArray( token
);
209 successful
= decodeNumber( token
);
212 successful
= decodeString( token
);
215 currentValue() = true;
218 currentValue() = false;
221 currentValue() = Value();
224 return addError( "Syntax error: value, object or array expected.", token
);
227 if ( collectComments_
)
229 lastValueEnd_
= current_
;
230 lastValue_
= &currentValue();
238 Reader::skipCommentTokens( Token
&token
)
240 if ( features_
.allowComments_
)
246 while ( token
.type_
== tokenComment
);
256 Reader::expectToken( TokenType type
, Token
&token
, const char *message
)
259 if ( token
.type_
!= type
)
260 return addError( message
, token
);
266 Reader::readToken( Token
&token
)
269 token
.start_
= current_
;
270 Char c
= getNextChar();
275 token
.type_
= tokenObjectBegin
;
278 token
.type_
= tokenObjectEnd
;
281 token
.type_
= tokenArrayBegin
;
284 token
.type_
= tokenArrayEnd
;
287 token
.type_
= tokenString
;
291 token
.type_
= tokenComment
;
305 token
.type_
= tokenNumber
;
309 token
.type_
= tokenTrue
;
310 ok
= match( "rue", 3 );
313 token
.type_
= tokenFalse
;
314 ok
= match( "alse", 4 );
317 token
.type_
= tokenNull
;
318 ok
= match( "ull", 3 );
321 token
.type_
= tokenArraySeparator
;
324 token
.type_
= tokenMemberSeparator
;
327 token
.type_
= tokenEndOfStream
;
334 token
.type_
= tokenError
;
335 token
.end_
= current_
;
343 while ( current_
!= end_
)
346 if ( c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' )
355 Reader::match( Location pattern
,
358 if ( end_
- current_
< patternLength
)
360 int index
= patternLength
;
362 if ( current_
[index
] != pattern
[index
] )
364 current_
+= patternLength
;
370 Reader::readComment()
372 Location commentBegin
= current_
- 1;
373 Char c
= getNextChar();
374 bool successful
= false;
376 successful
= readCStyleComment();
378 successful
= readCppStyleComment();
382 if ( collectComments_
)
384 CommentPlacement placement
= commentBefore
;
385 if ( lastValueEnd_
&& !containsNewLine( lastValueEnd_
, commentBegin
) )
387 if ( c
!= '*' || !containsNewLine( commentBegin
, current_
) )
388 placement
= commentAfterOnSameLine
;
391 addComment( commentBegin
, current_
, placement
);
398 Reader::addComment( Location begin
,
400 CommentPlacement placement
)
402 assert( collectComments_
);
403 if ( placement
== commentAfterOnSameLine
)
405 assert( lastValue_
!= 0 );
406 lastValue_
->setComment( std::string( begin
, end
), placement
);
410 if ( !commentsBefore_
.empty() )
411 commentsBefore_
+= "\n";
412 commentsBefore_
+= std::string( begin
, end
);
418 Reader::readCStyleComment()
420 while ( current_
!= end_
)
422 Char c
= getNextChar();
423 if ( c
== '*' && *current_
== '/' )
426 return getNextChar() == '/';
431 Reader::readCppStyleComment()
433 while ( current_
!= end_
)
435 Char c
= getNextChar();
436 if ( c
== '\r' || c
== '\n' )
446 while ( current_
!= end_
)
448 if ( !(*current_
>= '0' && *current_
<= '9') &&
449 !in( *current_
, '.', 'e', 'E', '+', '-' ) )
459 while ( current_
!= end_
)
472 Reader::readObject( Token
&/*tokenStart*/ )
476 currentValue() = Value( objectValue
);
477 while ( readToken( tokenName
) )
479 bool initialTokenOk
= true;
480 while ( tokenName
.type_
== tokenComment
&& initialTokenOk
)
481 initialTokenOk
= readToken( tokenName
);
482 if ( !initialTokenOk
)
484 if ( tokenName
.type_
== tokenObjectEnd
&& name
.empty() ) // empty object
486 if ( tokenName
.type_
!= tokenString
)
490 if ( !decodeString( tokenName
, name
) )
491 return recoverFromError( tokenObjectEnd
);
494 if ( !readToken( colon
) || colon
.type_
!= tokenMemberSeparator
)
496 return addErrorAndRecover( "Missing ':' after object member name",
500 Value
&value
= currentValue()[ name
];
501 nodes_
.push( &value
);
502 bool ok
= readValue();
504 if ( !ok
) // error already set
505 return recoverFromError( tokenObjectEnd
);
508 if ( !readToken( comma
)
509 || ( comma
.type_
!= tokenObjectEnd
&&
510 comma
.type_
!= tokenArraySeparator
&&
511 comma
.type_
!= tokenComment
) )
513 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
517 bool finalizeTokenOk
= true;
518 while ( comma
.type_
== tokenComment
&&
520 finalizeTokenOk
= readToken( comma
);
521 if ( comma
.type_
== tokenObjectEnd
)
524 return addErrorAndRecover( "Missing '}' or object member name",
531 Reader::readArray( Token
&/*tokenStart*/ )
533 currentValue() = Value( arrayValue
);
535 if ( *current_
== ']' ) // empty array
538 readToken( endArray
);
544 Value
&value
= currentValue()[ index
++ ];
545 nodes_
.push( &value
);
546 bool ok
= readValue();
548 if ( !ok
) // error already set
549 return recoverFromError( tokenArrayEnd
);
552 // Accept Comment after last item in the array.
553 ok
= readToken( token
);
554 while ( token
.type_
== tokenComment
&& ok
)
556 ok
= readToken( token
);
558 bool badTokenType
= ( token
.type_
!= tokenArraySeparator
&&
559 token
.type_
!= tokenArrayEnd
);
560 if ( !ok
|| badTokenType
)
562 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
566 if ( token
.type_
== tokenArrayEnd
)
574 Reader::decodeNumber( Token
&token
)
576 bool isDouble
= false;
577 for ( Location inspect
= token
.start_
; inspect
!= token
.end_
; ++inspect
)
580 || in( *inspect
, '.', 'e', 'E', '+' )
581 || ( *inspect
== '-' && inspect
!= token
.start_
);
584 return decodeDouble( token
);
585 // Attempts to parse the number as an integer. If the number is
586 // larger than the maximum supported value of an integer then
587 // we decode the number as a double.
588 Location current
= token
.start_
;
589 bool isNegative
= *current
== '-';
592 Value::LargestUInt maxIntegerValue
= isNegative
? Value::LargestUInt(-Value::minLargestInt
)
593 : Value::maxLargestUInt
;
594 Value::LargestUInt threshold
= maxIntegerValue
/ 10;
595 Value::LargestUInt value
= 0;
596 while ( current
< token
.end_
)
599 if ( c
< '0' || c
> '9' )
600 return addError( "'" + std::string( token
.start_
, token
.end_
) + "' is not a number.", token
);
601 Value::UInt
digit(c
- '0');
602 if ( value
>= threshold
)
604 // We've hit or exceeded the max value divided by 10 (rounded down). If
605 // a) we've only just touched the limit, b) this is the last digit, and
606 // c) it's small enough to fit in that rounding delta, we're okay.
607 // Otherwise treat this number as a double to avoid overflow.
608 if (value
> threshold
||
609 current
!= token
.end_
||
610 digit
> maxIntegerValue
% 10)
612 return decodeDouble( token
);
615 value
= value
* 10 + digit
;
618 currentValue() = -Value::LargestInt( value
);
619 else if ( value
<= Value::LargestUInt(Value::maxInt
) )
620 currentValue() = Value::LargestInt( value
);
622 currentValue() = value
;
628 Reader::decodeDouble( Token
&token
)
631 const int bufferSize
= 32;
633 int length
= int(token
.end_
- token
.start_
);
635 // Sanity check to avoid buffer overflow exploits.
637 return addError( "Unable to parse token length", token
);
640 // Avoid using a string constant for the format control string given to
641 // sscanf, as this can cause hard-to-debug crashes on OS X. See here for more
644 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
645 char format
[] = "%lf";
647 if ( length
<= bufferSize
)
649 Char buffer
[bufferSize
+1];
650 memcpy( buffer
, token
.start_
, length
);
652 count
= sscanf( buffer
, format
, &value
);
656 std::string
buffer( token
.start_
, token
.end_
);
657 count
= sscanf( buffer
.c_str(), format
, &value
);
661 return addError( "'" + std::string( token
.start_
, token
.end_
) + "' is not a number.", token
);
662 currentValue() = value
;
668 Reader::decodeString( Token
&token
)
671 if ( !decodeString( token
, decoded
) )
673 currentValue() = decoded
;
679 Reader::decodeString( Token
&token
, std::string
&decoded
)
681 decoded
.reserve( token
.end_
- token
.start_
- 2 );
682 Location current
= token
.start_
+ 1; // skip '"'
683 Location end
= token
.end_
- 1; // do not include '"'
684 while ( current
!= end
)
689 else if ( c
== '\\' )
691 if ( current
== end
)
692 return addError( "Empty escape sequence in string", token
, current
);
693 Char escape
= *current
++;
696 case '"': decoded
+= '"'; break;
697 case '/': decoded
+= '/'; break;
698 case '\\': decoded
+= '\\'; break;
699 case 'b': decoded
+= '\b'; break;
700 case 'f': decoded
+= '\f'; break;
701 case 'n': decoded
+= '\n'; break;
702 case 'r': decoded
+= '\r'; break;
703 case 't': decoded
+= '\t'; break;
706 unsigned int unicode
;
707 if ( !decodeUnicodeCodePoint( token
, current
, end
, unicode
) )
709 decoded
+= codePointToUTF8(unicode
);
713 return addError( "Bad escape sequence in string", token
, current
);
725 Reader::decodeUnicodeCodePoint( Token
&token
,
728 unsigned int &unicode
)
731 if ( !decodeUnicodeEscapeSequence( token
, current
, end
, unicode
) )
733 if (unicode
>= 0xD800 && unicode
<= 0xDBFF)
736 if (end
- current
< 6)
737 return addError( "additional six characters expected to parse unicode surrogate pair.", token
, current
);
738 unsigned int surrogatePair
;
739 if (*(current
++) == '\\' && *(current
++)== 'u')
741 if (decodeUnicodeEscapeSequence( token
, current
, end
, surrogatePair
))
743 unicode
= 0x10000 + ((unicode
& 0x3FF) << 10) + (surrogatePair
& 0x3FF);
749 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token
, current
);
755 Reader::decodeUnicodeEscapeSequence( Token
&token
,
758 unsigned int &unicode
)
760 if ( end
- current
< 4 )
761 return addError( "Bad unicode escape sequence in string: four digits expected.", token
, current
);
763 for ( int index
=0; index
< 4; ++index
)
767 if ( c
>= '0' && c
<= '9' )
769 else if ( c
>= 'a' && c
<= 'f' )
770 unicode
+= c
- 'a' + 10;
771 else if ( c
>= 'A' && c
<= 'F' )
772 unicode
+= c
- 'A' + 10;
774 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token
, current
);
781 Reader::addError( const std::string
&message
,
787 info
.message_
= message
;
789 errors_
.push_back( info
);
795 Reader::recoverFromError( TokenType skipUntilToken
)
797 int errorCount
= int(errors_
.size());
801 if ( !readToken(skip
) )
802 errors_
.resize( errorCount
); // discard errors caused by recovery
803 if ( skip
.type_
== skipUntilToken
|| skip
.type_
== tokenEndOfStream
)
806 errors_
.resize( errorCount
);
812 Reader::addErrorAndRecover( const std::string
&message
,
814 TokenType skipUntilToken
)
816 addError( message
, token
);
817 return recoverFromError( skipUntilToken
);
822 Reader::currentValue()
824 return *(nodes_
.top());
829 Reader::getNextChar()
831 if ( current_
== end_
)
838 Reader::getLocationLineAndColumn( Location location
,
842 Location current
= begin_
;
843 Location lastLineStart
= current
;
845 while ( current
< location
&& current
!= end_
)
850 if ( *current
== '\n' )
852 lastLineStart
= current
;
855 else if ( c
== '\n' )
857 lastLineStart
= current
;
861 // column & line start at 1
862 column
= int(location
- lastLineStart
) + 1;
868 Reader::getLocationLineAndColumn( Location location
) const
871 getLocationLineAndColumn( location
, line
, column
);
872 char buffer
[18+16+16+1];
873 sprintf( buffer
, "Line %d, Column %d", line
, column
);
878 // Deprecated. Preserved for backward compatibility
880 Reader::getFormatedErrorMessages() const
882 return getFormattedErrorMessages();
887 Reader::getFormattedErrorMessages() const
889 std::string formattedMessage
;
890 for ( Errors::const_iterator itError
= errors_
.begin();
891 itError
!= errors_
.end();
894 const ErrorInfo
&error
= *itError
;
895 formattedMessage
+= "* " + getLocationLineAndColumn( error
.token_
.start_
) + "\n";
896 formattedMessage
+= " " + error
.message_
+ "\n";
898 formattedMessage
+= "See " + getLocationLineAndColumn( error
.extra_
) + " for detail.\n";
900 return formattedMessage
;
904 std::istream
& operator>>( std::istream
&sin
, Value
&root
)
907 bool ok
= reader
.parse(sin
, root
, true);
911 "Error from reader: %s",
912 reader
.getFormattedErrorMessages().c_str());
914 JSON_FAIL_MESSAGE("reader error");