3 #include "yaml-cpp/exceptions.h"
10 Scanner::Scanner(std::istream
& in
)
11 : INPUT(in
), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false)
20 // . Returns true if there are no more tokens to be read
23 EnsureTokensInQueue();
24 return m_tokens
.empty();
28 // . Simply removes the next token on the queue.
31 EnsureTokensInQueue();
37 // . Returns (but does not remove) the next token on the queue.
38 Token
& Scanner::peek()
40 EnsureTokensInQueue();
41 assert(!m_tokens
.empty()); // should we be asserting here? I mean, we should really just be checking
42 // if it's empty before peeking.
45 static Token
*pLast
= 0;
46 if(pLast
!= &m_tokens
.front())
47 std::cerr
<< "peek: " << m_tokens
.front() << "\n";
48 pLast
= &m_tokens
.front();
51 return m_tokens
.front();
54 // EnsureTokensInQueue
55 // . Scan until there's a valid token at the front of the queue,
56 // or we're sure the queue is empty.
57 void Scanner::EnsureTokensInQueue()
60 if(!m_tokens
.empty()) {
61 Token
& token
= m_tokens
.front();
63 // if this guy's valid, then we're done
64 if(token
.status
== Token::VALID
)
67 // here's where we clean up the impossible tokens
68 if(token
.status
== Token::INVALID
) {
73 // note: what's left are the unverified tokens
76 // no token? maybe we've actually finished
86 // . The main scanning function; here we branch out and
87 // scan whatever the next token should be.
88 void Scanner::ScanNextToken()
96 // get rid of whitespace, etc. (in between tokens it should be irrelevant)
99 // maybe need to end some blocks
103 // And now branch based on the next few characters!
110 if(INPUT
.column() == 0 && INPUT
.peek() == Keys::Directive
)
111 return ScanDirective();
114 if(INPUT
.column() == 0 && Exp::DocStart().Matches(INPUT
))
115 return ScanDocStart();
117 if(INPUT
.column() == 0 && Exp::DocEnd().Matches(INPUT
))
120 // flow start/end/entry
121 if(INPUT
.peek() == Keys::FlowSeqStart
|| INPUT
.peek() == Keys::FlowMapStart
)
122 return ScanFlowStart();
124 if(INPUT
.peek() == Keys::FlowSeqEnd
|| INPUT
.peek() == Keys::FlowMapEnd
)
125 return ScanFlowEnd();
127 if(INPUT
.peek() == Keys::FlowEntry
)
128 return ScanFlowEntry();
131 if(Exp::BlockEntry().Matches(INPUT
))
132 return ScanBlockEntry();
134 if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT
))
137 if(GetValueRegex().Matches(INPUT
))
141 if(INPUT
.peek() == Keys::Alias
|| INPUT
.peek() == Keys::Anchor
)
142 return ScanAnchorOrAlias();
145 if(INPUT
.peek() == Keys::Tag
)
149 if(InBlockContext() && (INPUT
.peek() == Keys::LiteralScalar
|| INPUT
.peek() == Keys::FoldedScalar
))
150 return ScanBlockScalar();
152 if(INPUT
.peek() == '\'' || INPUT
.peek() == '\"')
153 return ScanQuotedScalar();
156 if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT
))
157 return ScanPlainScalar();
159 // don't know what it is!
160 throw ParserException(INPUT
.mark(), ErrorMsg::UNKNOWN_TOKEN
);
164 // . Eats input until we reach the next token-like thing.
165 void Scanner::ScanToNextToken()
168 // first eat whitespace
169 while(INPUT
&& IsWhitespaceToBeEaten(INPUT
.peek())) {
170 if(InBlockContext() && Exp::Tab().Matches(INPUT
))
171 m_simpleKeyAllowed
= false;
175 // then eat a comment
176 if(Exp::Comment().Matches(INPUT
)) {
177 // eat until line break
178 while(INPUT
&& !Exp::Break().Matches(INPUT
))
182 // if it's NOT a line break, then we're done!
183 if(!Exp::Break().Matches(INPUT
))
186 // otherwise, let's eat the line break and keep going
187 int n
= Exp::Break().Match(INPUT
);
190 // oh yeah, and let's get rid of that simple key
191 InvalidateSimpleKey();
193 // new line - we may be able to accept a simple key now
195 m_simpleKeyAllowed
= true;
199 ///////////////////////////////////////////////////////////////////////
202 // IsWhitespaceToBeEaten
203 // . We can eat whitespace if it's a space or tab
204 // . Note: originally tabs in block context couldn't be eaten
205 // "where a simple key could be allowed
206 // (i.e., not at the beginning of a line, or following '-', '?', or ':')"
207 // I think this is wrong, since tabs can be non-content whitespace; it's just
208 // that they can't contribute to indentation, so once you've seen a tab in a
209 // line, you can't start a simple key
210 bool Scanner::IsWhitespaceToBeEaten(char ch
)
222 // . Get the appropriate regex to check if it's a value token
223 const RegEx
& Scanner::GetValueRegex() const
228 return m_canBeJSONFlow
? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
232 // . Set the initial conditions for starting a stream.
233 void Scanner::StartStream()
235 m_startedStream
= true;
236 m_simpleKeyAllowed
= true;
237 std::auto_ptr
<IndentMarker
> pIndent(new IndentMarker(-1, IndentMarker::NONE
));
238 m_indentRefs
.push_back(pIndent
);
239 m_indents
.push(&m_indentRefs
.back());
243 // . Close out the stream, finish up, etc.
244 void Scanner::EndStream()
247 if(INPUT
.column() > 0)
253 m_simpleKeyAllowed
= false;
254 m_endedStream
= true;
257 Token
*Scanner::PushToken(Token::TYPE type
)
259 m_tokens
.push(Token(type
, INPUT
.mark()));
260 return &m_tokens
.back();
263 Token::TYPE
Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type
) const
266 case IndentMarker::SEQ
: return Token::BLOCK_SEQ_START
;
267 case IndentMarker::MAP
: return Token::BLOCK_MAP_START
;
268 case IndentMarker::NONE
: assert(false); break;
271 throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
275 // . Pushes an indentation onto the stack, and enqueues the
276 // proper token (sequence start or mapping start).
277 // . Returns the indent marker it generates (if any).
278 Scanner::IndentMarker
*Scanner::PushIndentTo(int column
, IndentMarker::INDENT_TYPE type
)
284 std::auto_ptr
<IndentMarker
> pIndent(new IndentMarker(column
, type
));
285 IndentMarker
& indent
= *pIndent
;
286 const IndentMarker
& lastIndent
= *m_indents
.top();
288 // is this actually an indentation?
289 if(indent
.column
< lastIndent
.column
)
291 if(indent
.column
== lastIndent
.column
&& !(indent
.type
== IndentMarker::SEQ
&& lastIndent
.type
== IndentMarker::MAP
))
294 // push a start token
295 indent
.pStartToken
= PushToken(GetStartTokenFor(type
));
297 // and then the indent
298 m_indents
.push(&indent
);
299 m_indentRefs
.push_back(pIndent
);
300 return &m_indentRefs
.back();
304 // . Pops indentations off the stack until we reach the current indentation level,
305 // and enqueues the proper token each time.
306 // . Then pops all invalid indentations off.
307 void Scanner::PopIndentToHere()
314 while(!m_indents
.empty()) {
315 const IndentMarker
& indent
= *m_indents
.top();
316 if(indent
.column
< INPUT
.column())
318 if(indent
.column
== INPUT
.column() && !(indent
.type
== IndentMarker::SEQ
&& !Exp::BlockEntry().Matches(INPUT
)))
324 while(!m_indents
.empty() && m_indents
.top()->status
== IndentMarker::INVALID
)
329 // . Pops all indentations (except for the base empty one) off the stack,
330 // and enqueues the proper token each time.
331 void Scanner::PopAllIndents()
338 while(!m_indents
.empty()) {
339 const IndentMarker
& indent
= *m_indents
.top();
340 if(indent
.type
== IndentMarker::NONE
)
348 // . Pops a single indent, pushing the proper token
349 void Scanner::PopIndent()
351 const IndentMarker
& indent
= *m_indents
.top();
354 if(indent
.status
!= IndentMarker::VALID
) {
355 InvalidateSimpleKey();
359 if(indent
.type
== IndentMarker::SEQ
)
360 m_tokens
.push(Token(Token::BLOCK_SEQ_END
, INPUT
.mark()));
361 else if(indent
.type
== IndentMarker::MAP
)
362 m_tokens
.push(Token(Token::BLOCK_MAP_END
, INPUT
.mark()));
366 int Scanner::GetTopIndent() const
368 if(m_indents
.empty())
370 return m_indents
.top()->column
;
373 // ThrowParserException
374 // . Throws a ParserException with the current token location
376 // . Does not parse any more tokens.
377 void Scanner::ThrowParserException(const std::string
& msg
) const
379 Mark mark
= Mark::null();
380 if(!m_tokens
.empty()) {
381 const Token
& token
= m_tokens
.front();
384 throw ParserException(mark
, msg
);