1 //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a YAML 1.2 parser.
11 // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
13 // This currently does not implement the following:
14 // * Multi-line literal folding.
17 // * BOMs anywhere other than the first Unicode scalar value in the file.
19 // The most important class here is Stream. This represents a YAML stream with
20 // 0, 1, or many documents.
23 // StringRef input = getInput();
24 // yaml::Stream stream(input, sm);
26 // for (yaml::document_iterator di = stream.begin(), de = stream.end();
28 // yaml::Node *n = di->getRoot();
30 // // Do something with n...
35 //===----------------------------------------------------------------------===//
37 #ifndef LLVM_SUPPORT_YAMLPARSER_H
38 #define LLVM_SUPPORT_YAMLPARSER_H
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/Support/Allocator.h"
42 #include "llvm/Support/SMLoc.h"
49 #include <system_error>
53 class MemoryBufferRef
;
61 class document_iterator
;
66 /// Dump all the tokens in this stream to OS.
67 /// \returns true if there was an error, false otherwise.
68 bool dumpTokens(StringRef Input
, raw_ostream
&);
70 /// Scans all tokens in input without outputting anything. This is used
71 /// for benchmarking the tokenizer.
72 /// \returns true if there was an error, false otherwise.
73 bool scanTokens(StringRef Input
);
75 /// Escape \a Input for a double quoted scalar; if \p EscapePrintable
76 /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
77 /// false, those UTF8 sequences encoding printable unicode scalars will not be
78 /// escaped, but emitted verbatim.
79 std::string
escape(StringRef Input
, bool EscapePrintable
= true);
81 /// This class represents a YAML stream potentially containing multiple
85 /// This keeps a reference to the string referenced by \p Input.
86 Stream(StringRef Input
, SourceMgr
&, bool ShowColors
= true,
87 std::error_code
*EC
= nullptr);
89 Stream(MemoryBufferRef InputBuffer
, SourceMgr
&, bool ShowColors
= true,
90 std::error_code
*EC
= nullptr);
93 document_iterator
begin();
94 document_iterator
end();
103 void printError(Node
*N
, const Twine
&Msg
);
106 friend class Document
;
108 std::unique_ptr
<Scanner
> scanner
;
109 std::unique_ptr
<Document
> CurrentDoc
;
112 /// Abstract base class for all Nodes.
114 virtual void anchor();
127 Node(unsigned int Type
, std::unique_ptr
<Document
> &, StringRef Anchor
,
130 // It's not safe to copy YAML nodes; the document is streamed and the position
131 // is part of the state.
132 Node(const Node
&) = delete;
133 void operator=(const Node
&) = delete;
135 void *operator new(size_t Size
, BumpPtrAllocator
&Alloc
,
136 size_t Alignment
= 16) noexcept
{
137 return Alloc
.Allocate(Size
, Alignment
);
140 void operator delete(void *Ptr
, BumpPtrAllocator
&Alloc
,
141 size_t Size
) noexcept
{
142 Alloc
.Deallocate(Ptr
, Size
);
145 void operator delete(void *) noexcept
= delete;
147 /// Get the value of the anchor attached to this node. If it does not
148 /// have one, getAnchor().size() will be 0.
149 StringRef
getAnchor() const { return Anchor
; }
151 /// Get the tag as it was written in the document. This does not
152 /// perform tag resolution.
153 StringRef
getRawTag() const { return Tag
; }
155 /// Get the verbatium tag for a given Node. This performs tag resoluton
156 /// and substitution.
157 std::string
getVerbatimTag() const;
159 SMRange
getSourceRange() const { return SourceRange
; }
160 void setSourceRange(SMRange SR
) { SourceRange
= SR
; }
162 // These functions forward to Document and Scanner.
165 Node
*parseBlockNode();
166 BumpPtrAllocator
&getAllocator();
167 void setError(const Twine
&Message
, Token
&Location
) const;
170 virtual void skip() {}
172 unsigned int getType() const { return TypeID
; }
175 std::unique_ptr
<Document
> &Doc
;
183 /// The tag as typed in the document.
191 class NullNode final
: public Node
{
192 void anchor() override
;
195 NullNode(std::unique_ptr
<Document
> &D
)
196 : Node(NK_Null
, D
, StringRef(), StringRef()) {}
198 static bool classof(const Node
*N
) { return N
->getType() == NK_Null
; }
201 /// A scalar node is an opaque datum that can be presented as a
202 /// series of zero or more Unicode scalar values.
206 class ScalarNode final
: public Node
{
207 void anchor() override
;
210 ScalarNode(std::unique_ptr
<Document
> &D
, StringRef Anchor
, StringRef Tag
,
212 : Node(NK_Scalar
, D
, Anchor
, Tag
), Value(Val
) {
213 SMLoc Start
= SMLoc::getFromPointer(Val
.begin());
214 SMLoc End
= SMLoc::getFromPointer(Val
.end());
215 SourceRange
= SMRange(Start
, End
);
218 // Return Value without any escaping or folding or other fun YAML stuff. This
219 // is the exact bytes that are contained in the file (after conversion to
221 StringRef
getRawValue() const { return Value
; }
223 /// Gets the value of this node as a StringRef.
225 /// \param Storage is used to store the content of the returned StringRef iff
226 /// it requires any modification from how it appeared in the source.
227 /// This happens with escaped characters and multi-line literals.
228 StringRef
getValue(SmallVectorImpl
<char> &Storage
) const;
230 static bool classof(const Node
*N
) {
231 return N
->getType() == NK_Scalar
;
237 StringRef
unescapeDoubleQuoted(StringRef UnquotedValue
,
238 StringRef::size_type Start
,
239 SmallVectorImpl
<char> &Storage
) const;
242 /// A block scalar node is an opaque datum that can be presented as a
243 /// series of zero or more Unicode scalar values.
249 class BlockScalarNode final
: public Node
{
250 void anchor() override
;
253 BlockScalarNode(std::unique_ptr
<Document
> &D
, StringRef Anchor
, StringRef Tag
,
254 StringRef Value
, StringRef RawVal
)
255 : Node(NK_BlockScalar
, D
, Anchor
, Tag
), Value(Value
) {
256 SMLoc Start
= SMLoc::getFromPointer(RawVal
.begin());
257 SMLoc End
= SMLoc::getFromPointer(RawVal
.end());
258 SourceRange
= SMRange(Start
, End
);
261 /// Gets the value of this node as a StringRef.
262 StringRef
getValue() const { return Value
; }
264 static bool classof(const Node
*N
) {
265 return N
->getType() == NK_BlockScalar
;
272 /// A key and value pair. While not technically a Node under the YAML
273 /// representation graph, it is easier to treat them this way.
275 /// TODO: Consider making this not a child of Node.
279 class KeyValueNode final
: public Node
{
280 void anchor() override
;
283 KeyValueNode(std::unique_ptr
<Document
> &D
)
284 : Node(NK_KeyValue
, D
, StringRef(), StringRef()) {}
286 /// Parse and return the key.
288 /// This may be called multiple times.
290 /// \returns The key, or nullptr if failed() == true.
293 /// Parse and return the value.
295 /// This may be called multiple times.
297 /// \returns The value, or nullptr if failed() == true.
300 void skip() override
{
301 if (Node
*Key
= getKey()) {
303 if (Node
*Val
= getValue())
308 static bool classof(const Node
*N
) {
309 return N
->getType() == NK_KeyValue
;
314 Node
*Value
= nullptr;
317 /// This is an iterator abstraction over YAML collections shared by both
318 /// sequences and maps.
320 /// BaseT must have a ValueT* member named CurrentEntry and a member function
321 /// increment() which must set CurrentEntry to 0 to create an end iterator.
322 template <class BaseT
, class ValueT
>
323 class basic_collection_iterator
324 : public std::iterator
<std::input_iterator_tag
, ValueT
> {
326 basic_collection_iterator() = default;
327 basic_collection_iterator(BaseT
*B
) : Base(B
) {}
329 ValueT
*operator->() const {
330 assert(Base
&& Base
->CurrentEntry
&& "Attempted to access end iterator!");
331 return Base
->CurrentEntry
;
334 ValueT
&operator*() const {
335 assert(Base
&& Base
->CurrentEntry
&&
336 "Attempted to dereference end iterator!");
337 return *Base
->CurrentEntry
;
340 operator ValueT
*() const {
341 assert(Base
&& Base
->CurrentEntry
&& "Attempted to access end iterator!");
342 return Base
->CurrentEntry
;
345 /// Note on EqualityComparable:
347 /// The iterator is not re-entrant,
348 /// it is meant to be used for parsing YAML on-demand
349 /// Once iteration started - it can point only to one entry at a time
350 /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
351 /// iff Base and Other.Base are equal.
352 bool operator==(const basic_collection_iterator
&Other
) const {
353 if (Base
&& (Base
== Other
.Base
)) {
354 assert((Base
->CurrentEntry
== Other
.Base
->CurrentEntry
)
355 && "Equal Bases expected to point to equal Entries");
358 return Base
== Other
.Base
;
361 bool operator!=(const basic_collection_iterator
&Other
) const {
362 return !(Base
== Other
.Base
);
365 basic_collection_iterator
&operator++() {
366 assert(Base
&& "Attempted to advance iterator past end!");
368 // Create an end iterator.
369 if (!Base
->CurrentEntry
)
375 BaseT
*Base
= nullptr;
378 // The following two templates are used for both MappingNode and Sequence Node.
379 template <class CollectionType
>
380 typename
CollectionType::iterator
begin(CollectionType
&C
) {
381 assert(C
.IsAtBeginning
&& "You may only iterate over a collection once!");
382 C
.IsAtBeginning
= false;
383 typename
CollectionType::iterator
ret(&C
);
388 template <class CollectionType
> void skip(CollectionType
&C
) {
389 // TODO: support skipping from the middle of a parsed collection ;/
390 assert((C
.IsAtBeginning
|| C
.IsAtEnd
) && "Cannot skip mid parse!");
392 for (typename
CollectionType::iterator i
= begin(C
), e
= C
.end(); i
!= e
;
397 /// Represents a YAML map created from either a block map for a flow map.
399 /// This parses the YAML stream as increment() is called.
404 class MappingNode final
: public Node
{
405 void anchor() override
;
411 MT_Inline
///< An inline mapping node is used for "[key: value]".
414 MappingNode(std::unique_ptr
<Document
> &D
, StringRef Anchor
, StringRef Tag
,
416 : Node(NK_Mapping
, D
, Anchor
, Tag
), Type(MT
) {}
418 friend class basic_collection_iterator
<MappingNode
, KeyValueNode
>;
420 using iterator
= basic_collection_iterator
<MappingNode
, KeyValueNode
>;
422 template <class T
> friend typename
T::iterator
yaml::begin(T
&);
423 template <class T
> friend void yaml::skip(T
&);
425 iterator
begin() { return yaml::begin(*this); }
427 iterator
end() { return iterator(); }
429 void skip() override
{ yaml::skip(*this); }
431 static bool classof(const Node
*N
) {
432 return N
->getType() == NK_Mapping
;
437 bool IsAtBeginning
= true;
438 bool IsAtEnd
= false;
439 KeyValueNode
*CurrentEntry
= nullptr;
444 /// Represents a YAML sequence created from either a block sequence for a
447 /// This parses the YAML stream as increment() is called.
452 class SequenceNode final
: public Node
{
453 void anchor() override
;
465 // As a BlockMappingEntry and BlockEnd are not created in this case.
469 SequenceNode(std::unique_ptr
<Document
> &D
, StringRef Anchor
, StringRef Tag
,
471 : Node(NK_Sequence
, D
, Anchor
, Tag
), SeqType(ST
) {}
473 friend class basic_collection_iterator
<SequenceNode
, Node
>;
475 using iterator
= basic_collection_iterator
<SequenceNode
, Node
>;
477 template <class T
> friend typename
T::iterator
yaml::begin(T
&);
478 template <class T
> friend void yaml::skip(T
&);
482 iterator
begin() { return yaml::begin(*this); }
484 iterator
end() { return iterator(); }
486 void skip() override
{ yaml::skip(*this); }
488 static bool classof(const Node
*N
) {
489 return N
->getType() == NK_Sequence
;
493 SequenceType SeqType
;
494 bool IsAtBeginning
= true;
495 bool IsAtEnd
= false;
496 bool WasPreviousTokenFlowEntry
= true; // Start with an imaginary ','.
497 Node
*CurrentEntry
= nullptr;
500 /// Represents an alias to a Node with an anchor.
504 class AliasNode final
: public Node
{
505 void anchor() override
;
508 AliasNode(std::unique_ptr
<Document
> &D
, StringRef Val
)
509 : Node(NK_Alias
, D
, StringRef(), StringRef()), Name(Val
) {}
511 StringRef
getName() const { return Name
; }
514 static bool classof(const Node
*N
) { return N
->getType() == NK_Alias
; }
520 /// A YAML Stream is a sequence of Documents. A document contains a root
524 Document(Stream
&ParentStream
);
526 /// Root for parsing a node. Returns a single node.
527 Node
*parseBlockNode();
529 /// Finish parsing the current document and return true if there are
530 /// more. Return false otherwise.
533 /// Parse and return the root level node.
537 return Root
= parseBlockNode();
540 const std::map
<StringRef
, StringRef
> &getTagMap() const { return TagMap
; }
544 friend class document_iterator
;
546 /// Stream to read tokens from.
549 /// Used to allocate nodes to. All are destroyed without calling their
550 /// destructor when the document is destroyed.
551 BumpPtrAllocator NodeAllocator
;
553 /// The root node. Used to support skipping a partially parsed
557 /// Maps tag prefixes to their expansion.
558 std::map
<StringRef
, StringRef
> TagMap
;
562 void setError(const Twine
&Message
, Token
&Location
) const;
565 /// Parse %BLAH directives and return true if any were encountered.
566 bool parseDirectives();
569 void parseYAMLDirective();
572 void parseTAGDirective();
574 /// Consume the next token and error if it is not \a TK.
575 bool expectToken(int TK
);
578 /// Iterator abstraction for Documents over a Stream.
579 class document_iterator
{
581 document_iterator() = default;
582 document_iterator(std::unique_ptr
<Document
> &D
) : Doc(&D
) {}
584 bool operator==(const document_iterator
&Other
) const {
585 if (isAtEnd() || Other
.isAtEnd())
586 return isAtEnd() && Other
.isAtEnd();
588 return Doc
== Other
.Doc
;
590 bool operator!=(const document_iterator
&Other
) const {
591 return !(*this == Other
);
594 document_iterator
operator++() {
595 assert(Doc
&& "incrementing iterator past the end.");
596 if (!(*Doc
)->skip()) {
599 Stream
&S
= (*Doc
)->stream
;
600 Doc
->reset(new Document(S
));
605 Document
&operator*() { return *Doc
->get(); }
607 std::unique_ptr
<Document
> &operator->() { return *Doc
; }
610 bool isAtEnd() const { return !Doc
|| !*Doc
; }
612 std::unique_ptr
<Document
> *Doc
= nullptr;
615 } // end namespace yaml
617 } // end namespace llvm
619 #endif // LLVM_SUPPORT_YAMLPARSER_H