1 //===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common classes for building custom assembly format parsers
12 //===----------------------------------------------------------------------===//
14 #ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
15 #define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
17 #include "mlir/Support/LLVM.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/Allocator.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/SMLoc.h"
32 //===----------------------------------------------------------------------===//
34 //===----------------------------------------------------------------------===//
36 /// This class represents a specific token in the input format.
39 /// Basic token kinds.
45 // Tokens with no info.
61 kw_attr_dict_w_keyword
,
77 // String valued tokens.
84 FormatToken(Kind kind
, StringRef spelling
) : kind(kind
), spelling(spelling
) {}
86 /// Return the bytes that make up this token.
87 StringRef
getSpelling() const { return spelling
; }
89 /// Return the kind of this token.
90 Kind
getKind() const { return kind
; }
92 /// Return a location for this token.
95 /// Returns true if the token is of the given kind.
96 bool is(Kind kind
) { return getKind() == kind
; }
98 /// Return true if this token is a keyword, i.e. its kind lies strictly
/// between the `keyword_start` and `keyword_end` sentinel kinds (both ends
/// are excluded).
99 bool isKeyword() const {
100 return getKind() > Kind::keyword_start
&& getKind() < Kind::keyword_end
;
104 /// Discriminator that indicates the kind of token this is.
107 /// A reference to the entire token contents; this is always a pointer into
108 /// a memory buffer owned by the source manager.
112 //===----------------------------------------------------------------------===//
114 //===----------------------------------------------------------------------===//
116 /// This class implements a simple lexer for operation assembly format strings.
119 FormatLexer(llvm::SourceMgr
&mgr
, SMLoc loc
);
121 /// Lex the next token and return it.
122 FormatToken
lexToken();
124 /// Emit an error to the lexer with the given location and message.
125 FormatToken
emitError(SMLoc loc
, const Twine
&msg
);
126 FormatToken
emitError(const char *loc
, const Twine
&msg
);
128 FormatToken
emitErrorAndNote(SMLoc loc
, const Twine
&msg
, const Twine
¬e
);
131 /// Return the next character in the stream.
134 /// Lex an identifier, literal, variable, or string.
135 FormatToken
lexIdentifier(const char *tokStart
);
136 FormatToken
lexLiteral(const char *tokStart
);
137 FormatToken
lexVariable(const char *tokStart
);
138 FormatToken
lexString(const char *tokStart
);
140 /// Create a token of the given kind whose spelling starts at `tokStart` and
/// ends at the current pointer `curPtr` (exclusive).
141 FormatToken
formToken(FormatToken::Kind kind
, const char *tokStart
) {
// The spelling length is the distance between the current pointer and the
// start pointer.
142 return FormatToken(kind
, StringRef(tokStart
, curPtr
- tokStart
));
145 /// The source manager containing the format string.
146 llvm::SourceMgr
&mgr
;
147 /// Location of the format string.
149 /// Buffer containing the format string.
151 /// Current pointer in the buffer.
155 //===----------------------------------------------------------------------===//
157 //===----------------------------------------------------------------------===//
159 /// This class represents a single format element.
161 /// If you squint and take a close look, you can see the outline of a `Format`
163 class FormatElement
{
165 virtual ~FormatElement();
/// The top-level kinds of format elements.
enum Kind {
  Literal,
  String,
  Variable,
  Whitespace,
  Directive,
  Optional
};
170 /// Support LLVM-style RTTI.
171 static bool classof(const FormatElement
*el
) { return true; }
173 /// Get the element kind.
174 Kind
getKind() const { return kind
; }
177 /// Create a format element with the given kind.
178 FormatElement(Kind kind
) : kind(kind
) {}
181 /// The kind of the element.
185 /// The base class for all format elements. This class implements common methods
186 /// for LLVM-style RTTI.
187 template <FormatElement::Kind ElementKind
>
188 class FormatElementBase
: public FormatElement
{
190 /// Support LLVM-style RTTI: an element is of this class when its kind equals
/// the `ElementKind` template argument.
191 static bool classof(const FormatElement
*el
) {
192 return ElementKind
== el
->getKind();
196 /// Create a format element with the given kind.
197 FormatElementBase() : FormatElement(ElementKind
) {}
200 /// This class represents a literal element. A literal is either one of the
201 /// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
203 class LiteralElement
: public FormatElementBase
<FormatElement::Literal
> {
205 /// Create a literal element with the given spelling.
206 explicit LiteralElement(StringRef spelling
) : spelling(spelling
) {}
208 /// Get the spelling of the literal.
209 StringRef
getSpelling() const { return spelling
; }
212 /// The spelling of the variable, i.e. the string contained within the
217 /// This class represents a raw string that can contain arbitrary C++ code.
218 class StringElement
: public FormatElementBase
<FormatElement::String
> {
220 /// Create a string element with the given contents.
221 explicit StringElement(std::string value
) : value(std::move(value
)) {}
223 /// Get the value of the string element.
224 StringRef
getValue() const { return value
; }
227 /// The contents of the string.
231 /// This class represents a variable element. A variable refers to some part of
232 /// the object being parsed, e.g. an attribute or operand on an operation or a
233 /// parameter on an attribute.
234 class VariableElement
: public FormatElementBase
<FormatElement::Variable
> {
236 /// These are the kinds of variables.
247 /// Get the kind of variable.
248 Kind
getKind() const { return kind
; }
251 /// Create a variable with a kind.
252 VariableElement(Kind kind
) : kind(kind
) {}
255 /// The kind of variable.
259 /// Base class for variable elements. This class implements common methods for
261 template <VariableElement::Kind VariableKind
>
262 class VariableElementBase
: public VariableElement
{
264 /// An element is of this class if it is a variable and has the same variable
266 static bool classof(const FormatElement
*el
) {
// First narrow to a variable element; only then is its variable kind compared
// against the `VariableKind` template argument.
267 if (auto *varEl
= dyn_cast
<VariableElement
>(el
))
268 return VariableKind
== varEl
->getKind();
// NOTE(review): the fall-through path for non-variable elements is not visible
// in this excerpt.
273 /// Create a variable element with the given variable kind.
274 VariableElementBase() : VariableElement(VariableKind
) {}
277 /// This class represents a whitespace element, e.g. a newline or space. It is a
278 /// literal that is printed but never parsed. When the value is empty, i.e. ``,
279 /// a space is elided where one would have been printed automatically.
280 class WhitespaceElement
: public FormatElementBase
<FormatElement::Whitespace
> {
282 /// Create a whitespace element.
283 explicit WhitespaceElement(StringRef value
) : value(value
) {}
285 /// Get the whitespace value.
286 StringRef
getValue() const { return value
; }
289 /// The value of the whitespace element. Can be empty.
293 class DirectiveElement
: public FormatElementBase
<FormatElement::Directive
> {
295 /// These are the kinds of directives.
312 /// Get the directive kind.
313 Kind
getKind() const { return kind
; }
316 /// Create a directive element with a kind.
317 DirectiveElement(Kind kind
) : kind(kind
) {}
320 /// The directive kind.
324 /// Base class for directive elements. This class implements common methods for
326 template <DirectiveElement::Kind DirectiveKind
>
327 class DirectiveElementBase
: public DirectiveElement
{
329 /// Create a directive element with the specified kind.
330 DirectiveElementBase() : DirectiveElement(DirectiveKind
) {}
332 /// A format element is of this class if it is a directive element and has the
334 static bool classof(const FormatElement
*el
) {
// First narrow to a directive element; only then is its directive kind
// compared against the `DirectiveKind` template argument.
335 if (auto *directiveEl
= dyn_cast
<DirectiveElement
>(el
))
336 return DirectiveKind
== directiveEl
->getKind();
// NOTE(review): the fall-through path for non-directive elements is not
// visible in this excerpt.
341 /// This class represents a custom format directive that is implemented by the
342 /// user in C++. The directive accepts a list of arguments that is passed to the
344 class CustomDirective
: public DirectiveElementBase
<DirectiveElement::Custom
> {
346 /// Create a custom directive with a name and list of arguments.
347 CustomDirective(StringRef name
, std::vector
<FormatElement
*> &&arguments
)
348 : name(name
), arguments(std::move(arguments
)) {}
350 /// Get the custom directive name.
351 StringRef
getName() const { return name
; }
353 /// Get the arguments to the custom directive.
354 ArrayRef
<FormatElement
*> getArguments() const { return arguments
; }
357 /// The name of the custom directive. The name is used to call two C++
358 /// methods: `parse{name}` and `print{name}` with the given arguments.
360 /// The arguments with which to call the custom functions. These are either
361 /// variables (for which the functions are responsible for populating) or
362 /// references to variables.
363 std::vector
<FormatElement
*> arguments
;
366 /// This class represents a reference directive. This directive can be used to
367 /// reference, but not bind, a previously bound variable or format object.
368 /// Currently, its only use is to pass variables as arguments to the custom directive.
369 class RefDirective
: public DirectiveElementBase
<DirectiveElement::Ref
> {
371 /// Create a reference directive with the single referenced child.
372 RefDirective(FormatElement
*arg
) : arg(arg
) {}
374 /// Get the reference argument.
375 FormatElement
*getArg() const { return arg
; }
378 /// The referenced argument.
382 /// This class represents a group of elements that are optionally emitted based
383 /// on an optional variable "anchor" and a group of elements that are emitted
384 /// when the anchor element is not present.
385 class OptionalElement
: public FormatElementBase
<FormatElement::Optional
> {
387 /// Create an optional group with the given child elements.
388 OptionalElement(std::vector
<FormatElement
*> &&thenElements
,
389 std::vector
<FormatElement
*> &&elseElements
,
390 unsigned thenParseStart
, unsigned elseParseStart
,
391 FormatElement
*anchor
, bool inverted
)
392 : thenElements(std::move(thenElements
)),
393 elseElements(std::move(elseElements
)), thenParseStart(thenParseStart
),
394 elseParseStart(elseParseStart
), anchor(anchor
), inverted(inverted
) {}
396 /// Return the `then` elements of the optional group. Drops the first
397 /// `thenParseStart` whitespace elements if `parseable` is true.
398 ArrayRef
<FormatElement
*> getThenElements(bool parseable
= false) const {
// With `parseable` false, zero elements are dropped and the whole list is
// returned.
399 return llvm::ArrayRef(thenElements
)
400 .drop_front(parseable
? thenParseStart
: 0);
403 /// Return the `else` elements of the optional group. Drops the first
404 /// `elseParseStart` whitespace elements if `parseable` is true.
405 ArrayRef
<FormatElement
*> getElseElements(bool parseable
= false) const {
// With `parseable` false, zero elements are dropped and the whole list is
// returned.
406 return llvm::ArrayRef(elseElements
)
407 .drop_front(parseable
? elseParseStart
: 0);
410 /// Return the anchor of the optional group.
411 FormatElement
*getAnchor() const { return anchor
; }
413 /// Return true if the optional group is inverted.
414 bool isInverted() const { return inverted
; }
417 /// The child elements emitted when the anchor is present.
418 std::vector
<FormatElement
*> thenElements
;
419 /// The child elements emitted when the anchor is not present.
420 std::vector
<FormatElement
*> elseElements
;
421 /// The index of the first element that is parsed in `thenElements`. That is,
422 /// the first non-whitespace element.
423 unsigned thenParseStart
;
424 /// The index of the first element that is parsed in `elseElements`. That is,
425 /// the first non-whitespace element.
426 unsigned elseParseStart
;
427 /// The anchor element of the optional group.
428 FormatElement
*anchor
;
429 /// Whether the optional group condition is inverted and the anchor element is
430 /// in the else group.
434 //===----------------------------------------------------------------------===//
436 //===----------------------------------------------------------------------===//
438 /// Base class for a parser that implements an assembly format. This class
439 /// defines a common assembly format syntax and the creation of format elements.
440 /// Subclasses will need to implement parsing for the format elements they
445 virtual ~FormatParser();
447 /// Parse the assembly format.
448 FailureOr
<std::vector
<FormatElement
*>> parse();
451 /// The current context of the parser when parsing an element.
453 /// The element is being parsed in a "top-level" context, i.e. at the top of
454 /// the format or in an optional group.
456 /// The element is being parsed as a custom directive child.
457 CustomDirectiveContext
,
458 /// The element is being parsed as a type directive child.
459 TypeDirectiveContext
,
460 /// The element is being parsed as a reference directive child.
462 /// The element is being parsed as a struct directive child.
463 StructDirectiveContext
466 /// Create a format parser with the given source manager and a location.
467 explicit FormatParser(llvm::SourceMgr
&mgr
, llvm::SMLoc loc
)
468 : lexer(mgr
, loc
), curToken(lexer
.lexToken()) {}
470 /// Allocate and construct a format element of type `FormatElementT`,
/// forwarding `args` to its constructor. Ownership of the new element is
/// handed to `allocator`.
471 template <typename FormatElementT
, typename
... Args
>
472 FormatElementT
*create(Args
&&...args
) {
473 // FormatElementT *ptr = allocator.Allocate<FormatElementT>();
474 // ::new (ptr) FormatElementT(std::forward<Args>(args)...);
476 auto mem
= std::make_unique
<FormatElementT
>(std::forward
<Args
>(args
)...);
// Grab the raw pointer before the owning pointer is moved into `allocator`.
477 FormatElementT
*ptr
= mem
.get();
478 allocator
.push_back(std::move(mem
));
482 //===--------------------------------------------------------------------===//
485 /// Parse a single element of any kind.
486 FailureOr
<FormatElement
*> parseElement(Context ctx
);
488 FailureOr
<FormatElement
*> parseLiteral(Context ctx
);
490 FailureOr
<FormatElement
*> parseString(Context ctx
);
491 /// Parse a variable.
492 FailureOr
<FormatElement
*> parseVariable(Context ctx
);
493 /// Parse a directive.
494 FailureOr
<FormatElement
*> parseDirective(Context ctx
);
495 /// Parse an optional group.
496 FailureOr
<FormatElement
*> parseOptionalGroup(Context ctx
);
497 /// Parse a custom directive.
498 FailureOr
<FormatElement
*> parseCustomDirective(llvm::SMLoc loc
, Context ctx
);
499 /// Parse a ref directive.
500 FailureOr
<FormatElement
*> parseRefDirective(SMLoc loc
, Context context
);
501 /// Parse a qualified directive.
502 FailureOr
<FormatElement
*> parseQualifiedDirective(SMLoc loc
, Context ctx
);
504 /// Parse a format-specific variable kind.
505 virtual FailureOr
<FormatElement
*>
506 parseVariableImpl(llvm::SMLoc loc
, StringRef name
, Context ctx
) = 0;
507 /// Parse a format-specific directive kind.
508 virtual FailureOr
<FormatElement
*>
509 parseDirectiveImpl(llvm::SMLoc loc
, FormatToken::Kind kind
, Context ctx
) = 0;
511 //===--------------------------------------------------------------------===//
512 // Format Verification
514 /// Verify that the format is well-formed.
515 virtual LogicalResult
verify(llvm::SMLoc loc
,
516 ArrayRef
<FormatElement
*> elements
) = 0;
517 /// Verify the arguments to a custom directive.
518 virtual LogicalResult
519 verifyCustomDirectiveArguments(llvm::SMLoc loc
,
520 ArrayRef
<FormatElement
*> arguments
) = 0;
521 /// Verify the elements of an optional group.
522 virtual LogicalResult
523 verifyOptionalGroupElements(llvm::SMLoc loc
,
524 ArrayRef
<FormatElement
*> elements
,
525 FormatElement
*anchor
) = 0;
527 /// Mark 'element' as qualified. If 'element' cannot be qualified an error
528 /// should be emitted and failure returned.
529 virtual LogicalResult
markQualified(llvm::SMLoc loc
,
530 FormatElement
*element
) = 0;
532 //===--------------------------------------------------------------------===//
535 /// Emit an error with the message `msg` at the given location.
536 LogicalResult
emitError(llvm::SMLoc loc
, const Twine
&msg
) {
// Reporting is delegated to the lexer; the value it returns is not used here.
537 lexer
.emitError(loc
, msg
);
541 /// Emit an error and a note at the given location.
542 LogicalResult
emitErrorAndNote(llvm::SMLoc loc
, const Twine
&msg
,
// NOTE(review): the remainder of the parameter list (the `note` argument used
// below) is not visible in this excerpt.
// Reporting is delegated to the lexer; the value it returns is not used here.
544 lexer
.emitErrorAndNote(loc
, msg
, note
);
548 /// Parse a single token of the expected kind. If the current token is not of
/// kind `kind`, an error with the message `msg` is emitted at the current
/// token's location and the result of `emitError` is returned.
549 FailureOr
<FormatToken
> parseToken(FormatToken::Kind kind
, const Twine
&msg
) {
550 if (!curToken
.is(kind
))
551 return emitError(curToken
.getLoc(), msg
);
// Take a copy of the matching token.
552 FormatToken tok
= curToken
;
557 /// Advance the lexer to the next token. Asserts that the current token is
/// neither `eof` nor `error`.
558 void consumeToken() {
559 assert(!curToken
.is(FormatToken::eof
) && !curToken
.is(FormatToken::error
) &&
560 "shouldn't advance past EOF or errors");
// Replace the current token with the next one produced by the lexer.
561 curToken
= lexer
.lexToken();
564 /// Get the current token.
565 FormatToken
peekToken() { return curToken
; }
568 /// The format parser retains ownership of the format elements. They are
569 /// currently held as owning pointers in a vector rather than in a bump pointer allocator.
570 // FIXME: FormatElement with `std::vector` need to be converted to use
572 // llvm::BumpPtrAllocator allocator;
573 std::vector
<std::unique_ptr
<FormatElement
>> allocator
;
574 /// The format lexer to use.
576 /// The current token in the lexer.
577 FormatToken curToken
;
580 //===----------------------------------------------------------------------===//
582 //===----------------------------------------------------------------------===//
584 /// Whether a space needs to be emitted before a literal. E.g., two keywords
585 /// back-to-back require a space separator, but a keyword followed by '<' does
586 /// not require a space.
587 bool shouldEmitSpaceBefore(StringRef value
, bool lastWasPunctuation
);
589 /// Returns true if the given string can be formatted as a keyword.
590 bool canFormatStringAsKeyword(StringRef value
,
591 function_ref
<void(Twine
)> emitError
= nullptr);
593 /// Returns true if the given string is valid format literal element.
594 /// If `emitError` is provided, it is invoked with the reason for the failure.
595 bool isValidLiteral(StringRef value
,
596 function_ref
<void(Twine
)> emitError
= nullptr);
598 /// Whether a failure in parsing the assembly format should be a fatal error.
599 extern llvm::cl::opt
<bool> formatErrorIsFatal
;
601 } // namespace tblgen
604 #endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_