1 //===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common classes for building custom assembly format parsers
12 //===----------------------------------------------------------------------===//
14 #ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
15 #define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Support/LogicalResult.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Support/Allocator.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/SMLoc.h"
33 //===----------------------------------------------------------------------===//
35 //===----------------------------------------------------------------------===//
37 /// This class represents a specific token in the input format.
40 /// Basic token kinds.
46 // Tokens with no info.
62 kw_attr_dict_w_keyword
,
78 // String valued tokens.
85 FormatToken(Kind kind
, StringRef spelling
) : kind(kind
), spelling(spelling
) {}
87 /// Return the bytes that make up this token.
88 StringRef
getSpelling() const { return spelling
; }
90 /// Return the kind of this token.
91 Kind
getKind() const { return kind
; }
93 /// Return a location for this token.
96 /// Returns true if the token is of the given kind.
97 bool is(Kind kind
) { return getKind() == kind
; }
99 /// Return if this token is a keyword.
100 bool isKeyword() const {
101 return getKind() > Kind::keyword_start
&& getKind() < Kind::keyword_end
;
105 /// Discriminator that indicates the kind of token this is.
108 /// A reference to the entire token contents; this is always a pointer into
109 /// a memory buffer owned by the source manager.
113 //===----------------------------------------------------------------------===//
115 //===----------------------------------------------------------------------===//
117 /// This class implements a simple lexer for operation assembly format strings.
120 FormatLexer(llvm::SourceMgr
&mgr
, SMLoc loc
);
122 /// Lex the next token and return it.
123 FormatToken
lexToken();
125 /// Emit an error to the lexer with the given location and message.
126 FormatToken
emitError(SMLoc loc
, const Twine
&msg
);
127 FormatToken
emitError(const char *loc
, const Twine
&msg
);
129 FormatToken
emitErrorAndNote(SMLoc loc
, const Twine
&msg
, const Twine
¬e
);
132 /// Return the next character in the stream.
135 /// Lex an identifier, literal, variable, or string.
136 FormatToken
lexIdentifier(const char *tokStart
);
137 FormatToken
lexLiteral(const char *tokStart
);
138 FormatToken
lexVariable(const char *tokStart
);
139 FormatToken
lexString(const char *tokStart
);
141 /// Create a token with the current pointer and a start pointer.
142 FormatToken
formToken(FormatToken::Kind kind
, const char *tokStart
) {
143 return FormatToken(kind
, StringRef(tokStart
, curPtr
- tokStart
));
146 /// The source manager containing the format string.
147 llvm::SourceMgr
&mgr
;
148 /// Location of the format string.
150 /// Buffer containing the format string.
152 /// Current pointer in the buffer.
156 //===----------------------------------------------------------------------===//
158 //===----------------------------------------------------------------------===//
160 /// This class represents a single format element.
162 /// If you squint and take a close look, you can see the outline of a `Format`
164 class FormatElement
{
166 virtual ~FormatElement();
// The top-level kinds of format elements. The enumerator order is part of the
// interface; do not reorder.
enum Kind {
  Literal,
  String,
  Variable,
  Whitespace,
  Directive,
  Optional,
};
171 /// Support LLVM-style RTTI.
172 static bool classof(const FormatElement
*el
) { return true; }
174 /// Get the element kind.
175 Kind
getKind() const { return kind
; }
178 /// Create a format element with the given kind.
179 FormatElement(Kind kind
) : kind(kind
) {}
182 /// The kind of the element.
186 /// The base class for all format elements. This class implements common methods
187 /// for LLVM-style RTTI.
188 template <FormatElement::Kind ElementKind
>
189 class FormatElementBase
: public FormatElement
{
191 /// Support LLVM-style RTTI.
192 static bool classof(const FormatElement
*el
) {
193 return ElementKind
== el
->getKind();
197 /// Create a format element with the given kind.
198 FormatElementBase() : FormatElement(ElementKind
) {}
201 /// This class represents a literal element. A literal is either one of the
202 /// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
204 class LiteralElement
: public FormatElementBase
<FormatElement::Literal
> {
206 /// Create a literal element with the given spelling.
207 explicit LiteralElement(StringRef spelling
) : spelling(spelling
) {}
209 /// Get the spelling of the literal.
210 StringRef
getSpelling() const { return spelling
; }
/// The spelling of the literal, i.e. the string contained within the
218 /// This class represents a raw string that can contain arbitrary C++ code.
219 class StringElement
: public FormatElementBase
<FormatElement::String
> {
221 /// Create a string element with the given contents.
222 explicit StringElement(std::string value
) : value(std::move(value
)) {}
224 /// Get the value of the string element.
225 StringRef
getValue() const { return value
; }
228 /// The contents of the string.
232 /// This class represents a variable element. A variable refers to some part of
233 /// the object being parsed, e.g. an attribute or operand on an operation or a
234 /// parameter on an attribute.
235 class VariableElement
: public FormatElementBase
<FormatElement::Variable
> {
237 /// These are the kinds of variables.
248 /// Get the kind of variable.
249 Kind
getKind() const { return kind
; }
252 /// Create a variable with a kind.
253 VariableElement(Kind kind
) : kind(kind
) {}
256 /// The kind of variable.
260 /// Base class for variable elements. This class implements common methods for
262 template <VariableElement::Kind VariableKind
>
263 class VariableElementBase
: public VariableElement
{
265 /// An element is of this class if it is a variable and has the same variable
267 static bool classof(const FormatElement
*el
) {
268 if (auto *varEl
= dyn_cast
<VariableElement
>(el
))
269 return VariableKind
== varEl
->getKind();
274 /// Create a variable element with the given variable kind.
275 VariableElementBase() : VariableElement(VariableKind
) {}
278 /// This class represents a whitespace element, e.g. a newline or space. It is a
279 /// literal that is printed but never parsed. When the value is empty, i.e. ``,
280 /// a space is elided where one would have been printed automatically.
281 class WhitespaceElement
: public FormatElementBase
<FormatElement::Whitespace
> {
283 /// Create a whitespace element.
284 explicit WhitespaceElement(StringRef value
) : value(value
) {}
286 /// Get the whitespace value.
287 StringRef
getValue() const { return value
; }
290 /// The value of the whitespace element. Can be empty.
294 class DirectiveElement
: public FormatElementBase
<FormatElement::Directive
> {
296 /// These are the kinds of directives.
313 /// Get the directive kind.
314 Kind
getKind() const { return kind
; }
317 /// Create a directive element with a kind.
318 DirectiveElement(Kind kind
) : kind(kind
) {}
321 /// The directive kind.
325 /// Base class for directive elements. This class implements common methods for
327 template <DirectiveElement::Kind DirectiveKind
>
328 class DirectiveElementBase
: public DirectiveElement
{
330 /// Create a directive element with the specified kind.
331 DirectiveElementBase() : DirectiveElement(DirectiveKind
) {}
333 /// A format element is of this class if it is a directive element and has the
335 static bool classof(const FormatElement
*el
) {
336 if (auto *directiveEl
= dyn_cast
<DirectiveElement
>(el
))
337 return DirectiveKind
== directiveEl
->getKind();
342 /// This class represents a custom format directive that is implemented by the
343 /// user in C++. The directive accepts a list of arguments that is passed to the
345 class CustomDirective
: public DirectiveElementBase
<DirectiveElement::Custom
> {
347 /// Create a custom directive with a name and list of arguments.
348 CustomDirective(StringRef name
, std::vector
<FormatElement
*> &&arguments
)
349 : name(name
), arguments(std::move(arguments
)) {}
351 /// Get the custom directive name.
352 StringRef
getName() const { return name
; }
354 /// Get the arguments to the custom directive.
355 ArrayRef
<FormatElement
*> getArguments() const { return arguments
; }
358 /// The name of the custom directive. The name is used to call two C++
359 /// methods: `parse{name}` and `print{name}` with the given arguments.
361 /// The arguments with which to call the custom functions. These are either
362 /// variables (for which the functions are responsible for populating) or
363 /// references to variables.
364 std::vector
<FormatElement
*> arguments
;
367 /// This class represents a reference directive. This directive can be used to
368 /// reference but not bind a previously bound variable or format object. Its
369 /// current only use is to pass variables as arguments to the custom directive.
370 class RefDirective
: public DirectiveElementBase
<DirectiveElement::Ref
> {
372 /// Create a reference directive with the single referenced child.
373 RefDirective(FormatElement
*arg
) : arg(arg
) {}
375 /// Get the reference argument.
376 FormatElement
*getArg() const { return arg
; }
379 /// The referenced argument.
383 /// This class represents a group of elements that are optionally emitted based
384 /// on an optional variable "anchor" and a group of elements that are emitted
385 /// when the anchor element is not present.
386 class OptionalElement
: public FormatElementBase
<FormatElement::Optional
> {
388 /// Create an optional group with the given child elements.
389 OptionalElement(std::vector
<FormatElement
*> &&thenElements
,
390 std::vector
<FormatElement
*> &&elseElements
,
391 unsigned thenParseStart
, unsigned elseParseStart
,
392 FormatElement
*anchor
, bool inverted
)
393 : thenElements(std::move(thenElements
)),
394 elseElements(std::move(elseElements
)), thenParseStart(thenParseStart
),
395 elseParseStart(elseParseStart
), anchor(anchor
), inverted(inverted
) {}
397 /// Return the `then` elements of the optional group. Drops the first
398 /// `thenParseStart` whitespace elements if `parseable` is true.
399 ArrayRef
<FormatElement
*> getThenElements(bool parseable
= false) const {
400 return llvm::ArrayRef(thenElements
)
401 .drop_front(parseable
? thenParseStart
: 0);
404 /// Return the `else` elements of the optional group. Drops the first
405 /// `elseParseStart` whitespace elements if `parseable` is true.
406 ArrayRef
<FormatElement
*> getElseElements(bool parseable
= false) const {
407 return llvm::ArrayRef(elseElements
)
408 .drop_front(parseable
? elseParseStart
: 0);
411 /// Return the anchor of the optional group.
412 FormatElement
*getAnchor() const { return anchor
; }
414 /// Return true if the optional group is inverted.
415 bool isInverted() const { return inverted
; }
418 /// The child elements emitted when the anchor is present.
419 std::vector
<FormatElement
*> thenElements
;
420 /// The child elements emitted when the anchor is not present.
421 std::vector
<FormatElement
*> elseElements
;
422 /// The index of the first element that is parsed in `thenElements`. That is,
423 /// the first non-whitespace element.
424 unsigned thenParseStart
;
425 /// The index of the first element that is parsed in `elseElements`. That is,
426 /// the first non-whitespace element.
427 unsigned elseParseStart
;
428 /// The anchor element of the optional group.
429 FormatElement
*anchor
;
430 /// Whether the optional group condition is inverted and the anchor element is
431 /// in the else group.
435 //===----------------------------------------------------------------------===//
437 //===----------------------------------------------------------------------===//
439 /// Base class for a parser that implements an assembly format. This class
440 /// defines a common assembly format syntax and the creation of format elements.
441 /// Subclasses will need to implement parsing for the format elements they
446 virtual ~FormatParser();
448 /// Parse the assembly format.
449 FailureOr
<std::vector
<FormatElement
*>> parse();
452 /// The current context of the parser when parsing an element.
454 /// The element is being parsed in a "top-level" context, i.e. at the top of
455 /// the format or in an optional group.
457 /// The element is being parsed as a custom directive child.
458 CustomDirectiveContext
,
459 /// The element is being parsed as a type directive child.
460 TypeDirectiveContext
,
461 /// The element is being parsed as a reference directive child.
463 /// The element is being parsed as a struct directive child.
464 StructDirectiveContext
467 /// Create a format parser with the given source manager and a location.
468 explicit FormatParser(llvm::SourceMgr
&mgr
, llvm::SMLoc loc
)
469 : lexer(mgr
, loc
), curToken(lexer
.lexToken()) {}
471 /// Allocate and construct a format element.
472 template <typename FormatElementT
, typename
... Args
>
473 FormatElementT
*create(Args
&&...args
) {
474 // FormatElementT *ptr = allocator.Allocate<FormatElementT>();
475 // ::new (ptr) FormatElementT(std::forward<Args>(args)...);
477 auto mem
= std::make_unique
<FormatElementT
>(std::forward
<Args
>(args
)...);
478 FormatElementT
*ptr
= mem
.get();
479 allocator
.push_back(std::move(mem
));
483 //===--------------------------------------------------------------------===//
486 /// Parse a single element of any kind.
487 FailureOr
<FormatElement
*> parseElement(Context ctx
);
489 FailureOr
<FormatElement
*> parseLiteral(Context ctx
);
491 FailureOr
<FormatElement
*> parseString(Context ctx
);
492 /// Parse a variable.
493 FailureOr
<FormatElement
*> parseVariable(Context ctx
);
494 /// Parse a directive.
495 FailureOr
<FormatElement
*> parseDirective(Context ctx
);
496 /// Parse an optional group.
497 FailureOr
<FormatElement
*> parseOptionalGroup(Context ctx
);
499 /// Parse a custom directive.
500 FailureOr
<FormatElement
*> parseCustomDirective(llvm::SMLoc loc
, Context ctx
);
502 /// Parse a format-specific variable kind.
503 virtual FailureOr
<FormatElement
*>
504 parseVariableImpl(llvm::SMLoc loc
, StringRef name
, Context ctx
) = 0;
505 /// Parse a format-specific directive kind.
506 virtual FailureOr
<FormatElement
*>
507 parseDirectiveImpl(llvm::SMLoc loc
, FormatToken::Kind kind
, Context ctx
) = 0;
509 //===--------------------------------------------------------------------===//
510 // Format Verification
512 /// Verify that the format is well-formed.
513 virtual LogicalResult
verify(llvm::SMLoc loc
,
514 ArrayRef
<FormatElement
*> elements
) = 0;
515 /// Verify the arguments to a custom directive.
516 virtual LogicalResult
517 verifyCustomDirectiveArguments(llvm::SMLoc loc
,
518 ArrayRef
<FormatElement
*> arguments
) = 0;
519 /// Verify the elements of an optional group.
520 virtual LogicalResult
521 verifyOptionalGroupElements(llvm::SMLoc loc
,
522 ArrayRef
<FormatElement
*> elements
,
523 FormatElement
*anchor
) = 0;
525 //===--------------------------------------------------------------------===//
528 /// Emit an error at the given location.
529 LogicalResult
emitError(llvm::SMLoc loc
, const Twine
&msg
) {
530 lexer
.emitError(loc
, msg
);
/// Emit an error and a note at the given location.
535 LogicalResult
emitErrorAndNote(llvm::SMLoc loc
, const Twine
&msg
,
537 lexer
.emitErrorAndNote(loc
, msg
, note
);
541 /// Parse a single token of the expected kind.
542 FailureOr
<FormatToken
> parseToken(FormatToken::Kind kind
, const Twine
&msg
) {
543 if (!curToken
.is(kind
))
544 return emitError(curToken
.getLoc(), msg
);
545 FormatToken tok
= curToken
;
550 /// Advance the lexer to the next token.
551 void consumeToken() {
552 assert(!curToken
.is(FormatToken::eof
) && !curToken
.is(FormatToken::error
) &&
553 "shouldn't advance past EOF or errors");
554 curToken
= lexer
.lexToken();
557 /// Get the current token.
558 FormatToken
peekToken() { return curToken
; }
/// The format parser retains ownership of the format elements. They are
/// currently held as a vector of unique pointers, not in a bump pointer
/// allocator.
563 // FIXME: FormatElement with `std::vector` need to be converted to use
565 // llvm::BumpPtrAllocator allocator;
566 std::vector
<std::unique_ptr
<FormatElement
>> allocator
;
567 /// The format lexer to use.
569 /// The current token in the lexer.
570 FormatToken curToken
;
573 //===----------------------------------------------------------------------===//
575 //===----------------------------------------------------------------------===//
577 /// Whether a space needs to be emitted before a literal. E.g., two keywords
578 /// back-to-back require a space separator, but a keyword followed by '<' does
579 /// not require a space.
580 bool shouldEmitSpaceBefore(StringRef value
, bool lastWasPunctuation
);
582 /// Returns true if the given string can be formatted as a keyword.
583 bool canFormatStringAsKeyword(StringRef value
,
584 function_ref
<void(Twine
)> emitError
= nullptr);
586 /// Returns true if the given string is valid format literal element.
587 /// If `emitError` is provided, it is invoked with the reason for the failure.
588 bool isValidLiteral(StringRef value
,
589 function_ref
<void(Twine
)> emitError
= nullptr);
591 /// Whether a failure in parsing the assembly format should be a fatal error.
592 extern llvm::cl::opt
<bool> formatErrorIsFatal
;
594 } // namespace tblgen
597 #endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_