[clang-tidy][NFC]remove deps of clang in clang tidy test (#116588)
[llvm-project.git] / mlir / tools / mlir-tblgen / FormatGen.h
blob1dc2cb3eaa88a6e5e2d9c21438068bd6d43d0830
1 //===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains common classes for building custom assembly format parsers
10 // and generators.
12 //===----------------------------------------------------------------------===//
14 #ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
15 #define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
17 #include "mlir/Support/LLVM.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/Allocator.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/SMLoc.h"
23 #include <vector>
25 namespace llvm {
26 class SourceMgr;
27 } // namespace llvm
29 namespace mlir {
30 namespace tblgen {
32 //===----------------------------------------------------------------------===//
33 // FormatToken
34 //===----------------------------------------------------------------------===//
36 /// This class represents a specific token in the input format.
37 class FormatToken {
38 public:
39 /// Basic token kinds.
40 enum Kind {
41 // Markers.
42 eof,
43 error,
45 // Tokens with no info.
46 l_paren,
47 r_paren,
48 caret,
49 colon,
50 comma,
51 equal,
52 less,
53 greater,
54 question,
55 star,
56 pipe,
58 // Keywords.
59 keyword_start,
60 kw_attr_dict,
61 kw_attr_dict_w_keyword,
62 kw_prop_dict,
63 kw_custom,
64 kw_functional_type,
65 kw_oilist,
66 kw_operands,
67 kw_params,
68 kw_qualified,
69 kw_ref,
70 kw_regions,
71 kw_results,
72 kw_struct,
73 kw_successors,
74 kw_type,
75 keyword_end,
77 // String valued tokens.
78 identifier,
79 literal,
80 variable,
81 string,
84 FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
86 /// Return the bytes that make up this token.
87 StringRef getSpelling() const { return spelling; }
89 /// Return the kind of this token.
90 Kind getKind() const { return kind; }
92 /// Return a location for this token.
93 SMLoc getLoc() const;
95 /// Returns true if the token is of the given kind.
96 bool is(Kind kind) { return getKind() == kind; }
98 /// Return if this token is a keyword.
99 bool isKeyword() const {
100 return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end;
103 private:
104 /// Discriminator that indicates the kind of token this is.
105 Kind kind;
107 /// A reference to the entire token contents; this is always a pointer into
108 /// a memory buffer owned by the source manager.
109 StringRef spelling;
112 //===----------------------------------------------------------------------===//
113 // FormatLexer
114 //===----------------------------------------------------------------------===//
116 /// This class implements a simple lexer for operation assembly format strings.
117 class FormatLexer {
118 public:
119 FormatLexer(llvm::SourceMgr &mgr, SMLoc loc);
121 /// Lex the next token and return it.
122 FormatToken lexToken();
124 /// Emit an error to the lexer with the given location and message.
125 FormatToken emitError(SMLoc loc, const Twine &msg);
126 FormatToken emitError(const char *loc, const Twine &msg);
128 FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg, const Twine &note);
130 private:
131 /// Return the next character in the stream.
132 int getNextChar();
134 /// Lex an identifier, literal, variable, or string.
135 FormatToken lexIdentifier(const char *tokStart);
136 FormatToken lexLiteral(const char *tokStart);
137 FormatToken lexVariable(const char *tokStart);
138 FormatToken lexString(const char *tokStart);
140 /// Create a token with the current pointer and a start pointer.
141 FormatToken formToken(FormatToken::Kind kind, const char *tokStart) {
142 return FormatToken(kind, StringRef(tokStart, curPtr - tokStart));
145 /// The source manager containing the format string.
146 llvm::SourceMgr &mgr;
147 /// Location of the format string.
148 SMLoc loc;
149 /// Buffer containing the format string.
150 StringRef curBuffer;
151 /// Current pointer in the buffer.
152 const char *curPtr;
155 //===----------------------------------------------------------------------===//
156 // FormatElement
157 //===----------------------------------------------------------------------===//
/// This class represents a single format element and is the root of the
/// format element hierarchy.
///
/// If you squint and take a close look, you can see the outline of a `Format`
/// dialect.
class FormatElement {
public:
  virtual ~FormatElement();

  /// The top-level kinds of format elements.
  enum Kind { Literal, String, Variable, Whitespace, Directive, Optional };

  /// Support LLVM-style RTTI. Every format element is a `FormatElement`, so
  /// the argument is never inspected and is left unnamed to avoid an
  /// unused-parameter warning.
  static bool classof(const FormatElement * /*el*/) { return true; }

  /// Get the element kind.
  Kind getKind() const { return kind; }

protected:
  /// Create a format element with the given kind. Marked `explicit` so a bare
  /// `Kind` value is never implicitly converted into an element.
  explicit FormatElement(Kind kind) : kind(kind) {}

private:
  /// The kind of the element.
  Kind kind;
};
185 /// The base class for all format elements. This class implements common methods
186 /// for LLVM-style RTTI.
187 template <FormatElement::Kind ElementKind>
188 class FormatElementBase : public FormatElement {
189 public:
190 /// Support LLVM-style RTTI.
191 static bool classof(const FormatElement *el) {
192 return ElementKind == el->getKind();
195 protected:
196 /// Create a format element with the given kind.
197 FormatElementBase() : FormatElement(ElementKind) {}
200 /// This class represents a literal element. A literal is either one of the
201 /// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
202 /// `literal`).
203 class LiteralElement : public FormatElementBase<FormatElement::Literal> {
204 public:
205 /// Create a literal element with the given spelling.
206 explicit LiteralElement(StringRef spelling) : spelling(spelling) {}
208 /// Get the spelling of the literal.
209 StringRef getSpelling() const { return spelling; }
211 private:
212 /// The spelling of the variable, i.e. the string contained within the
213 /// backticks.
214 StringRef spelling;
217 /// This class represents a raw string that can contain arbitrary C++ code.
218 class StringElement : public FormatElementBase<FormatElement::String> {
219 public:
220 /// Create a string element with the given contents.
221 explicit StringElement(std::string value) : value(std::move(value)) {}
223 /// Get the value of the string element.
224 StringRef getValue() const { return value; }
226 private:
227 /// The contents of the string.
228 std::string value;
231 /// This class represents a variable element. A variable refers to some part of
232 /// the object being parsed, e.g. an attribute or operand on an operation or a
233 /// parameter on an attribute.
234 class VariableElement : public FormatElementBase<FormatElement::Variable> {
235 public:
236 /// These are the kinds of variables.
237 enum Kind {
238 Attribute,
239 Operand,
240 Region,
241 Result,
242 Successor,
243 Parameter,
244 Property
247 /// Get the kind of variable.
248 Kind getKind() const { return kind; }
250 protected:
251 /// Create a variable with a kind.
252 VariableElement(Kind kind) : kind(kind) {}
254 private:
255 /// The kind of variable.
256 Kind kind;
259 /// Base class for variable elements. This class implements common methods for
260 /// LLVM-style RTTI.
261 template <VariableElement::Kind VariableKind>
262 class VariableElementBase : public VariableElement {
263 public:
264 /// An element is of this class if it is a variable and has the same variable
265 /// type.
266 static bool classof(const FormatElement *el) {
267 if (auto *varEl = dyn_cast<VariableElement>(el))
268 return VariableKind == varEl->getKind();
269 return false;
272 protected:
273 /// Create a variable element with the given variable kind.
274 VariableElementBase() : VariableElement(VariableKind) {}
277 /// This class represents a whitespace element, e.g. a newline or space. It is a
278 /// literal that is printed but never parsed. When the value is empty, i.e. ``,
279 /// a space is elided where one would have been printed automatically.
280 class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> {
281 public:
282 /// Create a whitespace element.
283 explicit WhitespaceElement(StringRef value) : value(value) {}
285 /// Get the whitespace value.
286 StringRef getValue() const { return value; }
288 private:
289 /// The value of the whitespace element. Can be empty.
290 StringRef value;
293 class DirectiveElement : public FormatElementBase<FormatElement::Directive> {
294 public:
295 /// These are the kinds of directives.
296 enum Kind {
297 AttrDict,
298 PropDict,
299 Custom,
300 FunctionalType,
301 OIList,
302 Operands,
303 Ref,
304 Regions,
305 Results,
306 Successors,
307 Type,
308 Params,
309 Struct
312 /// Get the directive kind.
313 Kind getKind() const { return kind; }
315 protected:
316 /// Create a directive element with a kind.
317 DirectiveElement(Kind kind) : kind(kind) {}
319 private:
320 /// The directive kind.
321 Kind kind;
324 /// Base class for directive elements. This class implements common methods for
325 /// LLVM-style RTTI.
326 template <DirectiveElement::Kind DirectiveKind>
327 class DirectiveElementBase : public DirectiveElement {
328 public:
329 /// Create a directive element with the specified kind.
330 DirectiveElementBase() : DirectiveElement(DirectiveKind) {}
332 /// A format element is of this class if it is a directive element and has the
333 /// same kind.
334 static bool classof(const FormatElement *el) {
335 if (auto *directiveEl = dyn_cast<DirectiveElement>(el))
336 return DirectiveKind == directiveEl->getKind();
337 return false;
341 /// This class represents a custom format directive that is implemented by the
342 /// user in C++. The directive accepts a list of arguments that is passed to the
343 /// C++ function.
344 class CustomDirective : public DirectiveElementBase<DirectiveElement::Custom> {
345 public:
346 /// Create a custom directive with a name and list of arguments.
347 CustomDirective(StringRef name, std::vector<FormatElement *> &&arguments)
348 : name(name), arguments(std::move(arguments)) {}
350 /// Get the custom directive name.
351 StringRef getName() const { return name; }
353 /// Get the arguments to the custom directive.
354 ArrayRef<FormatElement *> getArguments() const { return arguments; }
356 private:
357 /// The name of the custom directive. The name is used to call two C++
358 /// methods: `parse{name}` and `print{name}` with the given arguments.
359 StringRef name;
360 /// The arguments with which to call the custom functions. These are either
361 /// variables (for which the functions are responsible for populating) or
362 /// references to variables.
363 std::vector<FormatElement *> arguments;
366 /// This class represents a reference directive. This directive can be used to
367 /// reference but not bind a previously bound variable or format object. Its
368 /// current only use is to pass variables as arguments to the custom directive.
369 class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> {
370 public:
371 /// Create a reference directive with the single referenced child.
372 RefDirective(FormatElement *arg) : arg(arg) {}
374 /// Get the reference argument.
375 FormatElement *getArg() const { return arg; }
377 private:
378 /// The referenced argument.
379 FormatElement *arg;
382 /// This class represents a group of elements that are optionally emitted based
383 /// on an optional variable "anchor" and a group of elements that are emitted
384 /// when the anchor element is not present.
385 class OptionalElement : public FormatElementBase<FormatElement::Optional> {
386 public:
387 /// Create an optional group with the given child elements.
388 OptionalElement(std::vector<FormatElement *> &&thenElements,
389 std::vector<FormatElement *> &&elseElements,
390 unsigned thenParseStart, unsigned elseParseStart,
391 FormatElement *anchor, bool inverted)
392 : thenElements(std::move(thenElements)),
393 elseElements(std::move(elseElements)), thenParseStart(thenParseStart),
394 elseParseStart(elseParseStart), anchor(anchor), inverted(inverted) {}
396 /// Return the `then` elements of the optional group. Drops the first
397 /// `thenParseStart` whitespace elements if `parseable` is true.
398 ArrayRef<FormatElement *> getThenElements(bool parseable = false) const {
399 return llvm::ArrayRef(thenElements)
400 .drop_front(parseable ? thenParseStart : 0);
403 /// Return the `else` elements of the optional group. Drops the first
404 /// `elseParseStart` whitespace elements if `parseable` is true.
405 ArrayRef<FormatElement *> getElseElements(bool parseable = false) const {
406 return llvm::ArrayRef(elseElements)
407 .drop_front(parseable ? elseParseStart : 0);
410 /// Return the anchor of the optional group.
411 FormatElement *getAnchor() const { return anchor; }
413 /// Return true if the optional group is inverted.
414 bool isInverted() const { return inverted; }
416 private:
417 /// The child elements emitted when the anchor is present.
418 std::vector<FormatElement *> thenElements;
419 /// The child elements emitted when the anchor is not present.
420 std::vector<FormatElement *> elseElements;
421 /// The index of the first element that is parsed in `thenElements`. That is,
422 /// the first non-whitespace element.
423 unsigned thenParseStart;
424 /// The index of the first element that is parsed in `elseElements`. That is,
425 /// the first non-whitespace element.
426 unsigned elseParseStart;
427 /// The anchor element of the optional group.
428 FormatElement *anchor;
429 /// Whether the optional group condition is inverted and the anchor element is
430 /// in the else group.
431 bool inverted;
434 //===----------------------------------------------------------------------===//
435 // FormatParserBase
436 //===----------------------------------------------------------------------===//
438 /// Base class for a parser that implements an assembly format. This class
439 /// defines a common assembly format syntax and the creation of format elements.
440 /// Subclasses will need to implement parsing for the format elements they
441 /// support.
442 class FormatParser {
443 public:
444 /// Vtable anchor.
445 virtual ~FormatParser();
447 /// Parse the assembly format.
448 FailureOr<std::vector<FormatElement *>> parse();
450 protected:
451 /// The current context of the parser when parsing an element.
452 enum Context {
453 /// The element is being parsed in a "top-level" context, i.e. at the top of
454 /// the format or in an optional group.
455 TopLevelContext,
456 /// The element is being parsed as a custom directive child.
457 CustomDirectiveContext,
458 /// The element is being parsed as a type directive child.
459 TypeDirectiveContext,
460 /// The element is being parsed as a reference directive child.
461 RefDirectiveContext,
462 /// The element is being parsed as a struct directive child.
463 StructDirectiveContext
466 /// Create a format parser with the given source manager and a location.
467 explicit FormatParser(llvm::SourceMgr &mgr, llvm::SMLoc loc)
468 : lexer(mgr, loc), curToken(lexer.lexToken()) {}
470 /// Allocate and construct a format element.
471 template <typename FormatElementT, typename... Args>
472 FormatElementT *create(Args &&...args) {
473 // FormatElementT *ptr = allocator.Allocate<FormatElementT>();
474 // ::new (ptr) FormatElementT(std::forward<Args>(args)...);
475 // return ptr;
476 auto mem = std::make_unique<FormatElementT>(std::forward<Args>(args)...);
477 FormatElementT *ptr = mem.get();
478 allocator.push_back(std::move(mem));
479 return ptr;
482 //===--------------------------------------------------------------------===//
483 // Element Parsing
485 /// Parse a single element of any kind.
486 FailureOr<FormatElement *> parseElement(Context ctx);
487 /// Parse a literal.
488 FailureOr<FormatElement *> parseLiteral(Context ctx);
489 /// Parse a string.
490 FailureOr<FormatElement *> parseString(Context ctx);
491 /// Parse a variable.
492 FailureOr<FormatElement *> parseVariable(Context ctx);
493 /// Parse a directive.
494 FailureOr<FormatElement *> parseDirective(Context ctx);
495 /// Parse an optional group.
496 FailureOr<FormatElement *> parseOptionalGroup(Context ctx);
497 /// Parse a custom directive.
498 FailureOr<FormatElement *> parseCustomDirective(llvm::SMLoc loc, Context ctx);
499 /// Parse a ref directive.
500 FailureOr<FormatElement *> parseRefDirective(SMLoc loc, Context context);
501 /// Parse a qualified directive.
502 FailureOr<FormatElement *> parseQualifiedDirective(SMLoc loc, Context ctx);
504 /// Parse a format-specific variable kind.
505 virtual FailureOr<FormatElement *>
506 parseVariableImpl(llvm::SMLoc loc, StringRef name, Context ctx) = 0;
507 /// Parse a format-specific directive kind.
508 virtual FailureOr<FormatElement *>
509 parseDirectiveImpl(llvm::SMLoc loc, FormatToken::Kind kind, Context ctx) = 0;
511 //===--------------------------------------------------------------------===//
512 // Format Verification
514 /// Verify that the format is well-formed.
515 virtual LogicalResult verify(llvm::SMLoc loc,
516 ArrayRef<FormatElement *> elements) = 0;
517 /// Verify the arguments to a custom directive.
518 virtual LogicalResult
519 verifyCustomDirectiveArguments(llvm::SMLoc loc,
520 ArrayRef<FormatElement *> arguments) = 0;
521 /// Verify the elements of an optional group.
522 virtual LogicalResult
523 verifyOptionalGroupElements(llvm::SMLoc loc,
524 ArrayRef<FormatElement *> elements,
525 FormatElement *anchor) = 0;
527 /// Mark 'element' as qualified. If 'element' cannot be qualified an error
528 /// should be emitted and failure returned.
529 virtual LogicalResult markQualified(llvm::SMLoc loc,
530 FormatElement *element) = 0;
532 //===--------------------------------------------------------------------===//
533 // Lexer Utilities
535 /// Emit an error at the given location.
536 LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) {
537 lexer.emitError(loc, msg);
538 return failure();
541 /// Emit an error and a note at the given notation.
542 LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
543 const Twine &note) {
544 lexer.emitErrorAndNote(loc, msg, note);
545 return failure();
548 /// Parse a single token of the expected kind.
549 FailureOr<FormatToken> parseToken(FormatToken::Kind kind, const Twine &msg) {
550 if (!curToken.is(kind))
551 return emitError(curToken.getLoc(), msg);
552 FormatToken tok = curToken;
553 consumeToken();
554 return tok;
557 /// Advance the lexer to the next token.
558 void consumeToken() {
559 assert(!curToken.is(FormatToken::eof) && !curToken.is(FormatToken::error) &&
560 "shouldn't advance past EOF or errors");
561 curToken = lexer.lexToken();
564 /// Get the current token.
565 FormatToken peekToken() { return curToken; }
567 private:
568 /// The format parser retains ownership of the format elements in a bump
569 /// pointer allocator.
570 // FIXME: FormatElement with `std::vector` need to be converted to use
571 // trailing objects.
572 // llvm::BumpPtrAllocator allocator;
573 std::vector<std::unique_ptr<FormatElement>> allocator;
574 /// The format lexer to use.
575 FormatLexer lexer;
576 /// The current token in the lexer.
577 FormatToken curToken;
580 //===----------------------------------------------------------------------===//
581 // Utility Functions
582 //===----------------------------------------------------------------------===//
584 /// Whether a space needs to be emitted before a literal. E.g., two keywords
585 /// back-to-back require a space separator, but a keyword followed by '<' does
586 /// not require a space.
587 bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);
589 /// Returns true if the given string can be formatted as a keyword.
590 bool canFormatStringAsKeyword(StringRef value,
591 function_ref<void(Twine)> emitError = nullptr);
593 /// Returns true if the given string is valid format literal element.
594 /// If `emitError` is provided, it is invoked with the reason for the failure.
595 bool isValidLiteral(StringRef value,
596 function_ref<void(Twine)> emitError = nullptr);
598 /// Whether a failure in parsing the assembly format should be a fatal error.
599 extern llvm::cl::opt<bool> formatErrorIsFatal;
601 } // namespace tblgen
602 } // namespace mlir
604 #endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_