1 //===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "llvm/ADT/StringSwitch.h"
11 #include "llvm/Support/SourceMgr.h"
12 #include "llvm/TableGen/Error.h"
15 using namespace mlir::tblgen
;
16 using llvm::SourceMgr
;
18 //===----------------------------------------------------------------------===//
20 //===----------------------------------------------------------------------===//
/// Returns the source location of this token, built from the address of the
/// first character of the token's spelling.
22 SMLoc
FormatToken::getLoc() const {
23 return SMLoc::getFromPointer(spelling
.data());
26 //===----------------------------------------------------------------------===//
28 //===----------------------------------------------------------------------===//
/// Constructs a lexer over the main file of `mgr`: `curBuffer` is initialized
/// to the contents of that file's memory buffer and `curPtr` to the first
/// character of the buffer.
// NOTE(review): the start of the member initializer list is not visible in
// this listing; presumably it stores `mgr` and `loc` -- confirm.
30 FormatLexer::FormatLexer(SourceMgr
&mgr
, SMLoc loc
)
32 curBuffer(mgr
.getMemoryBuffer(mgr
.getMainFileID())->getBuffer()),
33 curPtr(curBuffer
.begin()) {}
/// Reports `msg` as an error at `loc`, attaches a note at the lexer's own
/// location (`this->loc`), and returns an error token formed at `loc`.
35 FormatToken
FormatLexer::emitError(SMLoc loc
, const Twine
&msg
) {
// Report the error itself through the lexer's source manager.
36 mgr
.PrintMessage(loc
, SourceMgr::DK_Error
, msg
);
// The note is printed through `llvm::SrcMgr` rather than `mgr`, at the
// location stored in the lexer (the parameter `loc` shadows the member).
37 llvm::SrcMgr
.PrintMessage(this->loc
, SourceMgr::DK_Note
,
38 "in custom assembly format for this operation");
39 return formToken(FormatToken::error
, loc
.getPointer());
/// Convenience overload: converts the raw character pointer `loc` into an
/// `SMLoc` and forwards to the `SMLoc` overload of `emitError`.
42 FormatToken
FormatLexer::emitError(const char *loc
, const Twine
&msg
) {
43 return emitError(SMLoc::getFromPointer(loc
), msg
);
/// Reports `msg` as an error at `loc`, attaches the standard "in custom
/// assembly format" note at the lexer's own location, then attaches `note` as
/// an additional note at `loc`. Returns an error token formed at `loc`.
// NOTE(review): the declaration of the `note` parameter is not visible in this
// listing; presumably it is a `const Twine &` like `msg` -- confirm.
46 FormatToken
FormatLexer::emitErrorAndNote(SMLoc loc
, const Twine
&msg
,
48 mgr
.PrintMessage(loc
, SourceMgr::DK_Error
, msg
);
// Same context note as in `emitError`, printed through `llvm::SrcMgr`.
49 llvm::SrcMgr
.PrintMessage(this->loc
, SourceMgr::DK_Note
,
50 "in custom assembly format for this operation");
// Caller-supplied note, printed at the error location.
51 mgr
.PrintMessage(loc
, SourceMgr::DK_Note
, note
);
52 return formToken(FormatToken::error
, loc
.getPointer());
/// Reads the character at `curPtr`, advances `curPtr` past it, and returns the
/// character (converted through `unsigned char`) for the ordinary case.
// NOTE(review): the branching that selects between the ordinary, nul and
// newline cases is not visible in this listing, nor are the values returned
// for the nul/EOF and newline cases. Only the visible conditions are
// documented below -- confirm against the full source.
55 int FormatLexer::getNextChar() {
56 char curChar
= *curPtr
++;
59 return (unsigned char)curChar
;
61 // A nul character in the stream is either the end of the current buffer or
62 // a random nul in the file. Disambiguate that here.
// `curPtr` was already advanced, so `curPtr - 1` is the position of the nul.
63 if (curPtr
- 1 != curBuffer
.end())
66 // Otherwise, return end of file.
72 // Handle the newline character by ignoring it and incrementing the line
73 // count. However, be careful about 'dos style' files with \n\r in them.
74 // Only treat a \n\r or \r\n as a single line.
// True when the next character is the *other* newline character of a pair.
75 if ((*curPtr
== '\n' || (*curPtr
== '\r')) && *curPtr
!= curChar
)
/// Lexes and returns the next token. Records the token start, consumes one
/// character via `getNextChar`, and then either forms a punctuation/EOF token
/// directly or delegates to `lexIdentifier`, `lexLiteral`, `lexVariable` or
/// `lexString`. Unrecognized characters produce an "unexpected character"
/// error token.
// NOTE(review): the `switch`/`case` labels that select among the returns below
// are not visible in this listing, so the character-to-token mapping cannot be
// confirmed from here.
81 FormatToken
FormatLexer::lexToken() {
82 const char *tokStart
= curPtr
;
84 // This always consumes at least one character.
85 int curChar
= getNextChar();
88 // Handle identifiers: [a-zA-Z_]
89 if (isalpha(curChar
) || curChar
== '_')
90 return lexIdentifier(tokStart
);
92 // Unknown character, emit an error.
93 return emitError(tokStart
, "unexpected character");
95 // Return EOF denoting the end of lexing.
96 return formToken(FormatToken::eof
, tokStart
);
// Punctuation tokens; each is formed starting at `tokStart`.
100 return formToken(FormatToken::caret
, tokStart
);
102 return formToken(FormatToken::colon
, tokStart
);
104 return formToken(FormatToken::comma
, tokStart
);
106 return formToken(FormatToken::equal
, tokStart
);
108 return formToken(FormatToken::less
, tokStart
);
110 return formToken(FormatToken::greater
, tokStart
);
112 return formToken(FormatToken::question
, tokStart
);
114 return formToken(FormatToken::l_paren
, tokStart
);
116 return formToken(FormatToken::r_paren
, tokStart
);
118 return formToken(FormatToken::star
, tokStart
);
120 return formToken(FormatToken::pipe
, tokStart
);
122 // Ignore whitespace characters.
// Multi-character tokens are handed off to dedicated lexing routines.
130 return lexLiteral(tokStart
);
132 return lexVariable(tokStart
);
134 return lexString(tokStart
);
/// Lexes a backtick-delimited literal. Expects the opening backtick to have
/// just been consumed (asserted), scans forward character by character, and
/// returns a `literal` token starting at `tokStart`. If a nul character ends
/// the scan first, emits "unexpected end of file in literal".
// NOTE(review): the condition guarding the `return formToken(...)` inside the
// loop is not visible in this listing; presumably it tests for the closing
// backtick -- confirm.
138 FormatToken
FormatLexer::lexLiteral(const char *tokStart
) {
139 assert(curPtr
[-1] == '`');
141 // Lex a literal surrounded by ``.
142 while (const char curChar
= *curPtr
++) {
144 return formToken(FormatToken::literal
, tokStart
);
// The loop exits only on a nul character; `curPtr - 1` points at that nul.
146 return emitError(curPtr
- 1, "unexpected end of file in literal");
/// Lexes a variable token. The character at `curPtr` must be a letter or '_',
/// otherwise "expected variable name" is emitted at the preceding character.
/// Following alphanumeric and '_' characters belong to the name; the result
/// is a `variable` token starting at `tokStart`.
// NOTE(review): the body of the `while` loop is not visible in this listing;
// presumably it advances `curPtr` -- confirm.
149 FormatToken
FormatLexer::lexVariable(const char *tokStart
) {
150 if (!isalpha(curPtr
[0]) && curPtr
[0] != '_')
151 return emitError(curPtr
- 1, "expected variable name");
153 // Otherwise, consume the rest of the characters.
154 while (isalnum(*curPtr
) || *curPtr
== '_')
156 return formToken(FormatToken::variable
, tokStart
);
/// Lexes a double-quoted string. Scans until an unescaped '"' is found and
/// returns a `string` token starting at `tokStart`; if a nul character ends
/// the scan first, emits "unexpected end of file in string".
// NOTE(review): the declaration of `escape` is not visible in this listing;
// presumably it is a `bool` initialized to false -- confirm.
159 FormatToken
FormatLexer::lexString(const char *tokStart
) {
160 // Lex until another quote, respecting escapes.
162 while (const char curChar
= *curPtr
++) {
163 if (!escape
&& curChar
== '"')
164 return formToken(FormatToken::string
, tokStart
);
// Remember whether this character is a backslash so that the next character
// is treated as escaped. Note that this is recomputed for every character.
165 escape
= curChar
== '\\';
167 return emitError(curPtr
- 1, "unexpected end of file in string");
/// Lexes an identifier or keyword. Characters matching [0-9a-zA-Z_\-] extend
/// the identifier; the resulting spelling is compared against the list of
/// directive keywords, falling back to a plain `identifier` kind. Returns a
/// token of that kind whose spelling is the text from `tokStart` to `curPtr`.
// NOTE(review): the body of the `while` loop and the declaration that receives
// the `StringSwitch` result (used below as `kind`) are not visible in this
// listing -- confirm against the full source.
170 FormatToken
FormatLexer::lexIdentifier(const char *tokStart
) {
171 // Match the rest of the identifier regex: [0-9a-zA-Z_\-]*
172 while (isalnum(*curPtr
) || *curPtr
== '_' || *curPtr
== '-')
175 // Check to see if this identifier is a keyword.
176 StringRef
str(tokStart
, curPtr
- tokStart
);
// Exact-match lookup: each spelling maps to its keyword token kind.
178 StringSwitch
<FormatToken::Kind
>(str
)
179 .Case("attr-dict", FormatToken::kw_attr_dict
)
180 .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword
)
181 .Case("prop-dict", FormatToken::kw_prop_dict
)
182 .Case("custom", FormatToken::kw_custom
)
183 .Case("functional-type", FormatToken::kw_functional_type
)
184 .Case("oilist", FormatToken::kw_oilist
)
185 .Case("operands", FormatToken::kw_operands
)
186 .Case("params", FormatToken::kw_params
)
187 .Case("ref", FormatToken::kw_ref
)
188 .Case("regions", FormatToken::kw_regions
)
189 .Case("results", FormatToken::kw_results
)
190 .Case("struct", FormatToken::kw_struct
)
191 .Case("successors", FormatToken::kw_successors
)
192 .Case("type", FormatToken::kw_type
)
193 .Case("qualified", FormatToken::kw_qualified
)
194 .Default(FormatToken::identifier
);
195 return FormatToken(kind
, str
);
198 //===----------------------------------------------------------------------===//
200 //===----------------------------------------------------------------------===//
/// Out-of-line defaulted destructor for `FormatElement`.
202 FormatElement::~FormatElement() = default;
/// Out-of-line defaulted destructor for `FormatParser`.
204 FormatParser::~FormatParser() = default;
/// Parses the whole format: repeatedly parses top-level elements until the
/// `eof` token is reached, collecting them into a vector, then runs `verify`
/// on the collected elements using the location of the first token.
// NOTE(review): the failure checks after `parseElement`/`verify` and the final
// return statement are not visible in this listing; presumably failures are
// propagated and `elements` is returned on success -- confirm.
206 FailureOr
<std::vector
<FormatElement
*>> FormatParser::parse() {
207 SMLoc loc
= curToken
.getLoc();
209 // Parse each of the format elements into the main format.
210 std::vector
<FormatElement
*> elements
;
211 while (curToken
.getKind() != FormatToken::eof
) {
212 FailureOr
<FormatElement
*> element
= parseElement(TopLevelContext
);
215 elements
.push_back(*element
);
218 // Verify the format.
219 if (failed(verify(loc
, elements
)))
224 //===----------------------------------------------------------------------===//
/// Parses a single format element by dispatching on the kind of the current
/// token: literal, string, variable, directive keyword, or '(' (optional
/// group). Any other token produces an error at the current token's location.
///
/// `ctx` is the context the element appears in and is forwarded unchanged to
/// the selected sub-parser.
227 FailureOr
<FormatElement
*> FormatParser::parseElement(Context ctx
) {
228 if (curToken
.is(FormatToken::literal
))
229 return parseLiteral(ctx
);
230 if (curToken
.is(FormatToken::string
))
231 return parseString(ctx
);
232 if (curToken
.is(FormatToken::variable
))
233 return parseVariable(ctx
);
234 if (curToken
.isKeyword())
235 return parseDirective(ctx
);
// An opening parenthesis starts an optional group.
236 if (curToken
.is(FormatToken::l_paren
))
237 return parseOptionalGroup(ctx
);
238 return emitError(curToken
.getLoc(),
239 "expected literal, variable, directive, or optional group");
/// Parses a backtick-delimited literal element. Literals are only accepted in
/// the top-level context. After stripping the surrounding backticks, an empty
/// value, a single space, or the two-character sequence `\n` yields a
/// `WhitespaceElement`; anything else must pass `isValidLiteral` and yields a
/// `LiteralElement`.
// NOTE(review): several lines are not visible in this listing (consuming the
// token, the `emitError` call for the non-top-level case, and the tail of the
// `isValidLiteral` error callback) -- confirm against the full source.
242 FailureOr
<FormatElement
*> FormatParser::parseLiteral(Context ctx
) {
243 FormatToken tok
= curToken
;
244 SMLoc loc
= tok
.getLoc();
247 if (ctx
!= TopLevelContext
) {
250 "literals may only be used in the top-level section of the format");
252 // Get the spelling without the surrounding backticks.
253 StringRef value
= tok
.getSpelling();
254 // Prevents things like `$arg0` or empty literals (when a literal is expected
255 // but not found) from getting segmentation faults.
256 if (value
.size() < 2 || value
[0] != '`' || value
[value
.size() - 1] != '`')
257 return emitError(tok
.getLoc(), "expected literal, but got '" + value
+ "'");
258 value
= value
.drop_front().drop_back();
260 // The parsed literal is a space element (`` or ` `) or a newline.
// Note: "\\n" is a backslash followed by 'n', not an actual newline character.
261 if (value
.empty() || value
== " " || value
== "\\n")
262 return create
<WhitespaceElement
>(value
);
264 // Check that the parsed literal is valid.
265 if (!isValidLiteral(value
, [&](Twine msg
) {
266 (void)emitError(loc
, "expected valid literal but got '" + value
+
270 return create
<LiteralElement
>(value
);
/// Parses a string element. Strings are only accepted in the custom-directive
/// context. The token spelling minus its first and last character (the
/// quotes) is processed character by character into `value`, which is moved
/// into a new `StringElement`.
// NOTE(review): the declaration of `value`, the `emitError` call for the wrong
// context, and the body of the per-character loop are not visible in this
// listing, so the exact escaping rules cannot be confirmed from here.
273 FailureOr
<FormatElement
*> FormatParser::parseString(Context ctx
) {
274 FormatToken tok
= curToken
;
275 SMLoc loc
= tok
.getLoc();
278 if (ctx
!= CustomDirectiveContext
) {
280 loc
, "strings may only be used as 'custom' directive arguments");
282 // Escape the string.
284 StringRef contents
= tok
.getSpelling().drop_front().drop_back();
// Pre-size the output to the length of the input contents.
285 value
.reserve(contents
.size());
287 for (char c
: contents
) {
292 return create
<StringElement
>(std::move(value
));
/// Parses a variable element. Takes the current token, strips the first
/// character of its spelling (the leading `$`), and delegates to
/// `parseVariableImpl` with the token location, the bare name and `ctx`.
// NOTE(review): the statement that advances past the variable token is not
// visible in this listing -- confirm against the full source.
295 FailureOr
<FormatElement
*> FormatParser::parseVariable(Context ctx
) {
296 FormatToken tok
= curToken
;
297 SMLoc loc
= tok
.getLoc();
300 // Get the name of the variable without the leading `$`.
301 StringRef name
= tok
.getSpelling().drop_front();
302 return parseVariableImpl(loc
, name
, ctx
);
/// Parses a directive keyword. The `custom`, `ref` and `qualified` directives
/// are handled by dedicated parsers in this class; every other keyword is
/// forwarded to `parseDirectiveImpl` together with its token kind.
// NOTE(review): the statement that advances past the keyword token is not
// visible in this listing -- confirm against the full source.
305 FailureOr
<FormatElement
*> FormatParser::parseDirective(Context ctx
) {
306 FormatToken tok
= curToken
;
307 SMLoc loc
= tok
.getLoc();
310 if (tok
.is(FormatToken::kw_custom
))
311 return parseCustomDirective(loc
, ctx
);
312 if (tok
.is(FormatToken::kw_ref
))
313 return parseRefDirective(loc
, ctx
);
314 if (tok
.is(FormatToken::kw_qualified
))
315 return parseQualifiedDirective(loc
, ctx
);
316 return parseDirectiveImpl(loc
, tok
.getKind(), ctx
);
/// Parses an optional group: a parenthesized list of "then" elements, an
/// optional `: ( ... )` list of "else" elements, and a trailing `?`. Exactly
/// one element across the group may be marked with `^` as the anchor. The
/// group is only accepted in the top-level context. On success an
/// `OptionalElement` is created from both element lists, the index of the
/// first non-whitespace element in each list, the anchor, and the `inverted`
/// flag.
// NOTE(review): many lines are not visible in this listing (token consumption,
// failure returns, the anchor bookkeeping inside the lambda, and the condition
// guarding the "no anchor" error) -- confirm details against the full source.
319 FailureOr
<FormatElement
*> FormatParser::parseOptionalGroup(Context ctx
) {
320 SMLoc loc
= curToken
.getLoc();
322 if (ctx
!= TopLevelContext
) {
323 return emitError(loc
,
324 "optional groups can only be used as top-level elements");
327 // Parse the child elements for this optional group.
328 std::vector
<FormatElement
*> thenElements
, elseElements
;
329 FormatElement
*anchor
= nullptr;
// Shared helper for both branches: parses elements into `elements` until a
// ')' is seen, recording the anchor (captured by reference) when `^` follows
// an element.
330 auto parseChildElements
=
331 [this, &anchor
](std::vector
<FormatElement
*> &elements
) -> LogicalResult
{
333 FailureOr
<FormatElement
*> element
= parseElement(TopLevelContext
);
336 // Check for an anchor.
337 if (curToken
.is(FormatToken::caret
)) {
339 return emitError(curToken
.getLoc(),
340 "only one element can be marked as the anchor of an "
346 elements
.push_back(*element
);
347 } while (!curToken
.is(FormatToken::r_paren
));
351 // Parse the 'then' elements. If the anchor was found in this group, then the
352 // optional is not inverted.
353 if (failed(parseChildElements(thenElements
)))
// Computed before the else branch is parsed: true when the then branch
// contained no anchor.
356 bool inverted
= !anchor
;
358 // Parse the `else` elements of this optional group.
359 if (curToken
.is(FormatToken::colon
)) {
361 if (failed(parseToken(
362 FormatToken::l_paren
,
363 "expected '(' to start else branch of optional group")) ||
364 failed(parseChildElements(elseElements
)))
368 if (failed(parseToken(FormatToken::question
,
369 "expected '?' after optional group")))
372 // The optional group is required to have an anchor.
374 return emitError(loc
, "optional group has no anchor element");
376 // Verify the child elements.
// The else branch is verified with a null anchor.
377 if (failed(verifyOptionalGroupElements(loc
, thenElements
, anchor
)) ||
378 failed(verifyOptionalGroupElements(loc
, elseElements
, nullptr)))
381 // Get the first parsable element. It must be an element that can be
382 // optionally-parsed.
383 auto isWhitespace
= [](FormatElement
*element
) {
384 return isa
<WhitespaceElement
>(element
);
// Locate the first non-whitespace element in each branch and convert the
// iterators to indices.
386 auto thenParseBegin
= llvm::find_if_not(thenElements
, isWhitespace
);
387 auto elseParseBegin
= llvm::find_if_not(elseElements
, isWhitespace
);
388 unsigned thenParseStart
= std::distance(thenElements
.begin(), thenParseBegin
);
389 unsigned elseParseStart
= std::distance(elseElements
.begin(), elseParseBegin
);
// NOTE(review): `thenParseBegin` is dereferenced without a visible check
// against `thenElements.end()`; presumably the anchor requirement guarantees a
// non-whitespace element -- confirm.
391 if (!isa
<LiteralElement
, VariableElement
, CustomDirective
>(*thenParseBegin
)) {
392 return emitError(loc
, "first parsable element of an optional group must be "
393 "a literal, variable, or custom directive");
395 return create
<OptionalElement
>(std::move(thenElements
),
396 std::move(elseElements
), thenParseStart
,
397 elseParseStart
, anchor
, inverted
);
/// Parses a `custom<Name>(args...)` directive. Only valid in the top-level
/// context. Expects '<', an identifier naming the directive, '>', and '(',
/// then parses arguments in the custom-directive context, expects ')', and
/// runs `verifyCustomDirectiveArguments` on the arguments. On success creates
/// a `CustomDirective` from the name token's spelling and the arguments.
// NOTE(review): the second parameter (used below as `ctx`), the assignment to
// `nameTok`, the argument loop header, and the failure returns are not visible
// in this listing -- confirm against the full source.
400 FailureOr
<FormatElement
*> FormatParser::parseCustomDirective(SMLoc loc
,
402 if (ctx
!= TopLevelContext
)
403 return emitError(loc
, "'custom' is only valid as a top-level directive");
405 FailureOr
<FormatToken
> nameTok
;
406 if (failed(parseToken(FormatToken::less
,
407 "expected '<' before custom directive name")) ||
409 parseToken(FormatToken::identifier
,
410 "expected custom directive name identifier")) ||
411 failed(parseToken(FormatToken::greater
,
412 "expected '>' after custom directive name")) ||
413 failed(parseToken(FormatToken::l_paren
,
414 "expected '(' before custom directive parameters")))
417 // Parse the arguments.
418 std::vector
<FormatElement
*> arguments
;
420 FailureOr
<FormatElement
*> argument
= parseElement(CustomDirectiveContext
);
421 if (failed(argument
))
423 arguments
.push_back(*argument
);
// A token other than ',' after an argument is tested for here.
424 if (!curToken
.is(FormatToken::comma
))
429 if (failed(parseToken(FormatToken::r_paren
,
430 "expected ')' after custom directive parameters")))
433 if (failed(verifyCustomDirectiveArguments(loc
, arguments
)))
435 return create
<CustomDirective
>(nameTok
->getSpelling(), std::move(arguments
));
/// Parses a `ref(arg)` directive. Only valid inside a `custom` directive
/// (i.e. when the context is `CustomDirectiveContext`). Expects '(', a single
/// element parsed in the ref-directive context, and ')'. On success creates a
/// `RefDirective` wrapping the parsed argument.
// NOTE(review): the second parameter (used below as `context`) and the failure
// return are not visible in this listing -- confirm against the full source.
438 FailureOr
<FormatElement
*> FormatParser::parseRefDirective(SMLoc loc
,
440 if (context
!= CustomDirectiveContext
)
441 return emitError(loc
, "'ref' is only valid within a `custom` directive");
443 FailureOr
<FormatElement
*> arg
;
444 if (failed(parseToken(FormatToken::l_paren
,
445 "expected '(' before argument list")) ||
446 failed(arg
= parseElement(RefDirectiveContext
)) ||
448 parseToken(FormatToken::r_paren
, "expected ')' after argument list")))
451 return create
<RefDirective
>(*arg
);
/// Parses a `qualified(arg)` directive. Expects '(', parses one element in the
/// caller's context, passes it to `markQualified`, and then expects ')'.
// NOTE(review): the second parameter (used below as `ctx`), the failure
// returns, and the final return value are not visible in this listing;
// presumably the parsed element itself is returned -- confirm.
454 FailureOr
<FormatElement
*> FormatParser::parseQualifiedDirective(SMLoc loc
,
456 if (failed(parseToken(FormatToken::l_paren
,
457 "expected '(' before argument list")))
459 FailureOr
<FormatElement
*> var
= parseElement(ctx
);
462 if (failed(markQualified(loc
, *var
)))
465 parseToken(FormatToken::r_paren
, "expected ')' after argument list")))
470 //===----------------------------------------------------------------------===//
472 //===----------------------------------------------------------------------===//
/// Decides whether a space should be emitted before the literal `value`.
///
/// For a single-character `value` or "->": if the previous element was
/// punctuation (`lastWasPunctuation`), returns false only when the first
/// character of `value` is one of `>)}],`; otherwise returns false only when
/// it is one of `<>(){}[],`.
// NOTE(review): the result for values that are neither a single character nor
// "->" (the body of the first `if`) is not visible in this listing -- confirm.
474 bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value
,
475 bool lastWasPunctuation
) {
476 if (value
.size() != 1 && value
!= "->")
478 if (lastWasPunctuation
)
479 return !StringRef(">)}],").contains(value
.front());
480 return !StringRef("<>(){}[],").contains(value
.front());
/// Checks whether `value` can be formatted as a keyword: it must be non-empty,
/// start with a letter or '_', and otherwise contain only alphanumeric
/// characters, '_', '$' or '.'. Violations are reported through `emitError`
/// with a message describing the failed rule.
// NOTE(review): the empty-check condition, the null-check (if any) on
// `emitError`, and the return statements are not visible in this listing;
// presumably the function returns false on a violation and true otherwise --
// confirm.
483 bool mlir::tblgen::canFormatStringAsKeyword(
484 StringRef value
, function_ref
<void(Twine
)> emitError
) {
487 emitError("keywords cannot be empty");
490 if (!isalpha(value
.front()) && value
.front() != '_') {
492 emitError("valid keyword starts with a letter or '_'");
// Every character after the first must be in the allowed set.
495 if (!llvm::all_of(value
.drop_front(), [](char c
) {
496 return isalnum(c
) || c
== '_' || c
== '$' || c
== '.';
500 "keywords should contain only alphanum, '_', '$', or '.' characters");
/// Checks whether `value` is a valid literal. Empty values are rejected. A
/// single-character value must be a letter or one of the characters in
/// `_:,=<>()[]{}?+*`. Longer values that are not recognized as multi-character
/// punctuation must satisfy `canFormatStringAsKeyword`. Problems are reported
/// through `emitError`.
// NOTE(review): the empty-check condition, the multi-character punctuation
// check, and the intermediate return statements are not visible in this
// listing -- confirm against the full source.
506 bool mlir::tblgen::isValidLiteral(StringRef value
,
507 function_ref
<void(Twine
)> emitError
) {
510 emitError("literal can't be empty");
513 char front
= value
.front();
515 // If there is only one character, this must either be punctuation or a
516 // single character bare identifier.
517 if (value
.size() == 1) {
518 StringRef bare
= "_:,=<>()[]{}?+*";
519 if (isalpha(front
) || bare
.contains(front
))
522 emitError("single character literal must be a letter or one of '" + bare
+
526 // Check the punctuation that are larger than a single character.
532 // Otherwise, this must be an identifier.
533 return canFormatStringAsKeyword(value
, emitError
);
536 //===----------------------------------------------------------------------===//
537 // Commandline Options
538 //===----------------------------------------------------------------------===//
/// Boolean command-line option `asmformat-error-is-fatal`; initialized to
/// true. Its description states that a fatal error is emitted if format
/// parsing fails.
540 llvm::cl::opt
<bool> mlir::tblgen::formatErrorIsFatal(
541 "asmformat-error-is-fatal",
542 llvm::cl::desc("Emit a fatal error if format parsing fails"),
543 llvm::cl::init(true));