1 //===--- Main.cpp - Compile BNF grammar -----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a tool to compile a BNF grammar. It is used by the build system to
10 // generate the data needed to statically construct core pieces (Grammar,
11 // LRTable etc) of the LR parser.
13 //===----------------------------------------------------------------------===//
15 #include "clang-pseudo/grammar/Grammar.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/FormatVariadic.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/ToolOutputFile.h"
27 using llvm::cl::Required
;
28 using llvm::cl::value_desc
;
29 using llvm::cl::values
;
// Command-line options for the tool.
// NOTE(review): this excerpt omits several original lines (38-39 and 47), so
// the trailing arguments of `Grammar` and `OutputFilename`, and the type of
// `Emit`, are not visible here -- confirm against the full file.
//
// `Grammar`: string option named "grammar"; main() passes it to readOrDie().
37 opt
<std::string
> Grammar("grammar", desc("Parse a BNF grammar file."),
// `Emit`: selects what to emit. The visible values are
// "emit-symbol-list" (EmitSymbolList) and "emit-grammar-content"
// (EmitGrammarContent).
40 Emit(desc("which information to emit:"),
41 values(clEnumValN(EmitSymbolList
, "emit-symbol-list",
42 "Print nonterminal symbols (default)"),
43 clEnumValN(EmitGrammarContent
, "emit-grammar-content",
44 "Print the BNF grammar content as a string")));
// `OutputFilename`: string option named "o" whose initial value is "-".
// NOTE(review): "-" presumably selects stdout per the usual LLVM tool
// convention -- confirm.
46 opt
<std::string
> OutputFilename("o", init("-"), desc("Output"),
// Reads the file at `Path` and returns its contents as a std::string.
//
// If llvm::MemoryBuffer::getFile reports an error, a diagnostic naming the
// path and the error message is written to llvm::errs().
// NOTE(review): the statements that follow the diagnostic (original lines
// 55-56) are not visible in this excerpt; the function name suggests the
// process terminates there -- confirm.
49 std::string
readOrDie(llvm::StringRef Path
) {
50 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> Text
=
51 llvm::MemoryBuffer::getFile(Path
);
// The branch is taken when `Text` carries an error code instead of a buffer.
52 if (std::error_code EC
= Text
.getError()) {
53 llvm::errs() << "Error: can't read grammar file '" << Path
54 << "': " << EC
.message() << "\n";
// Return the buffer's contents as a std::string.
57 return Text
.get()->getBuffer().str();
65 // Mangles a symbol name into a valid identifier.
67 // These follow names in the grammar fairly closely:
68 // nonterminal: `ptr-declarator` becomes `ptr_declarator`;
69 // punctuator: `,` becomes `COMMA`;
70 // keyword: `INT` becomes `INT`;
71 // terminal: `IDENTIFIER` becomes `IDENTIFIER`;
// Parameters:
//   SID - the symbol to mangle.
//   G   - the grammar; used to look up the symbol's name via G.symbolName().
72 std::string
mangleSymbol(SymbolID SID
, const Grammar
&G
) {
// Table of upper-cased token names generated from TokenKinds.def: each TOK and
// KEYWORD entry is stringified and upper-cased. The vector is allocated with
// `new` and bound to a function-local static reference, so it is built once.
73 static auto &TokNames
= *new std::vector
<std::string
>{
74 #define TOK(X) llvm::StringRef(#X).upper(),
75 #define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
76 #include "clang/Basic/TokenKinds.def"
// NOTE(review): original lines 77-78 (the end of the initializer and whatever
// guards the return below) are not visible in this excerpt; presumably this
// return applies only to token symbols -- confirm.
// Look up the name by the token index derived from SID.
79 return TokNames
[symbolToToken(SID
)];
// Otherwise start from the symbol name recorded in the grammar.
80 std::string Name
= G
.symbolName(SID
).str();
81 // translation-unit -> translation_unit
82 std::replace(Name
.begin(), Name
.end(), '-', '_');
86 // Mangles the RHS of a rule definition into a valid identifier.
88 // These are unique only for a fixed LHS.
89 // e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
90 // it is `ptr_operator__ptr_declarator`.
// Parameters:
//   RID - the rule whose symbol sequence is mangled.
//   G   - the grammar used to look up the rule and its symbol names.
// NOTE(review): the end of the function (original lines 97-99) is not visible
// in this excerpt; presumably it returns MangleName -- confirm.
91 std::string
mangleRule(RuleID RID
, const Grammar
&G
) {
92 const auto &R
= G
.lookupRule(RID
);
// Seed the result with the mangled first symbol of the rule's sequence;
// front() is called unconditionally here.
93 std::string MangleName
= mangleSymbol(R
.seq().front(), G
);
// Append each remaining symbol, separated by a double underscore.
94 for (SymbolID S
: R
.seq().drop_front()) {
95 MangleName
.append("__");
96 MangleName
.append(mangleSymbol(S
, G
));
102 } // namespace pseudo
// Tool entry point: parses the command line, reads and parses the grammar
// file, then writes output to the file named by OutputFilename.
// NOTE(review): many original lines are missing from this excerpt (for
// example 114-117, 119, 121-127, 153-159 and everything after 163), so the
// error-path returns, the declaration of `EC`, the dispatch on the emit mode
// and the end of the function are not visible -- confirm against the full
// file before relying on these notes.
105 int main(int argc
, char *argv
[]) {
106 llvm::cl::ParseCommandLineOptions(argc
, argv
, "");
// Load the grammar text from the path given by the `Grammar` option.
108 std::string GrammarText
= readOrDie(Grammar
);
// parseBNF is handed `Diags` alongside the grammar text.
109 std::vector
<std::string
> Diags
;
110 auto G
= clang::pseudo::Grammar::parseBNF(GrammarText
, Diags
);
// If any diagnostics were collected, print them one per line to llvm::errs().
112 if (!Diags
.empty()) {
113 llvm::errs() << llvm::join(Diags
, "\n");
// Open the output file; `EC` is passed to the constructor and its message is
// printed below.
118 llvm::ToolOutputFile Out
{OutputFilename
, EC
, llvm::sys::fs::OF_None
};
120 llvm::errs() << EC
.message() << '\n';
// NOTE(review): the three #define lines below appear here without their
// surrounding context; presumably they belong to text the tool emits rather
// than to this translation unit's own preprocessing -- confirm.
128 #define NONTERMINAL(NAME, ID)
131 #define RULE(LHS, RHS, ID)
134 #define EXTENSION(NAME, ID)
// For every nonterminal ID below G.table().Nonterminals.size(), write a
// "NONTERMINAL(<mangled name>, <ID>)" line followed by one
// "RULE(<mangled target>, <mangled rule>, <RID>)" line per rule in
// G.rulesFor(ID).
137 for (clang::pseudo::SymbolID ID
= 0; ID
< G
.table().Nonterminals
.size();
139 Out
.os() << llvm::formatv("NONTERMINAL({0}, {1})\n",
140 clang::pseudo::mangleSymbol(ID
, G
), ID
);
141 for (const clang::pseudo::Rule
&R
: G
.rulesFor(ID
)) {
// The rule ID is the offset of R from the start of G.table().Rules.
142 clang::pseudo::RuleID RID
= &R
- G
.table().Rules
.data();
143 Out
.os() << llvm::formatv("RULE({0}, {1}, {2})\n",
144 clang::pseudo::mangleSymbol(R
.Target
, G
),
145 clang::pseudo::mangleRule(RID
, G
), RID
);
// Write one "EXTENSION(<name>, <EID>)" line per entry of
// G.table().AttributeValues, starting at index 1.
148 for (clang::pseudo::ExtensionID EID
= 1 /*skip the sentinel 0 value*/;
149 EID
< G
.table().AttributeValues
.size(); ++EID
) {
150 llvm::StringRef Name
= G
.table().AttributeValues
[EID
];
// Every non-sentinel attribute value is expected to have a non-empty name.
151 assert(!Name
.empty());
152 Out
.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name
, EID
);
// Grammar-content mode: split the grammar text on '\n' and write each line,
// with its newline re-appended, through write_escaped().
160 case EmitGrammarContent
:
161 for (llvm::StringRef Line
: llvm::split(GrammarText
, '\n')) {
163 Out
.os().write_escaped((Line
+ "\n").str());