Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / tools / include-mapping / gen_std.py
blobfcd3bd0d843ea165975bdf4cb023af0e85249595
#!/usr/bin/env python3
# ===- gen_std.py - ------------------------------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===------------------------------------------------------------------------===#
"""gen_std.py is a tool to generate a lookup table (from qualified names to
include headers) for C/C++ Standard Library symbols by parsing archived HTML
files from cppreference.

The generated files are located in clang/include/Tooling/Inclusions.

Caveats and FIXMEs:
  - std literal operators (e.g. those in std::literals::chrono_literals) and
    std::placeholders symbols are not indexed by this tool; symbols from the
    other std sub-namespaces listed in main() (e.g. chrono) are.
  - symbols with multiple variants or defined in multiple headers aren't added,
    e.g. std::move, std::swap

Usage:
  1. Install BeautifulSoup dependency, see instruction:
       https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
  2. Download cppreference offline HTML files (html_book_20220730.zip in Unofficial Release) at
       https://en.cppreference.com/w/Cppreference:Archives
  3. Unzip the zip file from step 2 (e.g., to a "cppreference" directory). You should
     get a "cppreference/reference" directory.
  4. Run the command:
       // Generate C++ symbols
       python3 gen_std.py -cppreference cppreference/reference -symbols=cpp > StdSymbolMap.inc
       // Generate C symbols
       python3 gen_std.py -cppreference cppreference/reference -symbols=c > CSymbolMap.inc
"""
import argparse
import datetime
import os
import re
import sys

import cppreference_parser
# Banner printed at the top of the generated output.  The first %s receives
# the upper-cased symbol kind and the second the cppreference date; both are
# supplied by main().
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// This file was generated automatically by
// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""
def ParseArg(argv=None):
    """Parse the command-line options of gen_std.py.

    Args:
      argv: optional list of argument strings.  When None (the default),
        argparse reads the arguments from sys.argv.

    Returns:
      The argparse namespace with two attributes: `cppreference` (path to the
      offline HTML directory) and `symbols` (the kind of symbols to generate).
    """
    parser = argparse.ArgumentParser(description="Generate StdGen file")
    # Both options are required, so no `default` is given: a default on a
    # required option can never take effect.
    parser.add_argument(
        "-cppreference",
        metavar="PATH",
        help="path to the cppreference offline HTML directory",
        required=True,
    )
    parser.add_argument(
        "-symbols",
        help="Generate c or cpp (removed) symbols. One of {cpp, c, cpp_removed}.",
        required=True,
    )
    return parser.parse_args(argv)
# IO-related symbols declared in the <iosfwd> header, per C++
# [iosfwd.syn 31.3.1].  Built once at import time and kept as a frozenset so
# the per-symbol membership test does not rescan a list.
_IOSFWD_SYMBOLS = frozenset(
    {
        "basic_ios",
        "basic_streambuf",
        "basic_istream",
        "basic_ostream",
        "basic_iostream",
        "basic_stringbuf",
        "basic_istringstream",
        "basic_ostringstream",
        "basic_stringstream",
        "basic_spanbuf",
        "basic_ispanstream",
        "basic_ospanstream",
        "basic_spanstream",
        "basic_filebuf",
        "basic_ifstream",
        "basic_ofstream",
        "basic_fstream",
        "basic_syncbuf",
        "basic_osyncstream",
        "istreambuf_iterator",
        "ostreambuf_iterator",
        "ios",
        "wios",
        "streambuf",
        "istream",
        "ostream",
        "iostream",
        "stringbuf",
        "istringstream",
        "ostringstream",
        "stringstream",
        "spanbuf",
        "ispanstream",
        "ospanstream",
        "spanstream",
        "filebuf",
        "ifstream",
        "ofstream",
        "fstream",
        "syncbuf",
        "osyncstream",
        "wstreambuf",
        "wistream",
        "wostream",
        "wiostream",
        "wstringbuf",
        "wistringstream",
        "wostringstream",
        "wstringstream",
        "wspanbuf",
        "wispanstream",
        "wospanstream",
        "wspanstream",
        "wfilebuf",
        "wifstream",
        "wofstream",
        "wfstream",
        "wsyncbuf",
        "wosyncstream",
        "fpos",
        "streampos",
        "wstreampos",
        "u8streampos",
        "u16streampos",
        "u32streampos",
    }
)


def AdditionalHeadersForIOSymbols(symbol):
    """Return extra headers that also provide an IO-related symbol.

    Args:
      symbol: an object with a `name` string and a `headers` list that holds
        exactly one header.

    Returns:
      A new list of additional header names (possibly empty), with
      "<iostream>" listed before "<iosfwd>" when both apply.  The input
      symbol is not modified.
    """
    assert len(symbol.headers) == 1
    sym_header = symbol.headers[0]
    headers = []
    # <iostream> is preferred over <iosfwd>, so it is appended first.

    # <iostream> is an alternative of <streambuf>, <istream>, <ostream>, <ios>.
    # per C++ [iostream.syn 31.4.1]
    if sym_header in ("<ios>", "<istream>", "<ostream>", "<streambuf>"):
        headers.append("<iostream>")

    if symbol.name in _IOSFWD_SYMBOLS:
        headers.append("<iosfwd>")

    return headers
# C++ form of the C standard headers.
_C_COMPAT_HEADERS = frozenset(
    {
        "<cassert>",
        "<cctype>",
        "<cerrno>",
        "<cfenv>",
        "<cfloat>",
        "<cinttypes>",
        "<climits>",
        "<clocale>",
        "<cmath>",
        "<csetjmp>",
        "<csignal>",
        "<cstdarg>",
        "<cstddef>",
        "<cstdint>",
        "<cstdio>",
        "<cstdlib>",
        "<cstring>",
        "<ctime>",
        "<cuchar>",
        "<cwchar>",
        "<cwctype>",
    }
)

# C++ [support.c.headers.other] 17.14.7
# ..., behaves as if each name placed in the standard library namespace by
# the corresponding <cname> header is placed within the global namespace
# scope, except for the functions described in [sf.cmath], the
# std::lerp function overloads ([c.math.lerp]), the declaration of
# std::byte ([cstddef.syn]), and the functions and function templates
# described in [support.types.byteops].
#
# Each pattern must match the whole symbol name.  The optional suffix is the
# character class [fl] (float / long double variants); a "|" inside a class
# would be matched literally, so it is deliberately not written there.
_C_COMPAT_EXCEPTION_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(assoc_)?laguerre[fl]?",
        r"(assoc_|sph_)?legendre[fl]?",
        r"beta[fl]?",
        r"(comp_)?ellint_[1-3][fl]?",
        r"(cyl_|sph_)?bessel_[i-k][fl]?",
        r"(cyl_|sph_)?neumann[fl]?",
        r"expint[fl]?",
        r"hermite[fl]?",
        r"riemann_zeta[fl]?",
        r"lerp",
        r"byte",
    )
)


def GetCCompatibilitySymbols(symbol):
    """Return the global-namespace variants of a <cname> symbol.

    Args:
      symbol: a symbol with `name`, `namespace` and a `headers` list holding
        exactly one header.

    Returns:
      An empty list when the header is not one of the <cname> headers or the
      name is one of the exceptions listed above.  Otherwise a list of new
      cppreference_parser.Symbol objects in the global namespace (namespace
      None): one for the <cname> header (only when the input symbol has a
      namespace) and one for the matching <name.h> C header.
    """
    assert len(symbol.headers) == 1
    header = symbol.headers[0]
    if header not in _C_COMPAT_HEADERS:
        return []
    if any(p.fullmatch(symbol.name) for p in _C_COMPAT_EXCEPTION_PATTERNS):
        return []

    # Introduce two more entries, both in the global namespace, one using the
    # C++-compat header and another using the C header.
    results = []
    if symbol.namespace is not None:
        # avoid printing duplicated entries, for C macros!
        # (a symbol without a namespace already is the global <cname> entry)
        results.append(cppreference_parser.Symbol(symbol.name, None, [header]))
    c_header = "<" + header[2:-1] + ".h>"  # <cstdio> => <stdio.h>
    results.append(cppreference_parser.Symbol(symbol.name, None, [c_header]))
    return results
def main():
    """Generate the symbol mapping and print it to stdout.

    Reads the command line via ParseArg(), selects the cppreference index
    pages for the requested symbol kind, parses them with
    cppreference_parser.GetSymbols() and prints one SYMBOL(...) line per
    (symbol, header) pair after the CODE_PREFIX banner.  Symbols with no
    header or with more than one header are reported on stderr and skipped.

    Exits with an error message when the symbol kind is not supported or the
    symbol index directory does not exist.
    """
    args = ParseArg()
    if args.symbols == "cpp":
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols can't be
            # accessed by std::<symbol_name>.
            #
            # std::placeholders symbols are handled manually in StdSpecialSymbolMap.inc
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "execution.html", "std::execution::"),
            (symbol_index_root, "numbers.html", "std::numbers::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "ranges.html", "std::ranges::"),
            (symbol_index_root, "views.html", "std::ranges::views::"),
            # std::ranges::views can be accessed as std::views.
            (symbol_index_root, "views.html", "std::views::"),
            (symbol_index_root, "regex_constants.html", "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
            # Zombie symbols that were available from the Standard Library, but are
            # removed in the following standards.
            (symbol_index_root, "zombie_names.html", "std::"),
            (symbol_index_root, "macro.html", None),
        ]
    elif args.symbols == "c":
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]
    else:
        # Without this branch an unknown kind would fall through and fail
        # below with an UnboundLocalError on symbol_index_root.
        sys.exit("Unsupported -symbols value: %s (expected cpp or c)" % args.symbols)

    if not os.path.exists(symbol_index_root):
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter.
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files.
    # so we use the modified time of the index.html as the version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime
    ).strftime("%Y-%m-%d")
    print(CODE_PREFIX % (args.symbols.upper(), cppreference_modified_date))
    for symbol in symbols:
        if len(symbol.headers) == 1:
            augmented_symbols = [symbol]
            augmented_symbols.extend(GetCCompatibilitySymbols(symbol))
            for s in augmented_symbols:
                s.headers.extend(AdditionalHeadersForIOSymbols(s))
                for header in s.headers:
                    # SYMBOL(unqualified_name, namespace, header)
                    print("SYMBOL(%s, %s, %s)" % (s.name, s.namespace, header))
        elif len(symbol.headers) == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write(
                "Ambiguous header for symbol %s: %s\n"
                % (symbol.name, ", ".join(symbol.headers))
            )
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()