2 # ===- gen_std.py - ------------------------------------------*- python -*--===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 # ===------------------------------------------------------------------------===#
10 """gen_std.py is a tool to generate a lookup table (from qualified names to
11 include headers) for C/C++ Standard Library symbols by parsing archived HTML
12 files from cppreference.
14 The generated files are located in clang/include/Tooling/Inclusions.
17 - only symbols directly in "std" namespace are added, we should also add std's
18 subnamespace symbols (e.g. chrono).
19 - symbols with multiple variants or defined in multiple headers aren't added,
20 e.g. std::move, std::swap
23 1. Install BeautifulSoup dependency, see instruction:
24 https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
25 2. Download cppreference offline HTML files (html_book_20220730.zip in Unofficial Release) at
26 https://en.cppreference.com/w/Cppreference:Archives
27 3. Unzip the zip file from step 2 (e.g., to a "cppreference" directory). You should
28 get a "cppreference/reference" directory.
30 // Generate C++ symbols
31 python3 gen_std.py -cppreference cppreference/reference -symbols=cpp > StdSymbolMap.inc
33 python3 gen_std.py -cppreference cppreference/reference -symbols=c > CSymbolMap.inc
37 import cppreference_parser
46 //===-- gen_std.py generated file -------------------------------*- C++ -*-===//
48 // Used to build a lookup table (qualified names => include headers) for %s
49 // Standard Library symbols.
51 // This file was generated automatically by
52 // clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
54 // Generated from cppreference offline HTML book (modified on %s).
55 //===----------------------------------------------------------------------===//
60 parser
= argparse
.ArgumentParser(description
="Generate StdGen file")
65 help="path to the cppreference offline HTML directory",
71 help="Generate c or cpp (removed) symbols. One of {cpp, c, cpp_removed}.",
74 return parser
.parse_args()
77 def AdditionalHeadersForIOSymbols(symbol
):
78 # IO-related symbols declared in the <iosfwd> header, per C++
79 # [iosfwd.syn 31.3.1]:
87 "basic_istringstream",
88 "basic_ostringstream",
100 "istreambuf_iterator",
101 "ostreambuf_iterator",
147 assert len(symbol
.headers
) == 1
148 sym_header
= symbol
.headers
[0]
150 # <iostream> is preferred over <iosfwd>
152 # <iostream> is an alternative to <streambuf>, <istream>, <ostream>, <ios>,
153 # per C++ [iostream.syn 31.4.1].
154 if sym_header
in ["<ios>", "<istream>", "<ostream>", "<streambuf>"]:
155 headers
.append("<iostream>")
157 if symbol
.name
in iosfwd_symbols
:
158 headers
.append("<iosfwd>")
163 def GetCCompatibilitySymbols(symbol
):
164 # C++ form of the C standard headers.
188 # C++ [support.c.headers.other] 17.14.7
189 # ..., behaves as if each name placed in the standard library namespace by
190 # the corresponding <cname> header is placed within the global namespace
191 # scope, except for the functions described in [sf.cmath], the
192 # std::lerp function overloads ([c.math.lerp]), the declaration of
193 # std::byte ([cstddef.syn]), and the functions and function templates
194 # described in [support.types.byteops].
195 exception_symbols
= {
196 "(assoc_)?laguerre[f|l]?",
197 "(assoc_|sph_)?legendre[f|l]?",
199 "(comp_)?ellint_[1-3][f|l]?",
200 "(cyl_|sph_)?bessel_[i-k][f|l]?",
201 "(cyl_|sph_)?neumann[f|l]?",
204 "riemann_zeta[f|l]?",
208 assert len(symbol
.headers
) == 1
209 header
= symbol
.headers
[0]
210 if header
not in c_compat_headers
:
212 if any(re
.fullmatch(x
, symbol
.name
) for x
in exception_symbols
):
215 # Introduce two more entries, both in the global namespace, one using the
216 # C++-compat header and another using the C header.
218 if symbol
.namespace
!= None:
219 # Skip symbols already in the global namespace (e.g. C macros) to avoid printing duplicate entries.
220 results
.append(cppreference_parser
.Symbol(symbol
.name
, None, [header
]))
221 c_header
= "<" + header
[2:-1] + ".h>" # <cstdio> => <stdio.h>
222 results
.append(cppreference_parser
.Symbol(symbol
.name
, None, [c_header
]))
228 if args
.symbols
== "cpp":
229 page_root
= os
.path
.join(args
.cppreference
, "en", "cpp")
230 symbol_index_root
= os
.path
.join(page_root
, "symbol_index")
232 (page_root
, "symbol_index.html", "std::"),
233 # std sub-namespace symbols have separate pages.
234 # We don't index std literal operators (e.g.
235 # std::literals::chrono_literals::operator""d), these symbols can't be
236 # accessed by std::<symbol_name>.
238 # std::placeholders symbols are handled manually in StdSpecialSymbolMap.inc
239 (symbol_index_root
, "chrono.html", "std::chrono::"),
240 (symbol_index_root
, "execution.html", "std::execution::"),
241 (symbol_index_root
, "numbers.html", "std::numbers::"),
242 (symbol_index_root
, "filesystem.html", "std::filesystem::"),
243 (symbol_index_root
, "pmr.html", "std::pmr::"),
244 (symbol_index_root
, "ranges.html", "std::ranges::"),
246 (symbol_index_root
, "views.html", "std::ranges::views::"),
247 # std::ranges::views can be accessed as std::views.
248 (symbol_index_root
, "views.html", "std::views::"),
250 (symbol_index_root
, "regex_constants.html", "std::regex_constants::"),
251 (symbol_index_root
, "this_thread.html", "std::this_thread::"),
252 # Zombie symbols that were available in the Standard Library but have been
253 # removed in later standards.
254 (symbol_index_root
, "zombie_names.html", "std::"),
255 (symbol_index_root
, "macro.html", None),
257 elif args
.symbols
== "c":
258 page_root
= os
.path
.join(args
.cppreference
, "en", "c")
259 symbol_index_root
= page_root
260 parse_pages
= [(page_root
, "index.html", None)]
262 if not os
.path
.exists(symbol_index_root
):
263 exit("Path %s doesn't exist!" % symbol_index_root
)
265 symbols
= cppreference_parser
.GetSymbols(parse_pages
)
267 # We don't have version information from the unzipped offline HTML files,
268 # so we use the modified time of index.html as the version.
269 index_page_path
= os
.path
.join(page_root
, "index.html")
270 cppreference_modified_date
= datetime
.datetime
.fromtimestamp(
271 os
.stat(index_page_path
).st_mtime
272 ).strftime("%Y-%m-%d")
273 print(CODE_PREFIX
% (args
.symbols
.upper(), cppreference_modified_date
))
274 for symbol
in symbols
:
275 if len(symbol
.headers
) == 1:
276 augmented_symbols
= [symbol
]
277 augmented_symbols
.extend(GetCCompatibilitySymbols(symbol
))
278 for s
in augmented_symbols
:
279 s
.headers
.extend(AdditionalHeadersForIOSymbols(s
))
280 for header
in s
.headers
:
281 # SYMBOL(unqualified_name, namespace, header)
282 print("SYMBOL(%s, %s, %s)" % (s
.name
, s
.namespace
, header
))
283 elif len(symbol
.headers
) == 0:
284 sys
.stderr
.write("No header found for symbol %s\n" % symbol
.name
)
286 # FIXME: support symbols with multiple headers (e.g. std::move).
288 "Ambiguous header for symbol %s: %s\n"
289 % (symbol
.name
, ", ".join(symbol
.headers
))
293 if __name__
== "__main__":