2 #===- gen_std.py - ------------------------------------------*- python -*--===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 #===------------------------------------------------------------------------===#
10 """gen_std.py is a tool to generate a lookup table (from qualified names to
11 include headers) for C/C++ Standard Library symbols by parsing archived HTML
12 files from cppreference.
14 The generated files are located in clang/include/Tooling/Inclusions.
17 - only symbols directly in the "std" namespace and in a fixed set of its
18 sub-namespaces (chrono, filesystem, pmr, regex_constants, this_thread) are added; symbols in other std sub-namespaces are not indexed yet.
19 - symbols with multiple variants or defined in multiple headers aren't added,
20 e.g. std::move, std::swap
23 1. Install BeautifulSoup dependency, see instruction:
24 https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
25 2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
26 https://en.cppreference.com/w/Cppreference:Archives
27 3. Unzip the zip file from step 2 to directory </cppreference>, you should
28 get a "reference" directory in </cppreference>
30 // Generate C++ symbols
31 gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
33 gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
37 import cppreference_parser
44 //===-- gen_std.py generated file -------------------------------*- C++ -*-===//
46 // Used to build a lookup table (qualified names => include headers) for %s
47 // Standard Library symbols.
49 // This file was generated automatically by
50 // clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
52 // Generated from cppreference offline HTML book (modified on %s).
53 //===----------------------------------------------------------------------===//
57 parser
= argparse
.ArgumentParser(description
='Generate StdGen file')
58 parser
.add_argument('-cppreference', metavar
='PATH',
60 help='path to the cppreference offline HTML directory',
63 parser
.add_argument('-language',
65 help='Generate c or cpp symbols',
67 return parser
.parse_args()
72 if args
.language
== 'cpp':
73 page_root
= os
.path
.join(args
.cppreference
, "en", "cpp")
74 symbol_index_root
= os
.path
.join(page_root
, "symbol_index")
76 (page_root
, "symbol_index.html", "std::"),
77 # std sub-namespace symbols have separated pages.
78 # We don't index std literal operators (e.g.
79 # std::literals::chrono_literals::operator""d), these symbols can't be
80 # accessed by std::<symbol_name>.
81 # FIXME: index std::placeholders symbols. The placeholders.html page is
82 # different (it contains one entry for _1, _2, ..., _N), so it needs special handling.
84 (symbol_index_root
, "chrono.html", "std::chrono::"),
85 (symbol_index_root
, "filesystem.html", "std::filesystem::"),
86 (symbol_index_root
, "pmr.html", "std::pmr::"),
87 (symbol_index_root
, "regex_constants.html", "std::regex_constants::"),
88 (symbol_index_root
, "this_thread.html", "std::this_thread::"),
90 elif args
.language
== 'c':
91 page_root
= os
.path
.join(args
.cppreference
, "en", "c")
92 symbol_index_root
= page_root
93 parse_pages
= [(page_root
, "index.html", None)]
95 if not os
.path
.exists(symbol_index_root
):
96 exit("Path %s doesn't exist!" % symbol_index_root
)
98 symbols
= cppreference_parser
.GetSymbols(parse_pages
)
100 # We don't have version information from the unzipped offline HTML files,
101 # so we use the modified time of index.html as the version.
102 index_page_path
= os
.path
.join(page_root
, "index.html")
103 cppreference_modified_date
= datetime
.datetime
.fromtimestamp(
104 os
.stat(index_page_path
).st_mtime
).strftime('%Y-%m-%d')
105 print(CODE_PREFIX
% (args
.language
.upper(), cppreference_modified_date
))
106 for symbol
in symbols
:
107 if len(symbol
.headers
) == 1:
108 # SYMBOL(unqualified_name, namespace, header)
109 print("SYMBOL(%s, %s, %s)" % (symbol
.name
, symbol
.namespace
,
111 elif len(symbol
.headers
) == 0:
112 sys
.stderr
.write("No header found for symbol %s\n" % symbol
.name
)
114 # FIXME: support symbols with multiple headers (e.g. std::move).
115 sys
.stderr
.write("Ambiguous header for symbol %s: %s\n" % (
116 symbol
.name
, ', '.join(symbol
.headers
)))
119 if __name__
== '__main__':