3 # Copyright (C) 2023-2024 Free Software Foundation, Inc.
5 # Script to regenerate FOO.opt.urls files for each FOO.opt in the
8 # This file is part of GCC.
10 # GCC is free software; you can redistribute it and/or modify it under
11 # the terms of the GNU General Public License as published by the Free
12 # Software Foundation; either version 3, or (at your option) any later
15 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 # You should have received a copy of the GNU General Public License
21 # along with GCC; see the file COPYING3. If not see
22 # <http://www.gnu.org/licenses/>. */
25 Parses the generated HTML (from "make html") to locate anchors
26 for options, then parses the .opt files within the source tree,
27 and generates a .opt.urls in the source tree for each .opt file,
28 giving URLs for each option, where it can.
30 Usage (from build/gcc subdirectory):
31 ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src
34 ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src --unit-test
40 from pathlib
import Path
41 from pprint
import pprint
def canonicalize_option_name(option_name):
    """
    Return OPTION_NAME with one trailing '=' removed, if present.

    This makes e.g. '-march=' and '-march' refer to the same index key.
    """
    if option_name.endswith('='):
        option_name = option_name[:-1]
    return option_name
def canonicalize_url_suffix(url_suffix):
    """
    Various options have anchors for both the positive and
    negative form.  For example -Wcpp has both:
      'gcc/Warning-Options.html#index-Wno-cpp'
      'gcc/Warning-Options.html#index-Wcpp'

    Return a canonicalized version of the url_suffix that
    strips out any "no-" prefixes, for use in deduplication.
    Note that the resulting url suffix might not correspond to
    an actual anchor in the HTML.
    """
    url_suffix = re.sub(r'index-Wno-', 'index-W', url_suffix)
    url_suffix = re.sub(r'index-fno-', 'index-f', url_suffix)
    # Drop a trailing "_003d" (the anchor encoding of a trailing '=').
    url_suffix = re.sub(r'_003d$', '', url_suffix)
    # Drop a trailing "-NUMBER" disambiguator such as the "-5" in
    # 'index-march-5'.
    url_suffix = re.sub(r'-([0-9]+)$', '', url_suffix)
    return url_suffix
class Index:
    """
    Index of option anchors gathered from generated Option-Index.html
    pages, keyed by language and then by canonicalized option name.
    """
    def __init__(self):
        # Map from language (or None) to map from option name to set of URL suffixes
        self.entries = {}

    def add_entry(self, matched_text, url_suffix, language, verbose=False):
        """
        Record URL_SUFFIX as an anchor for option MATCHED_TEXT in LANGUAGE.

        Anchors on pages whose name contains 'Attributes.html' are ignored.
        A suffix is also ignored if an already-recorded suffix for the same
        option canonicalizes to the same value (e.g. the "no-" form of an
        anchor that is already present).
        """
        if 'Attributes.html' in url_suffix:
            return
        matched_text = canonicalize_option_name(matched_text)
        per_lang_entries = self.entries.setdefault(language, {})
        known_suffixes = per_lang_entries.setdefault(matched_text, set())
        # Partition by canonicalized url_suffixes; add the
        # first url_suffix in each such partition.
        c_new = canonicalize_url_suffix(url_suffix)
        for entry in known_suffixes:
            if canonicalize_url_suffix(entry) == c_new:
                return
        known_suffixes.add(url_suffix)

    def get_languages(self):
        """Return the languages (possibly including None) seen so far."""
        return self.entries.keys()

    def get_url_suffixes(self, text, language=None):
        """
        Return the set of URL suffixes recorded for option TEXT in
        LANGUAGE, or None if the language or the option is unknown.
        """
        text = canonicalize_option_name(text)
        per_lang_entries = self.entries.get(language)
        if per_lang_entries:
            return per_lang_entries.get(text)
        return None

    def parse_option_index(self, input_filename, language, verbose=False):
        """
        Parse every line of the option index page INPUT_FILENAME (a
        pathlib.Path), recording its anchors under LANGUAGE.  The name of
        the file's parent directory becomes the first component of each
        recorded URL suffix.
        """
        dirname = input_filename.parent.name
        with open(input_filename) as f:
            for line in f:
                self.parse_html_line_option_index(dirname, line, language,
                                                  verbose)

    def parse_html_line_option_index(self, dirname, line, language, verbose=False):
        """
        Look for an option anchor in LINE, a single line of HTML, and if
        one is found record it under LANGUAGE with DIRNAME prepended to
        its URL.  Lines without such an anchor are ignored.
        """
        # Update for this in the GCC website's bin/preprocess process_html_file:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-')
        line = line.replace('_002a', '*')

        # e.g. <a href="Optimize-Options.html#index-fmodulo_002dsched"><code>fmodulo-sched</code></a>
        m = re.search(r'<a href="([\S]+)"><code>([\S]+)</code></a>', line)
        if not m:
            return
        url_suffix, index_text = m.groups()
        option = '-' + index_text

        # Strip off "no-" prefixes from options
        if option[:5] == '-Wno-':
            option = '-W' + option[5:]
        if option[:5] == '-fno-':
            option = '-f' + option[5:]

        url_suffix = dirname + '/' + url_suffix
        self.add_entry(option, url_suffix, language, verbose)
class TestParsingIndex(unittest.TestCase):
    """Unit tests for parsing option index HTML into an Index."""

    def test_parse_line(self):
        index = Index()
        index.parse_html_line_option_index('gcc',
                                           '<a href="Optimize-Options.html#index-fmodulo_002dsched"><code>fmodulo-sched</code></a>',
                                           None)
        self.assertEqual(index.get_url_suffixes('-fmodulo-sched'),
                         {'gcc/Optimize-Options.html#index-fmodulo-sched'})

    def test_negated_flag(self):
        index = Index()
        index.parse_html_line_option_index('gcc',
                                           '<tr><td></td><td valign="top"><a href="Static-Analyzer-Options.html#index-fno_002danalyzer"><code>fno-analyzer</code></a>:</td><td> </td><td valign="top"><a href="Static-Analyzer-Options.html">Static Analyzer Options</a></td></tr>\n',
                                           None)
        # The entry is stored under the positive form of the option.
        self.assertEqual(index.get_url_suffixes('-fno-analyzer'), None)
        self.assertEqual(index.get_url_suffixes('-fanalyzer'),
                         {'gcc/Static-Analyzer-Options.html#index-fno-analyzer'})

    def test_negated_warning(self):
        index = Index()
        index.parse_html_line_option_index('gcc',
                                           '<tr><td></td><td valign="top"><a href="Warning-Options.html#index-Wno_002dalloca"><code>Wno-alloca</code></a>:</td><td> </td><td valign="top"><a href="Warning-Options.html">Warning Options</a></td></tr>\n',
                                           None)
        self.assertEqual(index.get_url_suffixes('-Wno-alloca'),
                         None)
        self.assertEqual(index.get_url_suffixes('-Walloca'),
                         {'gcc/Warning-Options.html#index-Wno-alloca'})

    def test_parse_option_index(self):
        # Needs the generated HTML; INPUT_HTML_PATH is set by the
        # --unit-test entry point.
        index = Index()
        index.parse_option_index(INPUT_HTML_PATH / 'gcc/Option-Index.html',
                                 None)
        self.assertEqual(index.get_url_suffixes('-fmodulo-sched'),
                         {'gcc/Optimize-Options.html#index-fmodulo-sched'})
        self.assertEqual(index.get_url_suffixes('-O'),
                         {'gcc/Optimize-Options.html#index-O'})
        self.assertEqual(index.get_url_suffixes('-O0'),
                         {'gcc/Optimize-Options.html#index-O0'})
        self.assertEqual(index.get_url_suffixes('-Wframe-larger-than='),
                         {'gcc/Warning-Options.html#index-Wframe-larger-than_003d'})

        # Check an option with duplicates: '-march'
        # The url_suffixes will be of the form
        #  'gcc/HPPA-Options.html#index-march-5',
        #  'gcc/LoongArch-Options.html#index-march-7',
        # etc, where the trailing number is, unfortunately, likely to
        # change from release to release.
        # Replace them with 'NN' for the purpose of this test:
        em_arch_url_suffixes = [re.sub('(-[0-9]+)', '-NN', s)
                                for s in index.get_url_suffixes('-march')]
        self.assertIn('gcc/ARM-Options.html#index-march-NN', em_arch_url_suffixes)
        self.assertIn('gcc/x86-Options.html#index-march-NN', em_arch_url_suffixes)

        self.assertEqual(index.get_url_suffixes('-Wcpp'),
                         {'gcc/Warning-Options.html#index-Wcpp'})

        self.assertNotEqual(index.get_url_suffixes('-march'), None)
        self.assertNotEqual(index.get_url_suffixes('-march='), None)
class OptFile:
    """The records of one .opt file, plus its path relative to the source tree."""

    def __init__(self, opt_path, rel_path):
        """
        Parse a .opt file.  Similar to opt-gather.awk.

        OPT_PATH is the file to read; REL_PATH is its path as a string,
        which must start with 'gcc'.  After construction, self.records is
        a list of records, each a list of the consecutive lines (newlines
        kept) found between blank or ';'-comment lines.
        """
        self.rel_path = rel_path
        assert rel_path.startswith('gcc')
        self.records = []
        with open(opt_path) as f:
            in_record = False
            for line in f:
                if re.match(r'[ \t]*(;|$)', line):
                    # A blank or comment line ends the current record.
                    in_record = False
                elif in_record:
                    self.records[-1].append(line)
                else:
                    self.records.append([line])
                    in_record = True
# Mapping from target-specific page to subdirectory containing .opt files
# documented on that page.  Every subdirectory ends with '/' so that prefix
# matching cannot match a sibling directory sharing the same prefix.

TARGET_SPECIFIC_PAGES = {
    'gcc/AArch64-Options.html': 'gcc/config/aarch64/',
    'gcc/AMD-GCN-Options.html': 'gcc/config/gcn/',
    'gcc/ARC-Options.html': 'gcc/config/arc/',
    'gcc/ARM-Options.html': 'gcc/config/arm/',
    'gcc/AVR-Options.html': 'gcc/config/avr/',
    'gcc/Adapteva-Epiphany-Options.html': 'gcc/config/epiphany/',
    'gcc/Blackfin-Options.html': 'gcc/config/bfin/',
    'gcc/C-SKY-Options.html': 'gcc/config/csky/',
    'gcc/C6X-Options.html': 'gcc/config/c6x/',
    'gcc/CRIS-Options.html': 'gcc/config/cris/',
    'gcc/DEC-Alpha-Options.html': 'gcc/config/alpha/',
    'gcc/FR30-Options.html': 'gcc/config/fr30/',
    'gcc/FRV-Options.html': 'gcc/config/frv/',
    'gcc/FT32-Options.html': 'gcc/config/ft32/',
    'gcc/H8_002f300-Options.html': 'gcc/config/h8300/',
    'gcc/HPPA-Options.html': 'gcc/config/pa/',
    'gcc/IA-64-Options.html': 'gcc/config/ia64/',
    'gcc/LoongArch-Options.html': 'gcc/config/loongarch/',
    'gcc/M32C-Options.html': 'gcc/config/m32c/',
    'gcc/M32R_002fD-Options.html': 'gcc/config/m32r/',
    'gcc/M680x0-Options.html': 'gcc/config/m68k/',
    'gcc/MCore-Options.html': 'gcc/config/mcore/',
    'gcc/MIPS-Options.html': 'gcc/config/mips/',
    'gcc/MMIX-Options.html': 'gcc/config/mmix/',
    'gcc/MN10300-Options.html': 'gcc/config/mn10300/',
    'gcc/MSP430-Options.html': 'gcc/config/msp430/',
    'gcc/MicroBlaze-Options.html': 'gcc/config/microblaze/',
    'gcc/Moxie-Options.html': 'gcc/config/moxie/',
    'gcc/NDS32-Options.html': 'gcc/config/nds32/',
    'gcc/Nios-II-Options.html': 'gcc/config/nios2/',
    'gcc/Nvidia-PTX-Options.html': 'gcc/config/nvptx/',
    'gcc/OpenRISC-Options.html': 'gcc/config/or1k/',
    'gcc/PDP-11-Options.html': 'gcc/config/pdp11/',
    'gcc/PRU-Options.html': 'gcc/config/pru/',
    'gcc/RISC-V-Options.html': 'gcc/config/riscv/',
    'gcc/RL78-Options.html': 'gcc/config/rl78/',
    'gcc/RS_002f6000-and-PowerPC-Options.html': 'gcc/config/rs6000/',
    'gcc/RX-Options.html': 'gcc/config/rx/',
    'gcc/SH-Options.html': 'gcc/config/sh/',
    'gcc/SPARC-Options.html': 'gcc/config/sparc/',
    'gcc/S_002f390-and-zSeries-Options.html': 'gcc/config/s390/',
    'gcc/V850-Options.html': 'gcc/config/v850/',
    'gcc/VAX-Options.html': 'gcc/config/vax/',
    'gcc/Visium-Options.html': 'gcc/config/visium/',
    'gcc/Xstormy16-Options.html': 'gcc/config/stormy16/',
    'gcc/Xtensa-Options.html': 'gcc/config/xtensa/',
    'gcc/eBPF-Options.html': 'gcc/config/bpf/',
    'gcc/x86-Options.html': 'gcc/config/i386/',
}
def target_specific(url_suffix):
    """
    If URL_SUFFIX starts with one of the pages listed in
    TARGET_SPECIFIC_PAGES, return the subdirectory mapped to that page;
    otherwise return None.
    """
    for page_prefix, subdir in TARGET_SPECIFIC_PAGES.items():
        if url_suffix.startswith(page_prefix):
            return subdir
    return None
def filter_urlsuffixes_for_optfile(optfile, url_suffixes):
    """
    Filter out target-specific options for the wrong target.

    Return a new set holding each member of URL_SUFFIXES that is either
    not target-specific, or whose target subdirectory is a prefix of
    OPTFILE.rel_path.
    """
    result = set()
    for url_suffix in url_suffixes:
        subdir = target_specific(url_suffix)
        if subdir and not optfile.rel_path.startswith(subdir):
            # Documented on another target's page: skip it.
            continue
        result.add(url_suffix)
    return result
class TestFiltering(unittest.TestCase):
    """Unit tests for target_specific and filter_urlsuffixes_for_optfile."""

    def test_target_specific(self):
        self.assertEqual(target_specific('gcc/Preprocessor-Options.html#index-A'),
                         None)
        self.assertEqual(target_specific('gcc/MMIX-Options.html#index-mknuthdiv'),
                         'gcc/config/mmix/')

    def test_filter(self):
        s = {'gcc/MIPS-Options.html#index-munaligned-access-1',
             'gcc/ARM-Options.html#index-munaligned-access'}
        # /dev/null gives each OptFile an empty record list; only
        # rel_path matters for filtering.
        arm_optfile = OptFile('/dev/null', 'gcc/config/arm/arm.opt')
        mips_optfile = OptFile('/dev/null', 'gcc/config/mips/mips.opt')
        self.assertEqual(
            filter_urlsuffixes_for_optfile(arm_optfile, s),
            {'gcc/ARM-Options.html#index-munaligned-access'})
        self.assertEqual(
            filter_urlsuffixes_for_optfile(mips_optfile, s),
            {'gcc/MIPS-Options.html#index-munaligned-access-1'})
def write_url_file(index, optfile, dstfile):
    """
    Write URL directives for the options in OPTFILE to DSTFILE, an open
    text file, looking up each option's URL suffixes in INDEX.

    A record with no URL suffix in any language produces no output.
    Otherwise, per language: exactly one suffix (after filtering out other
    targets) yields a UrlSuffix(...) or LangUrlSuffix_LANG(...) directive;
    zero or several suffixes yield a "; skipping" comment instead.
    """
    dstfile.write('; Autogenerated by regenerate-opt-urls.py from %s'
                  ' and generated HTML\n\n'
                  % optfile.rel_path)
    for record in optfile.records:
        # The first line of a record is the option name without its '-'.
        opt = '-' + record[0].strip()
        url_suffixes_per_lang = {}
        count = 0
        for lang in index.get_languages():
            this_lang_suffixes = index.get_url_suffixes(opt, language=lang)
            url_suffixes_per_lang[lang] = this_lang_suffixes
            if this_lang_suffixes:
                count += len(this_lang_suffixes)
        if not count:
            continue

        directives = []
        for lang in index.get_languages():
            if lang:
                directive = 'LangUrlSuffix_%s for %r' % (lang, opt[1:])
            else:
                directive = 'UrlSuffix for %r' % opt[1:]
            url_suffixes = url_suffixes_per_lang[lang]
            if not url_suffixes:
                continue
            url_suffixes = filter_urlsuffixes_for_optfile(optfile, url_suffixes)
            if not url_suffixes:
                dstfile.write('; skipping %s due to finding no URLs\n'
                              % directive)
            elif len(url_suffixes) == 1:
                if lang:
                    directives.append('LangUrlSuffix_%s(%s)' % (lang, list(url_suffixes)[0]))
                else:
                    directives.append('UrlSuffix(%s)' % list(url_suffixes)[0])
            else:
                dstfile.write('; skipping %s due to multiple URLs:\n'
                              % directive)
                for u in sorted(url_suffixes):
                    dstfile.write('; duplicate: %r\n' % u)
        # NOTE(review): confirm whether a blank separator line is expected
        # after each record's output.
        if directives:
            dstfile.write('%s\n' % opt[1:])
            dstfile.write(' '.join(directives) + '\n')
# A list of (REL_PATH, LANG) pairs, where
# - REL_PATH is the relative path to a generated Option-Index.html file
#   for a specific frontend, and
# - LANG is the name of the language (as specified in the "Language" record
#   within the lang.opt file), or None for the language-independent
#   index.

PER_LANGUAGE_OPTION_INDEXES = [
    ('gcc/Option-Index.html', None),
    ('gdc/Option-Index.html', 'D'),
    ('gfortran/Option-Index.html', 'Fortran'),
]
def main(args):
    """
    Build an Index from the option index pages under args.base_html_dir,
    then write a FOO.opt.urls file next to every FOO.opt file found
    beneath args.src_gcc_dir.
    """
    index = Index()
    for option_index_rel_path, lang in PER_LANGUAGE_OPTION_INDEXES:
        index.parse_option_index(args.base_html_dir / option_index_rel_path,
                                 language=lang)
    for root, dirs, files in os.walk(args.src_gcc_dir):
        for f in files:
            if f.endswith('.opt'):
                opt_path = os.path.join(root, f)
                rel_path = os.path.relpath(opt_path, args.src_gcc_dir)
                optfile = OptFile(opt_path, rel_path)
                dstname = f + '.urls'
                urlfile = os.path.join(root, dstname)
                with open(urlfile, 'w') as dstfile:
                    write_url_file(index, optfile, dstfile)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=DESCRIPTION,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if args.unit_test:
        # The tests read the generated HTML via this module-level name.
        INPUT_HTML_PATH = args.base_html_dir
        # Pass only the program name on so unittest does not try to
        # interpret this script's own arguments.
        unittest.main(argv=[sys.argv[0], '-v'])
    else:
        main(args)