2 # canonicalize html dirs to ease comparing them
5 # ./tools/c10e-html html
13 from bs4
import BeautifulSoup
# prettify(filename): parse the HTML file with BeautifulSoup, re-serialize it
# with soup.prettify() and strip markup that only one of the compared HTML
# generators emits, so that two output trees produce a smaller diff.
# NOTE(review): parts of this function are not visible in this view (the tails
# of the two triple-quoted literals and whatever writes `html` back to `doc`);
# confirm against the full file before changing anything here.
16 def prettify(filename
):
# read the whole document and parse it with the lxml parser
17 with
open(filename
, 'r') as doc
:
18 soup
= BeautifulSoup(doc
.read(), 'lxml')
# reopen the same path in 'w' mode, which truncates the file
17 with
open(filename
, 'w') as doc
:
# re-serialize the parsed tree with BeautifulSoup's pretty-printer
20 html
= soup
.prettify()
21 # strip things that mkhtml2 is not producing to reduce the diff
22 html
= html
.replace('a class="link" href', 'a href')
23 html
= html
.replace(' target="_top"', '')
24 html
= html
.replace('summary="Navigation header" ', '')
25 html
= html
.replace(""" <a name="idx">
28 html
= re
.sub(""" <div class="footer">
30 Generated by GTK-Doc V[.0-9]*
# remove empty <p></p> pairs together with the whitespace in front of them
33 html
= re
.sub(r
'\s*<p>\s*</p>', '', html
)
# remove empty titlepage divs
34 html
= re
.sub(r
'\s*<div class="titlepage">\s*</div>', '', html
)
# remove <meta> tags whose content attribute starts with "DocBook"
35 html
= re
.sub(r
'\s*<meta content="DocBook[^>]*>', '', html
)
# remove <meta> tags whose content attribute starts with "GTK-Doc"
36 html
= re
.sub(r
'\s*<meta content="GTK-Doc[^>]*>', '', html
)
# Iterate over the *.devhelp2 files located directly inside htmldir
# (the glob pattern is not recursive).
# NOTE(review): the loop bodies and the enclosing function header are not
# visible in this view; presumably each file is passed to prettify(), confirm.
41 for filename
in glob
.glob(os
.path
.join(htmldir
, '*.devhelp2')):
# Same iteration for the *.html files in that directory.
43 for filename
in glob
.glob(os
.path
.join(htmldir
, '*.html')):
if __name__ == '__main__':
    # Command-line entry point: the HTML directory is passed as a positional
    # argument.
    parser = argparse.ArgumentParser(
        description='c10e-html - canonicalize html files for diffing')
    # nargs='*' accepts zero or more positionals, so argparse itself never
    # rejects an empty command line; the count is checked by hand below.
    parser.add_argument('args', nargs='*', help='HTML_DIR')

    options = parser.parse_args()
    if len(options.args) < 1:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit('Too few arguments')