# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''
# BeautifulSoup (v3) is a third-party dependency; fail with an actionable
# message if it is not importable, then re-raise so callers see the error.
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  print("This tool requires the BeautifulSoup package "
        "(see http://www.crummy.com/software/BeautifulSoup/).\n"
        "Make sure that the file BeautifulSoup.py is either in this directory "
        "or is available in your PYTHON_PATH")
  raise
def Trace(msg):
  '''Print a diagnostic message to stderr when verbose tracing is enabled.

  Args:
    msg: object to print; converted with str() before writing.
  '''
  # NOTE(review): the verbose flag lives on the function object itself;
  # main() is expected to flip it based on the -v option.
  if Trace.verbose:
    sys.stderr.write(str(msg) + '\n')

Trace.verbose = False
# Doxygen-generated files that are not wanted in the published output.
# main() glob-expands each entry against the output directory and deletes
# the matches.  (Reconstructed from a damaged source; the original list may
# have contained additional wildcards — verify against upstream.)
FILES_TO_REMOVE = [
    'namespacemembers_func.html',
    'namespacemembers.html',
]
class HTMLFixer(object):
  '''This class cleans up the html strings as produced by Doxygen
  so they are suitable for publication.
  '''

  def __init__(self, html):
    # Parse the raw html once; all fixups operate on this soup tree.
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using a bare <h2> title row instead of a row embedded in <tr><td>
        in a table.
      - Putting the "name" attribute into the "id" attribute of the <tr> tag.
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    For example, this html:
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>

    would be converted to this:
      <h2>Data Fields List</h2>
    '''
    # First pass: find every <tr> that is really a heading row
    # (a <td> wrapping an <h2> with a named anchor) and rewrite it in place.
    table_headers = []
    for tag in self.soup.findAll('tr'):
      if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
        #tag['id'] = tag.td.h2.a['name']
        # Replace the row's content with the heading text that follows the
        # anchor, and turn the <tr> itself into a bare <h2>.
        tag.string = tag.td.h2.a.next
        tag.name = 'h2'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        Trace("Splitting Table named %s" % tag.string.strip())
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        # Create a sibling table right after the current one and move the
        # heading row plus everything that follows it into the new table.
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        'div',
        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
    # Plain loop instead of a side-effect list comprehension: extract()
    # detaches each matched <div> from the tree.
    for tag in header_tags:
      tag.extract()

  def RemoveVersionNumbers(self, html):
    '''Horrible hack to strip _#_# from struct names.

    Args:
      html: html text to filter.

    Returns:
      The text with any _<digit>_<digit> suffix removed wherever it is
      immediately followed by one of '"', ':', ' ', or '<'.
    '''
    return re.sub(r'(_\d_\d)(?=[": <])', '', html)

  def FixAll(self):
    '''Runs all of the cleanup passes and returns the resulting html text.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()
    html = str(self.soup)
    html = self.RemoveVersionNumbers(html)
    # Return the cleaned text: main() writes this back over the input file.
    return html
def main(argv):
  """Main entry for the doxy_cleanup utility

  doxy_cleanup cleans up the html files generated by doxygen.

  Args:
    argv: command-line arguments (without the program name); exactly one
        directory operand is expected.

  Returns:
    0 on success.
  """
  parser = optparse.OptionParser(usage='Usage: %prog [options] directory')
  parser.add_option('-v', '--verbose', help='verbose output.',
                    action='store_true')
  options, files = parser.parse_args(argv)

  if len(files) != 1:
    parser.error('Expected one directory')

  if options.verbose:
    Trace.verbose = True

  root_dir = files[0]
  html_dir = os.path.join(root_dir, 'html')

  # Doxygen puts all files in an 'html' directory.
  # First, move all files from that directory to root_dir.
  for filename in glob.glob(os.path.join(html_dir, '*')):
    Trace('Moving %s -> %s' % (filename, root_dir))
    shutil.move(filename, root_dir)

  # Now remove the 'html' directory.
  Trace('Removing %s' % html_dir)
  os.rmdir(html_dir)

  # Then remove unneeded files.
  for wildcard in FILES_TO_REMOVE:
    Trace('Removing "%s":' % wildcard)
    path = os.path.join(root_dir, wildcard)
    for filename in glob.glob(path):
      Trace('  Removing "%s"' % filename)
      os.remove(filename)

  # Now, fix the HTML files we've kept.
  Trace('Fixing HTML files...')
  for root, _, files in os.walk(root_dir):
    for filename in files:
      if os.path.splitext(filename)[1] != '.html':
        Trace('Skipping %s' % filename)
        continue

      filename = os.path.join(root, filename)
      Trace('Processing "%s"...' % filename)

      try:
        with open(filename) as f:
          html = f.read()

        fixer = HTMLFixer(html)
        output = fixer.FixAll()
        with open(filename, 'w') as f:
          f.write(output)
      except Exception:
        # Report which file failed, then re-raise so the run fails loudly.
        sys.stderr.write("Error while processing %s\n" % filename)
        raise

  return 0
if __name__ == '__main__':
  # Run main(), mapping Ctrl-C to a short message and a non-zero exit code.
  try:
    rtn = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__))
    rtn = 1
  sys.exit(rtn)