[SyncFS] Build indexes from FileTracker entries on disk.
[chromium-blink-merge.git] / native_client_sdk / src / doc / doxygen / doxy_cleanup.py
blob491fd047109cd921e33a57fdc24882a13cdd16fe
1 #!/usr/bin/python
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 '''This utility cleans up the html files as emitted by doxygen so
8 that they are suitable for publication on a Google documentation site.
9 '''
11 import glob
12 import optparse
13 import os
14 import re
15 import shutil
16 import sys
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  # Explain the missing dependency on stderr, then let the original
  # exception propagate so the failure is still visible to the caller.
  sys.stderr.write(
      "This tool requires the BeautifulSoup package "
      "(see http://www.crummy.com/software/BeautifulSoup/).\n"
      "Make sure that the file BeautifulSoup.py is either in this directory "
      "or is available in your PYTHONPATH\n")
  raise
def Trace(msg):
  '''Writes |msg| plus a newline to stderr, but only in verbose mode.

  Verbose mode is the function attribute Trace.verbose, which starts out
  False.
  '''
  if not Trace.verbose:
    return
  text = str(msg)
  sys.stderr.write(text + '\n')


# Tracing stays silent until this flag is switched on.
Trace.verbose = False
# Glob patterns for doxygen-generated files that are deleted rather than
# published. main() joins each pattern onto the output directory and removes
# every file that matches.
FILES_TO_REMOVE = [
    '*.css',
    '*.map',
    '*.md5',
    'annotated.html',
    'bc_s.png',
    'classes.html',
    'closed.png',
    'doxygen.png',
    'files.html',
    'functions*.html',
    'globals_0x*.html',
    'globals_enum.html',
    'globals_eval.html',
    'globals_func.html',
    'globals.html',
    'globals_type.html',
    'globals_vars.html',
    'graph_legend.html',
    'graph_legend.png',
    'hierarchy.html',
    'index_8dox.html',
    'index.html',
    'modules.html',
    'namespacemembers_func.html',
    'namespacemembers.html',
    'namespaces.html',
    'nav_f.png',
    'nav_h.png',
    'open.png',
    'tab_a.png',
    'tab_b.png',
    'tab_h.png',
    'tab_s.png',
]
class HTMLFixer(object):
  '''This class cleans up the html strings as produced by Doxygen.

  The markup is parsed with BeautifulSoup in the constructor; the Fix* and
  Remove* methods then edit that parse tree (self.soup) in place.
  '''

  # Matches the class attribute of the <div> sections that RemoveTopHeadings
  # drops: "header", "navpath", or "tabs" optionally followed by digits.
  _TOP_HEADING_CLASS_RE = re.compile('^(header|tabs[0-9]*|navpath)$')

  # Matches a "_<digit>_<digit>" suffix that is immediately followed by a
  # double quote, colon, space or '<' (the lookahead is not consumed).
  _VERSION_SUFFIX_RE = re.compile(r'(_\d_\d)(?=[": <])')

  def __init__(self, html):
    '''Parses |html|, a string of HTML markup, into self.soup.'''
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    A row is treated as a heading when its first cell holds an <h2> whose
    anchor has a non-empty "name" attribute; the name is only used to
    recognise the row.

    For example, this html:
     <table>
       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
       Data Fields List</h2></td></tr>
       ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
       ...
     </table>
    '''
    table_headers = []
    for tag in self.soup.findAll('tr'):
      anchor = tag.td and tag.td.h2 and tag.td.h2.a
      # .get() skips anchors that have no "name" attribute; indexing with
      # ['name'] would raise KeyError for them.
      if anchor and anchor.get('name'):
        # Turn the row itself into the <h2>, keeping the element that
        # follows the anchor as its content.
        tag.string = anchor.next
        tag.name = 'h2'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        Trace("Splitting Table named %s" % tag.string.strip())
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        # Iterate over a slice (a copy) because inserting a row into the new
        # table changes the old table's contents.
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class': self._TOP_HEADING_CLASS_RE})
    for tag in header_tags:
      tag.extract()

  def RemoveVersionNumbers(self, html):
    '''Horrible hack to strip _#_# from struct names.

    Works on the serialized |html| string, not on the parse tree, and
    returns the modified string.
    '''
    return self._VERSION_SUFFIX_RE.sub('', html)

  def FixAll(self):
    '''Applies every fix and returns the cleaned-up document as a string.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()
    html = str(self.soup)
    html = self.RemoveVersionNumbers(html)
    return html
def main(argv):
  """Main entry for the doxy_cleanup utility

  doxy_cleanup cleans up the html files generated by doxygen.

  Args:
    argv: Command-line arguments without the program name: optional flags
        followed by exactly one directory.

  Returns:
    0 on success. A usage problem is reported through parser.error(), which
    exits the process instead of returning.
  """
  parser = optparse.OptionParser(usage='Usage: %prog [options] directory')
  parser.add_option('-v', '--verbose', help='verbose output.',
                    action='store_true')
  options, args = parser.parse_args(argv)

  if len(args) != 1:
    parser.error('Expected one directory')

  if options.verbose:
    Trace.verbose = True

  root_dir = args[0]
  html_dir = os.path.join(root_dir, 'html')

  # Doxygen puts all files in an 'html' directory.
  # First, move all files from that directory to root_dir.
  for filename in glob.glob(os.path.join(html_dir, '*')):
    Trace('Moving %s -> %s' % (filename, root_dir))
    shutil.move(filename, root_dir)

  # Now remove the 'html' directory.
  # NOTE: os.rmdir only removes empty directories, so this raises if the
  # directory is missing or still holds entries the '*' glob did not match.
  Trace('Removing %s' % html_dir)
  os.rmdir(html_dir)

  # Then remove unneeded files.
  for wildcard in FILES_TO_REMOVE:
    Trace('Removing "%s":' % wildcard)
    path = os.path.join(root_dir, wildcard)
    for filename in glob.glob(path):
      Trace(' Removing "%s"' % filename)
      os.remove(filename)

  # Now, fix the HTML files we've kept.
  Trace('Fixing HTML files...')
  for root, _, filenames in os.walk(root_dir):
    for filename in filenames:
      if os.path.splitext(filename)[1] != '.html':
        Trace('Skipping %s' % filename)
        continue

      filename = os.path.join(root, filename)
      Trace('Processing "%s"...' % filename)
      try:
        with open(filename) as f:
          html = f.read()

        fixer = HTMLFixer(html)
        output = fixer.FixAll()
        with open(filename, 'w') as f:
          f.write(output)
      except Exception:
        # Name the offending file, then let the original error propagate.
        # KeyboardInterrupt is deliberately not caught here so that Ctrl-C
        # is not reported as a processing error.
        sys.stderr.write("Error while processing %s\n" % filename)
        raise

  return 0
# Script entry point: run main() on the command-line arguments and use its
# result as the process exit status; Ctrl-C is reported and mapped to 1.
if __name__ == '__main__':
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    script_name = os.path.basename(__file__)
    sys.stderr.write('%s: interrupted\n' % script_name)
    exit_code = 1
  sys.exit(exit_code)