Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / ppapi / c / documentation / doxy_cleanup.py
blob01b1ca07d4e72d00eae19d4f5a161123b346501d
1 #!/usr/bin/python
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 '''This utility cleans up the html files as emitted by doxygen so
8 that they are suitable for publication on a Google documentation site.
9 '''
11 import optparse
12 import os
13 import re
14 import shutil
15 import string
16 import sys
17 try:
18 from BeautifulSoup import BeautifulSoup, Tag
19 except (ImportError, NotImplementedError):
20 print ("This tool requires the BeautifulSoup package "
21 "(see http://www.crummy.com/software/BeautifulSoup/).\n"
22 "Make sure that the file BeautifulSoup.py is either in this directory "
23 "or is available in your PYTHON_PATH")
24 raise
class HTMLFixer(object):
    '''Cleans up the html strings produced by Doxygen.

    The html is parsed with BeautifulSoup once, in the constructor. The
    Fix*/Remove* methods then edit that parsed tree (self.soup) in place,
    and str() on the fixer returns str(self.soup).
    '''

    # Matches the class attribute of the <div> sections dropped by
    # RemoveTopHeadings(): "header", "tabs" followed by optional digits, or
    # "navpath".
    _TOP_HEADING_CLASS_RE = re.compile('^(header|tabs[0-9]*|navpath)$')

    def __init__(self, html):
        '''Parses html (a string of html markup) into self.soup.'''
        self.soup = BeautifulSoup(html)

    @staticmethod
    def _IndexOf(parent, child):
        '''Returns the position of child within parent.contents.

        The search is by identity.
        NOTE(review): list.index() compares with ==, and BeautifulSoup tags
        presumably compare equal by structure rather than identity, so
        index() could return an earlier look-alike sibling -- confirm against
        the BeautifulSoup version in use. An identity search is correct
        either way.

        Raises:
          ValueError: if child is not in parent.contents (same exception
              type as list.index()).
        '''
        for index, candidate in enumerate(parent.contents):
            if candidate is child:
                return index
        raise ValueError('child not found in parent.contents')

    def FixTableHeadings(self):
        '''Fixes the doxygen table headings.

        This includes:
          - Using bare <h2> title row instead of row embedded in <tr><td> in
            table
          - Splitting up tables into multiple separate tables if a table
            heading appears in the middle of a table.

        A row counts as a heading when it contains <td><h2><a name="...">.
        The "name" attribute is only used to recognise heading rows; it is
        not copied anywhere.

        For example, this html:
         <table>
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>
          ...
         </table>

        would be converted to this:
         <h2>Data Fields List</h2>
         <table>
          ...
         </table>
        '''
        table_headers = []
        for tag in self.soup.findAll('tr'):
            anchor = tag.td and tag.td.h2 and tag.td.h2.a
            # get() instead of indexing: an anchor without a "name"
            # attribute means "not a heading row", not a KeyError.
            if anchor and anchor.get('name'):
                # Turn the whole row into an <h2> holding the heading text.
                tag.string = anchor.next
                tag.name = 'h2'
                table_headers.append(tag)

        # Walk the headings last-to-first so that earlier tags don't delete
        # later tags.
        for tag in reversed(table_headers):
            title = tag.string.strip()
            print("Header tag: %s is %s" % (tag.name, title))
            # Is this a heading in the middle of a table?
            if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
                print("Splitting Table named %s" % title)
                table = tag.parent
                table_parent = table.parent
                table_index = self._IndexOf(table_parent, table)
                # The new table reuses the attributes of the one being split
                # and is placed directly after it.
                new_table = Tag(self.soup, name='table', attrs=table.attrs)
                table_parent.insert(table_index + 1, new_table)
                tag_index = self._IndexOf(table, tag)
                # The slice is a copy, so moving rows into new_table cannot
                # upset the iteration.
                for index, row in enumerate(table.contents[tag_index:]):
                    new_table.insert(index, row)
            # Now move the <h2> tag to be in front of the <table> tag
            # (after a split, tag.parent is the newly created table).
            assert tag.parent.name == 'table'
            table = tag.parent
            table_parent = table.parent
            table_index = self._IndexOf(table_parent, table)
            table_parent.insert(table_index, tag)

    def RemoveTopHeadings(self):
        '''Removes <div> sections with a header, tabs, or navpath class.'''
        header_tags = self.soup.findAll(
            name='div', attrs={'class': self._TOP_HEADING_CLASS_RE})
        for tag in header_tags:
            tag.extract()

    def FixAll(self):
        '''Runs FixTableHeadings() and then RemoveTopHeadings().'''
        self.FixTableHeadings()
        self.RemoveTopHeadings()

    def __str__(self):
        '''Returns str(self.soup), i.e. the tree in its current state.'''
        return str(self.soup)
def main():
    '''Main entry for the doxy_cleanup utility

    doxy_cleanup takes a list of html files and modifies them in place.
    With -m/--move, each file is moved, after being rewritten, into an
    "original_html" directory located beside the file's own directory; that
    directory is created if it does not exist.

    Returns:
      1 if no files were given on the command line (the usage message is
      printed first), otherwise 0.

    Raises:
      Whatever exception reading, fixing, writing or moving a file raises;
      an "Error while processing <file>" line is printed before it
      propagates.
    '''
    parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
    parser.add_option('-m', '--move', dest='move', action='store_true',
                      default=False, help='move html files to "original_html"')
    options, files = parser.parse_args()

    if not files:
        parser.print_usage()
        return 1

    for filename in files:
        try:
            # html_file, not "file": avoid shadowing the Python 2 builtin.
            with open(filename, 'r') as html_file:
                html = html_file.read()

            print("Processing %s" % filename)
            fixer = HTMLFixer(html)
            fixer.FixAll()
            # Overwrite the input only after the fixes succeeded.
            with open(filename, 'w') as html_file:
                html_file.write(str(fixer))
            if options.move:
                grandparent = os.path.dirname(os.path.dirname(filename))
                new_directory = os.path.join(grandparent, 'original_html')
                if not os.path.exists(new_directory):
                    os.mkdir(new_directory)
                shutil.move(filename, new_directory)
        except Exception:
            # Name the offending file, then let the error propagate.
            print("Error while processing %s" % filename)
            raise

    return 0
if __name__ == '__main__':
    # Run the tool and hand main()'s return value to sys.exit().
    exit_status = main()
    sys.exit(exit_status)