Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / ppapi / cpp / documentation / doxy_cleanup.py
blob24ddb3c306cb6ee2f9f760076b47daec859b2c31
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 '''This utility cleans up the html files as emitted by doxygen so
7 that they are suitable for publication on a Google documentation site.
8 '''
10 import optparse
11 import os
12 import re
13 import shutil
14 import string
15 import sys
16 try:
17 from BeautifulSoup import BeautifulSoup, Tag
18 except (ImportError, NotImplementedError):
19 print ("This tool requires the BeautifulSoup package "
20 "(see http://www.crummy.com/software/BeautifulSoup/).\n"
21 "Make sure that the file BeautifulSoup.py is either in this directory "
22 "or is available in your PYTHON_PATH")
23 raise
class HTMLFixer(object):
    '''Cleans up the html strings as produced by Doxygen.

    The markup is parsed once in the constructor. The Fix*/Remove* methods
    then modify the parsed tree in place, and str() on the fixer returns the
    string form of that tree.
    '''

    def __init__(self, html):
        '''Parses |html| with BeautifulSoup and keeps the tree in self.soup.'''
        self.soup = BeautifulSoup(html)

    def FixTableHeadings(self):
        '''Fixes the doxygen table headings.

        This includes:
          - Using bare <h2> title row instead of row embedded in <tr><td> in
            table.
          - Splitting up tables into multiple separate tables if a table
            heading appears in the middle of a table.

        Copying the anchor's "name" attribute into an "id" attribute is
        currently NOT done (that step is disabled).

        For example, this html:
         <table>
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>
          ...
         </table>

        would be converted to this:
         <h2>Data Fields List</h2>
         <table>
          ...
         </table>

        Raises:
          AssertionError: if a heading row is not a direct child of a
              <table> element.
        '''
        # Pass 1: turn every heading row (<tr><td><h2><a name=...>) into a
        # bare <h2> element whose only content is the heading text.
        table_headers = []
        for tag in self.soup.findAll('tr'):
            cell = tag.td
            heading = cell.h2 if cell else None
            anchor = heading.a if heading else None
            # Use .get() rather than ['name']: indexing a tag raises KeyError
            # when the attribute is absent, which would abort the whole file
            # on an <a> that only carries e.g. an href.
            if anchor is None or not anchor.get('name'):
                continue
            tag.string = anchor.next
            tag.name = 'h2'
            table_headers.append(tag)

        # Pass 2: walk the headings last-to-first so that earlier tags don't
        # delete later tags when a table is split.
        for tag in reversed(table_headers):
            title = tag.string.strip()
            print("Header tag: %s is %s" % (tag.name, title))
            # Is this a heading in the middle of a table?
            if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
                print("Splitting Table named %s" % title)
                table = tag.parent
                table_parent = table.parent
                table_index = table_parent.contents.index(table)
                # The new table inherits the attributes of the one being
                # split and is placed directly after it.
                new_table = Tag(self.soup, name='table', attrs=table.attrs)
                table_parent.insert(table_index + 1, new_table)
                tag_index = table.contents.index(tag)
                # Iterate over a slice (a copy) because inserting a row into
                # the new table removes it from the old table's contents.
                for index, row in enumerate(table.contents[tag_index:]):
                    new_table.insert(index, row)
            # Now move the <h2> tag to be in front of the <table> tag
            assert tag.parent.name == 'table'
            table = tag.parent
            table_parent = table.parent
            table_index = table_parent.contents.index(table)
            table_parent.insert(table_index, tag)

    def RemoveTopHeadings(self):
        '''Removes <div> sections with a header, tabs, or navpath class
        attribute.

        The class attribute must match exactly "header", "navpath", or
        "tabs" optionally followed by digits.
        '''
        header_tags = self.soup.findAll(
            name='div',
            attrs={'class': re.compile(r'^(header|tabs[0-9]*|navpath)$')})
        for tag in header_tags:
            tag.extract()

    def FixAll(self):
        '''Applies every cleanup step: table headings, then top headings.'''
        self.FixTableHeadings()
        self.RemoveTopHeadings()

    def __str__(self):
        '''Returns the string form of the (possibly modified) tree.'''
        return str(self.soup)
def main():
    '''Main entry for the doxy_cleanup utility

    doxy_cleanup takes a list of html files and modifies them in place.

    Command-line options (read from sys.argv):
      -m, --move  after rewriting a file, move it into an "original_html"
                  directory that sits next to the file's own directory.

    Returns:
      1 if no files were given (usage is printed), 0 otherwise.

    Raises:
      Any exception hit while processing a file is re-raised after the name
      of the offending file has been printed.
    '''
    parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
    parser.add_option('-m', '--move', dest='move', action='store_true',
                      default=False,
                      help='move html files to "original_html"')
    options, files = parser.parse_args()

    if not files:
        parser.print_usage()
        return 1

    for filename in files:
        try:
            with open(filename, 'r') as html_file:
                html = html_file.read()

            print("Processing %s" % filename)
            fixer = HTMLFixer(html)
            fixer.FixAll()
            with open(filename, 'w') as html_file:
                html_file.write(str(fixer))

            if options.move:
                # Destination is <parent of the file's directory>/original_html.
                new_directory = os.path.join(
                    os.path.dirname(os.path.dirname(filename)),
                    'original_html')
                if not os.path.exists(new_directory):
                    os.mkdir(new_directory)
                shutil.move(filename, new_directory)
        except Exception:
            # Name the file that failed, then let the error propagate.
            print("Error while processing %s" % filename)
            raise

    return 0
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)