# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''

import optparse
import os
import re
import shutil
import sys

try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  print ("This tool requires the BeautifulSoup package "
         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
         "Make sure that the file BeautifulSoup.py is either in this directory "
         "or is available in your PYTHON_PATH")
  raise


class HTMLFixer(object):
  '''This class cleans up the html strings as produced by Doxygen
  '''

  def __init__(self, html):
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Putting the "name" attribute into the "id" attribute of the <tr> tag.
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    For example, this html:
     <table>
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>
      ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
      ...
     </table>
    '''
    table_headers = []
    for tag in self.soup.findAll('tr'):
      if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
        #tag['id'] = tag.td.h2.a['name']
        tag.string = tag.td.h2.a.next
        tag.name = 'h2'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      print "Header tag: %s is %s" % (tag.name, tag.string.strip())
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        print "Splitting Table named %s" % tag.string.strip()
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
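    # These divs hold doxygen's page chrome (the title banner, the tab strips,
    # and the breadcrumb navigation path), none of which belongs on the
    # published page.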
    header_tags = self.soup.findAll(
        'div',
        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
    [tag.extract() for tag in header_tags]

  def FixAll(self):
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    return str(self.soup)


def main():
  '''Main entry for the doxy_cleanup utility

  doxy_cleanup takes a list of html files and modifies them in place.'''
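
  # Example invocation (the output path shown is hypothetical):
  #   python doxy_cleanup.py --move out/doxygen/html/*.html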

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args()

  for filename in files:
    try:
      with open(filename, 'r') as file:
        html = file.read()

      print "Processing %s" % filename
      fixer = HTMLFixer(html)
      fixer.FixAll()
      with open(filename, 'w') as file:
        file.write(str(fixer))
      if options.move:
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except:
      print "Error while processing %s" % filename
      raise

  return 0


if __name__ == '__main__':
  sys.exit(main())