# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''
import optparse
import os
import re
import shutil

try:
    from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
    print("This tool requires the BeautifulSoup package "
          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
          "Make sure that the file BeautifulSoup.py is either in this directory "
          "or is available in your PYTHON_PATH")
    # Nothing below can work without BeautifulSoup, so stop here after
    # printing the hint instead of failing later with a NameError.
    raise
class HTMLFixer(object):
    '''This class cleans up the html strings as produced by Doxygen.

    The html is parsed with BeautifulSoup when the fixer is constructed.
    The Fix*/Remove* methods edit that parse tree in place, and str() of
    the fixer renders the current state of the tree.
    '''

    def __init__(self, html):
        '''Parses html so that it can be cleaned up.

        Args:
          html: the html text to parse.
        '''
        self.soup = BeautifulSoup(html)

    def FixTableHeadings(self):
        '''Fixes the doxygen table headings.

        This includes:
          - Using bare <h2> title row instead of row embedded in <tr><td> in
            table
          - Splitting up tables into multiple separate tables if a table
            heading appears in the middle of a table.

        The "name" attribute of the heading's <a> anchor is not currently
        copied into an "id" attribute on the heading.

        For example, this html:
         <table>
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>
          ...
         </table>

        would be converted to this:
         <h2>Data Fields List</h2>
         <table>
          ...
         </table>
        '''
        table_headers = []
        for tag in self.soup.findAll('tr'):
            # A heading row looks like <tr><td><h2><a name="...">.  Use
            # .get() so an anchor without a "name" attribute is skipped
            # rather than raising KeyError.
            if (tag.td and tag.td.h2 and tag.td.h2.a
                    and tag.td.h2.a.get('name')):
                # The heading text is the element right after the anchor.
                tag.string = tag.td.h2.a.next
                # Turn the row itself into the bare <h2> heading.
                tag.name = 'h2'
                table_headers.append(tag)

        # Walk the headings in reverse so that earlier tags don't delete
        # later tags.
        # Split up tables that have multiple table header (th) rows.
        for tag in reversed(table_headers):
            print("Header tag: %s is %s" % (tag.name, tag.string.strip()))
            # Is this a heading in the middle of a table?
            if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
                print("Splitting Table named %s" % tag.string.strip())
                table = tag.parent
                table_parent = table.parent
                table_index = table_parent.contents.index(table)
                # The new table goes right after the one being split and
                # takes over the heading plus everything that follows it.
                new_table = Tag(self.soup, name='table', attrs=table.attrs)
                table_parent.insert(table_index + 1, new_table)
                tag_index = table.contents.index(tag)
                for index, row in enumerate(table.contents[tag_index:]):
                    new_table.insert(index, row)
            # Now move the <h2> tag to be in front of the <table> tag
            assert tag.parent.name == 'table'
            table = tag.parent
            table_parent = table.parent
            table_index = table_parent.contents.index(table)
            table_parent.insert(table_index, tag)

    def RemoveTopHeadings(self):
        '''Removes <div> sections with a header, tabs, or navpath class
        attribute.'''
        header_tags = self.soup.findAll(
            name='div',
            attrs={'class': re.compile(r'^(header|tabs[0-9]*|navpath)$')})
        for tag in header_tags:
            tag.extract()

    def FixAll(self):
        '''Runs every cleanup step: table headings first, then top
        headings.'''
        self.FixTableHeadings()
        self.RemoveTopHeadings()

    def __str__(self):
        '''Returns the current (possibly cleaned-up) html as a string.'''
        return str(self.soup)
def main():
    '''Main entry for the doxy_cleanup utility

    doxy_cleanup takes a list of html files and modifies them in place.
    With -m/--move, each file is moved into an "original_html" directory
    (created next to the file's parent directory) once it has been
    rewritten.

    Returns:
      0 when every file was processed, 1 when no files were given.

    Raises:
      Whatever exception interrupted the processing of a file; the name of
      the offending file is printed before the exception propagates.
    '''
    parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
    parser.add_option('-m', '--move', dest='move', action='store_true',
                      default=False, help='move html files to "original_html"')
    options, files = parser.parse_args()

    if not files:
        # Nothing to do: show the usage instead of silently succeeding.
        parser.print_usage()
        return 1

    for filename in files:
        try:
            with open(filename, 'r') as infile:
                html = infile.read()

            print("Processing %s" % filename)
            fixer = HTMLFixer(html)
            fixer.FixTableHeadings()
            fixer.RemoveTopHeadings()
            with open(filename, 'w') as outfile:
                outfile.write(str(fixer.soup))

            if options.move:
                new_directory = os.path.join(
                    os.path.dirname(os.path.dirname(filename)),
                    'original_html')
                if not os.path.exists(new_directory):
                    os.mkdir(new_directory)
                shutil.move(filename, new_directory)
        except Exception:
            # Say which file failed, then let the error propagate.
            print("Error while processing %s" % filename)
            raise

    return 0


if __name__ == '__main__':
    # SystemExit carries main()'s return value out as the exit status.
    raise SystemExit(main())