# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''

import optparse
import os
import re
import shutil
import sys

try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  print ("This tool requires the BeautifulSoup package "
         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
         "Make sure that the file BeautifulSoup.py is either in this directory "
         "or is available in your PYTHON_PATH")
  raise


class HTMLFixer(object):
  '''This class cleans up the html strings as produced by Doxygen
  '''

  def __init__(self, html):
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Putting the "name" attribute into the "id" attribute of the <tr> tag.
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    For example, this html:
     <table>
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>
      ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
      ...
     </table>
    '''
    table_headers = []
    for tag in self.soup.findAll('tr'):
      if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
        #tag['id'] = tag.td.h2.a['name']
        tag.string = tag.td.h2.a.next
        tag.name = 'h2'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      print "Header tag: %s is %s" % (tag.name, tag.string.strip())
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        print "Splitting Table named %s" % tag.string.strip()
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
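    # These divs hold doxygen's page chrome (the title banner, the tab strips,
    # and the breadcrumb navigation path), none of which belongs on the
    # published page.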
    header_tags = self.soup.findAll(
        'div',
        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
    [tag.extract() for tag in header_tags]

  def FixAll(self):
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    return str(self.soup)


def main():
  '''Main entry for the doxy_cleanup utility

  doxy_cleanup takes a list of html files and modifies them in place.'''
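
  # Example invocation (the output path shown is hypothetical):
  #   python doxy_cleanup.py --move out/doxygen/html/*.html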

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args()

  for filename in files:
    try:
      with open(filename, 'r') as file:
        html = file.read()

      print "Processing %s" % filename
      fixer = HTMLFixer(html)
      fixer.FixAll()
      with open(filename, 'w') as file:
        file.write(str(fixer))
      if options.move:
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except:
      print "Error while processing %s" % filename
      raise

  return 0


if __name__ == '__main__':
  sys.exit(main())