ppapi/cpp/documentation/doxy_cleanup.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 '''This utility cleans up the html files as emitted by doxygen so
   7 that they are suitable for publication on a Google documentation site.
   8 '''
   9
  10 import optparse
  11 import os
  12 import re
  13 import shutil
  14 import string
  15 import sys
  16 try:
  17   from BeautifulSoup import BeautifulSoup, Tag
  18 except (ImportError, NotImplementedError):
  19   print ("This tool requires the BeautifulSoup package "
  20          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
  21          "Make sure that the file BeautifulSoup.py is either in this directory "
  22          "or is available in your PYTHON_PATH")
  23   raise
  24
  25
  26 class HTMLFixer(object):
  27   '''This class cleans up the html strings as produced by Doxygen
  28   '''
  29
  30   def __init__(self, html):
  31     self.soup = BeautifulSoup(html)
  32
  33   def FixTableHeadings(self):
  34     '''Fixes the doxygen table headings.
  35
  36     This includes:
  37       - Using bare <h2> title row instead of row embedded in <tr><td> in table
  38       - Putting the "name" attribute into the "id" attribute of the <tr> tag.
  39       - Splitting up tables into multiple separate tables if a table
  40         heading appears in the middle of a table.
  41
  42     For example, this html:
  43      <table>
  44       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
  45       Data Fields List</h2></td></tr>
  46       ...
  47      </table>
  48
  49     would be converted to this:
  50      <h2>Data Fields List</h2>
  51      <table>
  52       ...
  53      </table>
  54     '''
  55
  56     table_headers = []
  57     for tag in self.soup.findAll('tr'):
  58       if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
  59         #tag['id'] = tag.td.h2.a['name']
  60         tag.string = tag.td.h2.a.next
  61         tag.name = 'h2'
  62         table_headers.append(tag)
  63
  64     # reverse the list so that earlier tags don't delete later tags
  65     table_headers.reverse()
  66     # Split up tables that have multiple table header (th) rows
  67     for tag in table_headers:
  68       print "Header tag: %s is %s" % (tag.name, tag.string.strip())
  69       # Is this a heading in the middle of a table?
  70       if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
  71         print "Splitting Table named %s" % tag.string.strip()
  72         table = tag.parent
  73         table_parent = table.parent
  74         table_index = table_parent.contents.index(table)
  75         new_table = Tag(self.soup, name='table', attrs=table.attrs)
  76         table_parent.insert(table_index + 1, new_table)
  77         tag_index = table.contents.index(tag)
  78         for index, row in enumerate(table.contents[tag_index:]):
  79           new_table.insert(index, row)
  80       # Now move the <h2> tag to be in front of the <table> tag
  81       assert tag.parent.name == 'table'
  82       table = tag.parent
  83       table_parent = table.parent
  84       table_index = table_parent.contents.index(table)
  85       table_parent.insert(table_index, tag)
  86
  87   def RemoveTopHeadings(self):
  88     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
  89     header_tags = self.soup.findAll(
  90         name='div',
  91         attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
  92     [tag.extract() for tag in header_tags]
  93
  94   def FixAll(self):
  95     self.FixTableHeadings()
  96     self.RemoveTopHeadings()
  97
  98   def __str__(self):
  99     return str(self.soup)
 100
 101
 102 def main():
 103   '''Main entry for the doxy_cleanup utility
 104
 105   doxy_cleanup takes a list of html files and modifies them in place.'''
 106
 107   parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
 108
 109   parser.add_option('-m', '--move', dest='move', action='store_true',
 110                     default=False, help='move html files to "original_html"')
 111
 112   options, files = parser.parse_args()
 113
 114   if not files:
 115     parser.print_usage()
 116     return 1
 117
 118   for filename in files:
 119     try:
 120       with open(filename, 'r') as file:
 121         html = file.read()
 122
 123       print "Processing %s" % filename
 124       fixer = HTMLFixer(html)
 125       fixer.FixAll()
 126       with open(filename, 'w') as file:
 127         file.write(str(fixer))
 128       if options.move:
 129         new_directory = os.path.join(
 130             os.path.dirname(os.path.dirname(filename)), 'original_html')
 131         if not os.path.exists(new_directory):
 132           os.mkdir(new_directory)
 133         shutil.move(filename, new_directory)
 134     except:
 135       print "Error while processing %s" % filename
 136       raise
 137
 138   return 0
 139
 140
 141 if __name__ == '__main__':
 142   sys.exit(main())