native_client_sdk/src/doc/doxygen/doxy_cleanup.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 '''This utility cleans up the html files as emitted by doxygen so
   7 that they are suitable for publication on a Google documentation site.
   8 '''
   9
  10 import argparse
  11 import glob
  12 import os
  13 import re
  14 import shutil
  15 import sys
  16
  17 try:
  18   from BeautifulSoup import BeautifulSoup, Tag
  19 except (ImportError, NotImplementedError):
  20   print ("This tool requires the BeautifulSoup package "
  21          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
  22          "Make sure that the file BeautifulSoup.py is either in this directory "
  23          "or is available in your PYTHON_PATH")
  24   raise
  25
  26
  27 def Trace(msg):
  28   if Trace.verbose:
  29     sys.stderr.write(str(msg) + '\n')
  30
  31 Trace.verbose = False
  32
  33
# Glob patterns for generated files that are not wanted in the final output.
# main() joins each pattern onto the target directory, expands it with
# glob.glob() and deletes every match, so entries may contain wildcards.
FILES_TO_REMOVE = [
  '*.css',
  '*.map',
  '*.md5',
  'annotated.html',
  'bc_s.png',
  'classes.html',
  'closed.png',
  'doxygen.png',
  'files.html',
  'functions*.html',
  'globals_0x*.html',
  'globals_enum.html',
  'globals_eval.html',
  'globals_func.html',
  'globals.html',
  'globals_type.html',
  'globals_vars.html',
  'graph_legend.html',
  'graph_legend.png',
  'hierarchy.html',
  'index_8dox.html',
  'index.html',
  'modules.html',
  'namespacemembers_func.html',
  'namespacemembers.html',
  'namespaces.html',
  'nav_f.png',
  'nav_h.png',
  'open.png',
  'tab_a.png',
  'tab_b.png',
  'tab_h.png',
  'tab_s.png',
]
  69
  70
  71 class HTMLFixer(object):
  72   '''This class cleans up the html strings as produced by Doxygen
  73   '''
  74
  75   def __init__(self, html):
  76     self.soup = BeautifulSoup(html)
  77
  78   def FixTableHeadings(self):
  79     '''Fixes the doxygen table headings.
  80
  81     This includes:
  82       - Using bare <h2> title row instead of row embedded in <tr><td> in table
  83       - Putting the "name" attribute into the "id" attribute of the <tr> tag.
  84       - Splitting up tables into multiple separate tables if a table
  85         heading appears in the middle of a table.
  86
  87     For example, this html:
  88      <table>
  89       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
  90       Data Fields List</h2></td></tr>
  91       ...
  92      </table>
  93
  94     would be converted to this:
  95      <h2>Data Fields List</h2>
  96      <table>
  97       ...
  98      </table>
  99     '''
 100
 101     table_headers = []
 102     for tag in self.soup.findAll('tr'):
 103       if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
 104         #tag['id'] = tag.td.h2.a['name']
 105         tag.string = tag.td.h2.a.next
 106         tag.name = 'h2'
 107         table_headers.append(tag)
 108
 109     # reverse the list so that earlier tags don't delete later tags
 110     table_headers.reverse()
 111     # Split up tables that have multiple table header (th) rows
 112     for tag in table_headers:
 113       Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
 114       # Is this a heading in the middle of a table?
 115       if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
 116         Trace("Splitting Table named %s" % tag.string.strip())
 117         table = tag.parent
 118         table_parent = table.parent
 119         table_index = table_parent.contents.index(table)
 120         new_table = Tag(self.soup, name='table', attrs=table.attrs)
 121         table_parent.insert(table_index + 1, new_table)
 122         tag_index = table.contents.index(tag)
 123         for index, row in enumerate(table.contents[tag_index:]):
 124           new_table.insert(index, row)
 125       # Now move the <h2> tag to be in front of the <table> tag
 126       assert tag.parent.name == 'table'
 127       table = tag.parent
 128       table_parent = table.parent
 129       table_index = table_parent.contents.index(table)
 130       table_parent.insert(table_index, tag)
 131
 132   def RemoveTopHeadings(self):
 133     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
 134     header_tags = self.soup.findAll(
 135         name='div',
 136         attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
 137     [tag.extract() for tag in header_tags]
 138
 139   def RemoveVersionNumbers(self, html):
 140     '''Horrible hack to strip _#_# from struct names.'''
 141     return re.sub(r'(_\d_\d)(?=[": <])', '', html)
 142
 143   def FixAll(self):
 144     self.FixTableHeadings()
 145     self.RemoveTopHeadings()
 146     html = str(self.soup)
 147     html = self.RemoveVersionNumbers(html)
 148     return html
 149
 150
 151 def main(args):
 152   """Main entry for the doxy_cleanup utility
 153
 154   doxy_cleanup cleans up the html files generated by doxygen.
 155   """
 156
 157   parser = argparse.ArgumentParser(description=__doc__)
 158   parser.add_argument('-v', '--verbose', help='verbose output.',
 159                       action='store_true')
 160   parser.add_argument('directory')
 161
 162   options = parser.parse_args(args)
 163
 164   if options.verbose:
 165     Trace.verbose = True
 166
 167   root_dir = options.directory
 168   html_dir = os.path.join(root_dir, 'html')
 169
 170   # Doxygen puts all files in an 'html' directory.
 171   # First, move all files from that directory to root_dir.
 172   for filename in glob.glob(os.path.join(html_dir, '*')):
 173     Trace('Moving %s -> %s' % (filename, root_dir))
 174     shutil.move(filename, root_dir)
 175
 176   # Now remove the 'html' directory.
 177   Trace('Removing %s' % html_dir)
 178   os.rmdir(html_dir)
 179
 180   # Then remove unneeded files.
 181   for wildcard in FILES_TO_REMOVE:
 182     Trace('Removing "%s":' % wildcard)
 183     path = os.path.join(root_dir, wildcard)
 184     for filename in glob.glob(path):
 185       Trace('  Removing "%s"' % filename)
 186       os.remove(filename)
 187
 188   # Now, fix the HTML files we've kept.
 189   Trace('Fixing HTML files...')
 190   for root, _, files in os.walk(root_dir):
 191     for filename in files:
 192       if not os.path.splitext(filename)[1] == '.html':
 193         Trace('Skipping %s' % filename)
 194         continue
 195
 196       filename = os.path.join(root, filename)
 197       Trace('Processing "%s"...' % filename)
 198       try:
 199         with open(filename) as f:
 200           html = f.read()
 201
 202         fixer = HTMLFixer(html)
 203         output = fixer.FixAll()
 204         with open(filename, 'w') as f:
 205           f.write(output)
 206       except:
 207         sys.stderr.write("Error while processing %s\n" % filename)
 208         raise
 209
 210   return 0
 211
 212 if __name__ == '__main__':
 213   try:
 214     rtn = main(sys.argv[1:])
 215   except KeyboardInterrupt:
 216     sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__))
 217     rtn = 1
 218   sys.exit(rtn)