gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42 - we're missing local anchors in refsect
  43   - we should create id attrs on the docbook xml, instead of injecting anchors
  44     in our xsl layer
  45 - check each docbook tag if it can contain #PCDATA, if not don't check for
  46   xml.text
  47 - consider some perf-warnings flag
  48   - see 'No "id" attribute on'
  49 - find a better way to print context for warnings
  50   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  51
  52 DIFFERENCES:
  53 - titles
  54   - we add the chunk label to both title in toc and title on the page
  55   - docbook xsl only sometimes adds the label to the titles and when it does it
  56     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  57 - navigation
  58   - we always add an up-link except on the first page
  59
  60 OPTIONAL:
  61 - minify html: https://pypi.python.org/pypi/htmlmin/
  62
  63 Requirements:
  64 sudo pip3 install anytree lxml pygments
  65
  66 Example invocation:
  67 cd tests/bugs/docs/
  68 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  69 xdg-open db2html/index.html
  70 meld html db2html
  71
  72 Benchmarking:
  73 cd tests/bugs/docs/;
  74 rm html-build.stamp; time make html-build.stamp
  75 """
  76
  77 import argparse
  78 import errno
  79 import logging
  80 import os
  81 import shutil
  82 import sys
  83
  84 from anytree import Node, PreOrderIter
  85 from copy import deepcopy
  86 from glob import glob
  87 from lxml import etree
  88 from pygments import highlight
  89 from pygments.lexers import CLexer
  90 from pygments.formatters import HtmlFormatter
  91 from timeit import default_timer as timer
  92
  93 from . import config, fixxref
  94
  95 # pygments setup
  96 # lazily constructed lexer cache
  97 LEXERS = {
  98     'c': CLexer()
  99 }
 100 HTML_FORMATTER = HtmlFormatter(nowrap=True)
 101
 102
 103 class ChunkParams(object):
 104     def __init__(self, prefix, parent=None, min_idx=0):
 105         self.prefix = prefix
 106         self.parent = parent
 107         self.min_idx = min_idx
 108         self.idx = 1
 109
 110
 111 DONT_CHUNK = float('inf')
 112 # docbook-xsl defines the chunk tags here.
 113 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
 114 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
 115 # If not defined, we can just create an example without an 'id' attr and see
 116 # docbook xsl does.
 117 #
 118 # For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
 119 # TODO: this list has also a flag that controls wheter we add the
 120 # 'Table of Contents' heading in convert_chunk_with_toc()
 121 CHUNK_PARAMS = {
 122     'appendix': ChunkParams('app', 'book'),
 123     'book': ChunkParams('bk'),
 124     'chapter': ChunkParams('ch', 'book'),
 125     'glossary': ChunkParams('go', 'book'),
 126     'index': ChunkParams('ix', 'book'),
 127     'part': ChunkParams('pt', 'book'),
 128     'preface': ChunkParams('pr', 'book'),
 129     'refentry': ChunkParams('re', 'book'),
 130     'reference': ChunkParams('rn', 'book'),
 131     'sect1': ChunkParams('s', 'chapter', 1),
 132     'section': ChunkParams('s', 'chapter', 1),
 133     'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
 134     'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
 135     'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
 136     'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
 137 }
 138 # TAGS we don't support:
 139 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 140
 141 TITLE_XPATHS = {
 142     '_': (etree.XPath('./title'), None),
 143     'book': (etree.XPath('./bookinfo/title'), None),
 144     'refentry': (
 145         etree.XPath('./refmeta/refentrytitle'),
 146         etree.XPath('./refnamediv/refpurpose')
 147     ),
 148 }
 149
 150 ID_XPATH = etree.XPath('//*[@id]')
 151
 152 GLOSSENTRY_XPATH = etree.XPath('//glossentry')
 153 glossary = {}
 154
 155 footnote_idx = 1
 156
 157 # nested dict with subkeys:
 158 # title: textual title
 159 # tag: chunk tag
 160 # xml: title xml node
 161 titles = {}
 162
 163
 164 def gen_chunk_name(node, chunk_params):
 165     """Generate a chunk file name
 166
 167     This is either based on the id or on the position in the doc. In the latter
 168     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 169     type.
 170     """
 171     if 'id' in node.attrib:
 172         return node.attrib['id']
 173
 174     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 175     chunk_params.idx += 1
 176
 177     # handle parents to make names of nested tags like in docbook
 178     # - we only need to prepend the parent if there are > 1 of them in the
 179     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 180     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 181     #   when we chunk explicitly and on each level maintain the 'idx'
 182     # while chunk_params.parent:
 183     #     parent = chunk_params.parent
 184     #     if parent not in CHUNK_PARAMS:
 185     #         break;
 186     #     chunk_params = CHUNK_PARAMS[parent]
 187     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 188
 189     logging.info('Gen chunk name: "%s"', name)
 190     return name
 191
 192
 193 def get_chunk_titles(module, node):
 194     tag = node.tag
 195     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 196
 197     ctx = {
 198         'module': module,
 199         'files': [],
 200     }
 201     result = {
 202         'title': None,
 203         'title_tag': None,
 204         'subtitle': None,
 205         'subtitle_tag': None
 206     }
 207     res = title(node)
 208     if res:
 209         # handle chunk label for tocs
 210         label = node.attrib.get('label')
 211         if label:
 212             label += '. '
 213         else:
 214             label = ''
 215
 216         xml = res[0]
 217         result['title'] = label + ''.join(convert_title(ctx, xml))
 218         if xml.tag != 'title':
 219             result['title_tag'] = xml.tag
 220         else:
 221             result['title_tag'] = tag
 222
 223     if subtitle:
 224         res = subtitle(node)
 225         if res:
 226             xml = res[0]
 227             result['subtitle'] = ''.join(convert_title(ctx, xml))
 228             result['subtitle_tag'] = xml.tag
 229     return result
 230
 231
 232 def chunk(xml_node, module, depth=0, idx=0, parent=None):
 233     """Chunk the tree.
 234
 235     The first time, we're called with parent=None and in that case we return
 236     the new_node as the root of the tree. For each tree-node we generate a
 237     filename and process the children.
 238     """
 239     tag = xml_node.tag
 240     chunk_params = CHUNK_PARAMS.get(tag)
 241     if chunk_params:
 242         title_args = get_chunk_titles(module, xml_node)
 243         chunk_name = gen_chunk_name(xml_node, chunk_params)
 244
 245         # check idx to handle 'sect1'/'section' special casing and title-only
 246         # segments
 247         if idx >= chunk_params.min_idx:
 248             logging.info('chunk tag: "%s"[%d]', tag, idx)
 249             if parent:
 250                 # remove the xml-node from the parent
 251                 sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 252                 xml_node.getparent().remove(xml_node)
 253                 xml_node = sub_tree
 254
 255             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 256                           idx=idx,
 257                           filename=chunk_name + '.html', anchor=None,
 258                           **title_args)
 259         else:
 260             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 261                           idx=idx,
 262                           filename=parent.filename, anchor='#' + chunk_name,
 263                           **title_args)
 264
 265         depth += 1
 266         idx = 0
 267         for child in xml_node:
 268             chunk(child, module, depth, idx, parent)
 269             if child.tag in CHUNK_PARAMS:
 270                 idx += 1
 271
 272     return parent
 273
 274
 275 def add_id_links_and_titles(files, links):
 276     for node in files:
 277         chunk_name = node.filename[:-5]
 278         chunk_base = node.filename + '#'
 279         for elem in ID_XPATH(node.xml):
 280             attr = elem.attrib['id']
 281             if attr == chunk_name:
 282                 links[attr] = node.filename
 283             else:
 284                 links[attr] = chunk_base + attr
 285
 286             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 287             res = title(elem)
 288             if res:
 289                 xml = res[0]
 290                 # TODO: consider to eval those lazily
 291                 titles[attr] = {
 292                     'title': etree.tostring(xml, method="text", encoding=str).strip(),
 293                     'xml': xml,
 294                     'tag': elem.tag,
 295                 }
 296
 297
 298 def build_glossary(files):
 299     for node in files:
 300         if node.xml.tag != 'glossary':
 301             continue
 302         for term in GLOSSENTRY_XPATH(node.xml):
 303             # TODO: there can be all kind of things in a glossary. This only supports
 304             # what we commonly use
 305             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 306             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 307             glossary[key] = value
 308             # logging.debug('glosentry: %s:%s', key, value)
 309
 310
 311 # conversion helpers
 312
 313
 314 def convert_inner(ctx, xml, result):
 315     for child in xml:
 316         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 317
 318
 319 def convert_ignore(ctx, xml):
 320     result = []
 321     convert_inner(ctx, xml, result)
 322     return result
 323
 324
 325 def convert_skip(ctx, xml):
 326     return []
 327
 328
 329 def append_text(ctx, text, result):
 330     if text and ('no-strip' in ctx or text.strip()):
 331         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 332
 333
 334 missing_tags = {}
 335
 336
 337 def convert__unknown(ctx, xml):
 338     # don't recurse on subchunks
 339     if xml.tag in CHUNK_PARAMS:
 340         return []
 341     if isinstance(xml, etree._Comment):
 342         return ['<!-- ' + xml.text + '-->\n']
 343     else:
 344         # warn only once
 345         if xml.tag not in missing_tags:
 346             logging.warning('Add tag converter for "%s"', xml.tag)
 347             missing_tags[xml.tag] = True
 348         result = ['<!-- ' + xml.tag + '-->\n']
 349         convert_inner(ctx, xml, result)
 350         result.append('<!-- /' + xml.tag + '-->\n')
 351         return result
 352
 353
 354 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 355     result = ['<div class="%s">\n' % xml.tag]
 356     title_tag = xml.find('title')
 357     if title_tag is not None:
 358         if 'id' in xml.attrib:
 359             result.append('<a name="%s"></a>' % xml.attrib['id'])
 360         result.append('<%s>%s</%s>' % (
 361             h_tag, ''.join(convert_title(ctx, title_tag)), h_tag))
 362     append_text(ctx, xml.text, result)
 363     inner_func(ctx, xml, result)
 364     result.append('</div>')
 365     append_text(ctx, xml.tail, result)
 366     return result
 367
 368
 369 def xml_get_title(ctx, xml):
 370     title_tag = xml.find('title')
 371     if title_tag is not None:
 372         return ''.join(convert_title(ctx, title_tag))
 373     else:
 374         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 375         return ''
 376
 377
 378 # docbook tags
 379
 380
 381 def convert_abstract(ctx, xml):
 382     result = ["""<div class="abstract">
 383     <p class="title"><b>Abstract</b></p>"""]
 384     append_text(ctx, xml.text, result)
 385     convert_inner(ctx, xml, result)
 386     result.append('</div>')
 387     append_text(ctx, xml.tail, result)
 388     return result
 389
 390
 391 def convert_acronym(ctx, xml):
 392     key = xml.text
 393     title = glossary.get(key, '')
 394     # TODO: print a sensible warning if missing
 395     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 396     if xml.tail:
 397         result.append(xml.tail)
 398     return result
 399
 400
 401 def convert_anchor(ctx, xml):
 402     return ['<a name="%s"></a>' % xml.attrib['id']]
 403
 404
 405 def convert_bookinfo(ctx, xml):
 406     result = ['<div class="titlepage">']
 407     convert_inner(ctx, xml, result)
 408     result.append("""<hr>
 409 </div>""")
 410     if xml.tail:
 411         result.append(xml.tail)
 412     return result
 413
 414
 415 def convert_blockquote(ctx, xml):
 416     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 417     append_text(ctx, xml.text, result)
 418     convert_inner(ctx, xml, result)
 419     result.append('</blockquote>\n</div>')
 420     append_text(ctx, xml.tail, result)
 421     return result
 422
 423
 424 def convert_code(ctx, xml):
 425     result = ['<code class="%s">' % xml.tag]
 426     append_text(ctx, xml.text, result)
 427     convert_inner(ctx, xml, result)
 428     result.append('</code>')
 429     append_text(ctx, xml.tail, result)
 430     return result
 431
 432
 433 def convert_colspec(ctx, xml):
 434     result = ['<col']
 435     a = xml.attrib
 436     if 'colname' in a:
 437         result.append(' class="%s"' % a['colname'])
 438     if 'colwidth' in a:
 439         result.append(' width="%s"' % a['colwidth'])
 440     result.append('>\n')
 441     # is in tgroup and there can be no 'text'
 442     return result
 443
 444
 445 def convert_command(ctx, xml):
 446     result = ['<strong class="userinput"><code>']
 447     append_text(ctx, xml.text, result)
 448     convert_inner(ctx, xml, result)
 449     result.append('</code></strong>')
 450     append_text(ctx, xml.tail, result)
 451     return result
 452
 453
 454 def convert_corpauthor(ctx, xml):
 455     result = ['<div><h3 class="corpauthor">\n']
 456     append_text(ctx, xml.text, result)
 457     convert_inner(ctx, xml, result)
 458     result.append('</h3></div>\n')
 459     append_text(ctx, xml.tail, result)
 460     return result
 461
 462
 463 def convert_div(ctx, xml):
 464     result = ['<div class="%s">\n' % xml.tag]
 465     append_text(ctx, xml.text, result)
 466     convert_inner(ctx, xml, result)
 467     result.append('</div>')
 468     append_text(ctx, xml.tail, result)
 469     return result
 470
 471
 472 def convert_emphasis(ctx, xml):
 473     if 'role' in xml.attrib:
 474         result = ['<span class="%s">' % xml.attrib['role']]
 475         end = '</span>'
 476     else:
 477         result = ['<span class="emphasis"><em>']
 478         end = '</em></span>'
 479     append_text(ctx, xml.text, result)
 480     convert_inner(ctx, xml, result)
 481     result.append(end)
 482     append_text(ctx, xml.tail, result)
 483     return result
 484
 485
 486 def convert_em_class(ctx, xml):
 487     result = ['<em class="%s"><code>' % xml.tag]
 488     append_text(ctx, xml.text, result)
 489     convert_inner(ctx, xml, result)
 490     result.append('</code></em>')
 491     append_text(ctx, xml.tail, result)
 492     return result
 493
 494
 495 def convert_entry(ctx, xml):
 496     entry_type = ctx['table.entry']
 497     result = ['<' + entry_type]
 498     if 'role' in xml.attrib:
 499         result.append(' class="%s"' % xml.attrib['role'])
 500     if 'morerows' in xml.attrib:
 501         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 502     result.append('>')
 503     append_text(ctx, xml.text, result)
 504     convert_inner(ctx, xml, result)
 505     result.append('</' + entry_type + '>')
 506     append_text(ctx, xml.tail, result)
 507     return result
 508
 509
 510 def convert_footnote(ctx, xml):
 511     footnotes = ctx.get('footnotes', [])
 512     # footnotes idx is not per page, but per doc
 513     global footnote_idx
 514     idx = footnote_idx
 515     footnote_idx += 1
 516
 517     # need a pair of ids for each footnote (docbook generates different ids)
 518     this_id = 'footnote-%d' % idx
 519     that_id = 'ftn.' + this_id
 520
 521     inner = ['<div id="%s" class="footnote">' % that_id]
 522     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 523         this_id, idx))
 524     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 525     # get double nested paras :/.
 526     # convert_inner(ctx, xml, inner)
 527     para = xml.find('para')
 528     if para is None:
 529         para = xml.find('simpara')
 530     if para is not None:
 531         inner.append(para.text)
 532     else:
 533         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 534                         etree.tostring(xml, method="text", encoding=str).strip())
 535     inner.append('</p></div>')
 536     footnotes.append(inner)
 537     ctx['footnotes'] = footnotes
 538     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 539         that_id, this_id, idx)]
 540
 541
 542 def convert_formalpara(ctx, xml):
 543     result = None
 544     title_tag = xml.find('title')
 545     result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
 546     para_tag = xml.find('para')
 547     append_text(para_tag.text, result)
 548     convert_inner(ctx, para_tag, result)
 549     append_text(para_tag.tail, result)
 550     result.append('</p>')
 551     append_text(ctx, xml.tail, result)
 552     return result
 553
 554
 555 def convert_glossdef(ctx, xml):
 556     result = ['<dd class="glossdef">']
 557     convert_inner(ctx, xml, result)
 558     result.append('</dd>\n')
 559     return result
 560
 561
 562 def convert_glossdiv(ctx, xml):
 563     title_tag = xml.find('title')
 564     title = title_tag.text
 565     xml.remove(title_tag)
 566     result = [
 567         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 568     ]
 569     convert_inner(ctx, xml, result)
 570     return result
 571
 572
 573 def convert_glossentry(ctx, xml):
 574     result = []
 575     convert_inner(ctx, xml, result)
 576     return result
 577
 578
 579 def convert_glossterm(ctx, xml):
 580     glossid = ''
 581     text = ''
 582     anchor = xml.find('anchor')
 583     if anchor is not None:
 584         glossid = anchor.attrib.get('id', '')
 585         text += anchor.tail or ''
 586     text += xml.text or ''
 587     if glossid == '':
 588         glossid = 'glossterm-' + text
 589     return [
 590         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 591             glossid, text)
 592     ]
 593
 594
 595 def convert_imageobject(ctx, xml):
 596     imagedata = xml.find('imagedata')
 597     if imagedata is not None:
 598         # TODO(ensonic): warn on missing fileref attr?
 599         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 600     else:
 601         return []
 602
 603
 604 def convert_indexdiv(ctx, xml):
 605     title_tag = xml.find('title')
 606     title = title_tag.text
 607     xml.remove(title_tag)
 608     result = [
 609         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 610     ]
 611     convert_inner(ctx, xml, result)
 612     return result
 613
 614
 615 def convert_informaltable(ctx, xml):
 616     result = ['<div class="informaltable"><table class="informaltable"']
 617     a = xml.attrib
 618     if 'pgwide' in a and a['pgwide'] == '1':
 619         result.append(' width="100%"')
 620     if 'frame' in a and a['frame'] == 'none':
 621         result.append(' border="0"')
 622     result.append('>\n')
 623     convert_inner(ctx, xml, result)
 624     result.append('</table></div>')
 625     if xml.tail:
 626         result.append(xml.tail)
 627     return result
 628
 629
 630 def convert_inlinegraphic(ctx, xml):
 631     # TODO(ensonic): warn on missing fileref attr?
 632     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 633
 634
 635 def convert_itemizedlist(ctx, xml):
 636     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 637     convert_inner(ctx, xml, result)
 638     result.append('</ul></div>')
 639     if xml.tail:
 640         result.append(xml.tail)
 641     return result
 642
 643
 644 def convert_link(ctx, xml):
 645     linkend = xml.attrib['linkend']
 646     result = []
 647     if linkend:
 648         link_text = []
 649         append_text(ctx, xml.text, link_text)
 650         convert_inner(ctx, xml, link_text)
 651         text = ''.join(link_text)
 652
 653         (tid, href) = fixxref.GetXRef(linkend)
 654         if href:
 655             title_attr = ''
 656             title = titles.get(tid)
 657             if title:
 658                 title_attr = ' title="%s"' % title['title']
 659
 660             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 661             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 662         else:
 663             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 664             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 665             result = [text]
 666     else:
 667         append_text(ctx, xml.text, result)
 668         convert_inner(ctx, xml, result)
 669     append_text(ctx, xml.tail, result)
 670     return result
 671
 672
 673 def convert_listitem(ctx, xml):
 674     result = ['<li class="listitem">']
 675     convert_inner(ctx, xml, result)
 676     result.append('</li>')
 677     # is in itemizedlist and there can be no 'text'
 678     return result
 679
 680
 681 def convert_literallayout(ctx, xml):
 682     result = ['<div class="literallayout"><p><br>\n']
 683     append_text(ctx, xml.text, result)
 684     convert_inner(ctx, xml, result)
 685     result.append('</p></div>')
 686     append_text(ctx, xml.tail, result)
 687     return result
 688
 689
 690 def convert_orderedlist(ctx, xml):
 691     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 692     convert_inner(ctx, xml, result)
 693     result.append('</ol></div>')
 694     append_text(ctx, xml.tail, result)
 695     return result
 696
 697
 698 def convert_para(ctx, xml):
 699     result = []
 700     if 'role' in xml.attrib:
 701         result.append('<p class="%s">' % xml.attrib['role'])
 702     else:
 703         result.append('<p>')
 704     if 'id' in xml.attrib:
 705         result.append('<a name="%s"></a>' % xml.attrib['id'])
 706     append_text(ctx, xml.text, result)
 707     convert_inner(ctx, xml, result)
 708     result.append('</p>')
 709     append_text(ctx, xml.tail, result)
 710     return result
 711
 712
 713 def convert_para_like(ctx, xml):
 714     result = []
 715     if 'id' in xml.attrib:
 716         result.append('<a name="%s"></a>' % xml.attrib['id'])
 717     result.append('<p class="%s">' % xml.tag)
 718     append_text(ctx, xml.text, result)
 719     convert_inner(ctx, xml, result)
 720     result.append('</p>')
 721     append_text(ctx, xml.tail, result)
 722     return result
 723
 724
 725 def convert_phrase(ctx, xml):
 726     result = ['<span']
 727     if 'role' in xml.attrib:
 728         result.append(' class="%s">' % xml.attrib['role'])
 729     else:
 730         result.append('>')
 731     append_text(ctx, xml.text, result)
 732     convert_inner(ctx, xml, result)
 733     result.append('</span>')
 734     append_text(ctx, xml.tail, result)
 735     return result
 736
 737
 738 def convert_primaryie(ctx, xml):
 739     result = ['<dt>\n']
 740     convert_inner(ctx, xml, result)
 741     result.append('\n</dt>\n<dd></dd>\n')
 742     return result
 743
 744
 745 def convert_pre(ctx, xml):
 746     # Since we're inside <pre> don't skip newlines
 747     ctx['no-strip'] = True
 748     result = ['<pre class="%s">' % xml.tag]
 749     append_text(ctx, xml.text, result)
 750     convert_inner(ctx, xml, result)
 751     result.append('</pre>')
 752     del ctx['no-strip']
 753     append_text(ctx, xml.tail, result)
 754     return result
 755
 756
 757 def convert_programlisting(ctx, xml):
 758     result = []
 759     if xml.attrib.get('role', '') == 'example':
 760         if xml.text:
 761             lang = xml.attrib.get('language', 'c').lower()
 762             if lang not in LEXERS:
 763                 LEXERS[lang] = get_lexer_by_name(lang)
 764             lexer = LEXERS.get(lang, None)
 765             if lexer:
 766                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 767
 768                 # we do own line-numbering
 769                 line_count = highlighted.count('\n')
 770                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 771                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 772   <tbody>
 773     <tr>
 774       <td class="listing_lines" align="right"><pre>%s</pre></td>
 775       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 776     </tr>
 777   </tbody>
 778 </table>
 779 """ % (source_lines, highlighted))
 780             else:
 781                 logging.warn('No pygments lexer for language="%s"', lang)
 782                 result.append('<pre class="programlisting">')
 783                 result.append(xml.text)
 784                 result.append('</pre>')
 785     else:
 786         result.append('<pre class="programlisting">')
 787         append_text(ctx, xml.text, result)
 788         convert_inner(ctx, xml, result)
 789         result.append('</pre>')
 790     append_text(ctx, xml.tail, result)
 791     return result
 792
 793
 794 def convert_quote(ctx, xml):
 795     result = ['<span class="quote">"<span class="quote">']
 796     append_text(ctx, xml.text, result)
 797     convert_inner(ctx, xml, result)
 798     result.append('</span>"</span>')
 799     append_text(ctx, xml.tail, result)
 800     return result
 801
 802
 803 def convert_refsect1(ctx, xml):
 804     # Add a divider between two consequitive refsect2
 805     def convert_inner(ctx, xml, result):
 806         prev = None
 807         for child in xml:
 808             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 809                 result.append('<hr>\n')
 810             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 811             prev = child
 812     return convert_sect(ctx, xml, 'h2', convert_inner)
 813
 814
 815 def convert_refsect2(ctx, xml):
 816     return convert_sect(ctx, xml, 'h3')
 817
 818
 819 def convert_refsect3(ctx, xml):
 820     return convert_sect(ctx, xml, 'h4')
 821
 822
 823 def convert_row(ctx, xml):
 824     result = ['<tr>\n']
 825     convert_inner(ctx, xml, result)
 826     result.append('</tr>\n')
 827     return result
 828
 829
 830 def convert_sect1_tag(ctx, xml):
 831     return convert_sect(ctx, xml, 'h2')
 832
 833
 834 def convert_sect2(ctx, xml):
 835     return convert_sect(ctx, xml, 'h3')
 836
 837
 838 def convert_sect3(ctx, xml):
 839     return convert_sect(ctx, xml, 'h4')
 840
 841
 842 def convert_simpara(ctx, xml):
 843     result = ['<p>']
 844     append_text(ctx, xml.text, result)
 845     convert_inner(ctx, xml, result)
 846     result.append('</p>')
 847     append_text(ctx, xml.tail, result)
 848     return result
 849
 850
 851 def convert_span(ctx, xml):
 852     result = ['<span class="%s">' % xml.tag]
 853     append_text(ctx, xml.text, result)
 854     convert_inner(ctx, xml, result)
 855     result.append('</span>')
 856     append_text(ctx, xml.tail, result)
 857     return result
 858
 859
 860 def convert_table(ctx, xml):
 861     result = ['<div class="table">']
 862     if 'id' in xml.attrib:
 863         result.append('<a name="%s"></a>' % xml.attrib['id'])
 864     title_tag = xml.find('title')
 865     if title_tag is not None:
 866         result.append('<p class="title"><b>')
 867         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 868         result.extend(convert_title(ctx, title_tag))
 869         result.append('</b></p>')
 870     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 871
 872     convert_inner(ctx, xml, result)
 873
 874     result.append('</table></div></div>')
 875     append_text(ctx, xml.tail, result)
 876     return result
 877
 878
 879 def convert_tbody(ctx, xml):
 880     result = ['<tbody>']
 881     ctx['table.entry'] = 'td'
 882     convert_inner(ctx, xml, result)
 883     result.append('</tbody>')
 884     # is in tgroup and there can be no 'text'
 885     return result
 886
 887
 888 def convert_tgroup(ctx, xml):
 889     # tgroup does not expand to anything, but the nested colspecs need to
 890     # be put into a colgroup
 891     cols = xml.findall('colspec')
 892     result = []
 893     if cols:
 894         result.append('<colgroup>\n')
 895         for col in cols:
 896             result.extend(convert_colspec(ctx, col))
 897             xml.remove(col)
 898         result.append('</colgroup>\n')
 899     convert_inner(ctx, xml, result)
 900     # is in informaltable and there can be no 'text'
 901     return result
 902
 903
 904 def convert_thead(ctx, xml):
 905     result = ['<thead>']
 906     ctx['table.entry'] = 'th'
 907     convert_inner(ctx, xml, result)
 908     result.append('</thead>')
 909     # is in tgroup and there can be no 'text'
 910     return result
 911
 912
 913 def convert_title(ctx, xml):
 914     # This is always explicitly called from some context
 915     result = []
 916     append_text(ctx, xml.text, result)
 917     convert_inner(ctx, xml, result)
 918     append_text(ctx, xml.tail, result)
 919     return result
 920
 921
 922 def convert_ulink(ctx, xml):
 923     if xml.text:
 924         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 925     else:
 926         url = xml.attrib['url']
 927         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, url)]
 928     append_text(ctx, xml.tail, result)
 929     return result
 930
 931
 932 def convert_userinput(ctx, xml):
 933     result = ['<span class="command"><strong>']
 934     append_text(ctx, xml.text, result)
 935     convert_inner(ctx, xml, result)
 936     result.append('</strong></span>')
 937     append_text(ctx, xml.tail, result)
 938     return result
 939
 940
 941 def convert_variablelist(ctx, xml):
 942     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 943 <colgroup>
 944 <col align="left" valign="top">
 945 <col>
 946 </colgroup>
 947 <tbody>"""]
 948     convert_inner(ctx, xml, result)
 949     result.append("""</tbody>
 950 </table></div>""")
 951     return result
 952
 953
 954 def convert_varlistentry(ctx, xml):
 955     result = ['<tr>']
 956
 957     result.append('<td><p>')
 958     term = xml.find('term')
 959     result.extend(convert_span(ctx, term))
 960     result.append('</p></td>')
 961
 962     result.append('<td>')
 963     listitem = xml.find('listitem')
 964     convert_inner(ctx, listitem, result)
 965     result.append('</td>')
 966
 967     result.append('<tr>')
 968     return result
 969
 970
 971 def convert_xref(ctx, xml):
 972     linkend = xml.attrib['linkend']
 973     (tid, href) = fixxref.GetXRef(linkend)
 974     title = titles.get(tid)
 975     # all sectN need to become 'section
 976     tag = title['tag']
 977     tag = {
 978         'sect1': 'section',
 979         'sect2': 'section',
 980         'sect3': 'section',
 981         'sect4': 'section',
 982         'sect5': 'section',
 983     }.get(tag, tag)
 984     result = [
 985         '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
 986         (href, title['title'], tag, ''.join(convert_title(ctx, title['xml'])))
 987     ]
 988
 989     append_text(ctx, xml.tail, result)
 990     return result
 991
 992
 993 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: maps a docbook tag name to the function that converts an
# element with that tag into a list of html fragments. Several tags share a
# generic converter (e.g. convert_span, convert_code, convert_div).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly by their parent via convert_title()
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1074
1075 # conversion helpers
1076
# Common start of every generated html page, up to and including the opening
# '<body>' tag. It is used as a '%' format string with two values: the page
# title and the '<link>' tags produced by generate_head_links().
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1086
1087
def generate_head_links(ctx):
    """Generate the '<link rel=...>' tags for the html head.

    Args:
      ctx: conversion context; 'nav_home' is required, 'nav_up', 'nav_prev'
        and 'nav_next' are optional. Each value provides 'filename' and
        'title' attributes.

    Returns:
      the link tags as one string, one tag per line, in the order home, up,
      prev, next
    """
    targets = [('home', ctx['nav_home'])]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            targets.append((rel, ctx[key]))

    return ''.join(
        '<link rel="%s" href="%s" title="%s">\n' % (rel, target.filename, target.title)
        for rel, target in targets)
1103
1104
def generate_nav_links(ctx):
    """Generate the table cells with the home/up/prev/next navigation icons.

    The home link is always present. For 'nav_up', 'nav_prev' and 'nav_next'
    a linked icon is emitted when the key is in ctx, otherwise the
    'insensitive' variant of the icon without a link.

    Args:
      ctx: conversion context; the nav values provide a 'filename' attribute

    Returns:
      the '<td>' cells as one string
    """
    linked = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
              'width="16" height="16" border="0" alt="%s"></a></td>')
    unlinked = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [linked % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    # (ctx key, access key, icon base name, alt text)
    optional = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    for key, accesskey, icon, alt in optional:
        if key in ctx:
            cells.append(linked % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(unlinked % icon)

    return ''.join(cells)
1130
1131
def generate_toc(ctx, node):
    """Generate the nested table-of-contents entries for the children of node.

    Args:
      ctx: conversion context, only handed on to the recursive calls
      node: tree node whose 'children' are listed; every child provides
        'filename', 'anchor', 'title', 'title_tag', 'subtitle',
        'subtitle_tag' and 'children'

    Returns:
      list of html fragments ('<dt>' entries with nested '<dd><dl>' lists)
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + (child.anchor or '')
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries += generate_toc(ctx, child)
        entries.append('</dl></dd>')
    return entries
1149
1150
def generate_basic_nav(ctx):
    """Generate the navigation table for pages without shortcut links.

    Returns:
      the table as one string; the shortcuts cell is empty and is followed by
      the cells from generate_nav_links()
    """
    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%%" align="left" class="shortcuts"></td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
        '    '
    )
    return template % generate_nav_links(ctx)
1159
1160
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Generate the navigation table with one shortcut per div title.

    Args:
      ctx: conversion context
      divs: elements whose titles (from xml_get_title()) become shortcuts
      prefix: put in front of the title to form the '#' link target
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the table as one string
    """
    div_titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, div_title, div_title)
        for div_title in div_titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%%" align="left" class="shortcuts">\n'
        '      <span id="nav_%s">\n'
        '        %s\n'
        '      </span>\n'
        '    </td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
        '    '
    )
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1178
1179
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1 that
    is not a TOC section, has an 'id' attribute and whose id contains a '.'.

    Args:
      ctx: conversion context
      refsect1s: the 'refsect1' elements of the refentry
      result: list of html fragments that is extended in place
    """
    table_open = (
        '<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%" align="left" class="shortcuts">\n'
        '      <a href="#" class="shortcut">Top</a>'
    )
    shortcut = (
        '\n'
        '          <span id="nav_%s">\n'
        '            <span class="dim">|</span>\n'
        '            <a href="#%s" class="shortcut">%s</a>\n'
        '          </span>\n'
        '          '
    )
    table_close = (
        '\n'
        '    </td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
    )

    result.append(table_open)
    for section in refsect1s:
        attrs = section.attrib
        # don't list TOC sections (role="xxx_proto")
        is_toc_section = attrs.get('role', '').endswith("_proto")
        # also skip sections without 'id' attrs and foreign sections, whose
        # id has no '.'
        if is_toc_section or 'id' not in attrs or '.' not in attrs['id']:
            continue

        ref_id = attrs['id']
        section_title = xml_get_title(ctx, section)
        # the part after the first '.' names the span, with '-' mapped to '_'
        span_id = ref_id.split('.')[1].replace('-', '_')
        result.append(shortcut % (span_id, ref_id, section_title))
    result.append(table_close % generate_nav_links(ctx))
1214
1215
def generate_footer(ctx):
    """Generate the footnotes block of a page.

    Args:
      ctx: conversion context; when it has a 'footnotes' key, the value is
        iterated and each item contributes its html fragments

    Returns:
      list of html fragments; empty when ctx has no 'footnotes'
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    for footnote in ctx['footnotes']:
        footer += footnote
    footer.append('</div>\n')
    return footer
1226
1227
def get_id_path(node):
    """Generate the index path that makes up a synthetic 'id'.

    We walk up the xml-tree from node.xml and record the 1-based position of
    each element among its siblings. Then we walk up the chunked-tree from
    node and record the 1-based 'idx' of each tree node.

    Returns:
      list of number strings: the chunked-tree positions (root first),
      followed by the xml-tree positions (outermost first)
    """
    # positions in the xml-tree, collected innermost first
    xml_positions = []
    element = node.xml
    container = element.getparent()
    while container is not None:
        siblings = container.getchildren()
        xml_positions.append(str(siblings.index(element) + 1))
        element, container = container, container.getparent()

    # positions in the chunked-tree, collected from node up to the root
    chunk_positions = []
    chunk = node
    while chunk is not None:
        chunk_positions.append(str(chunk.idx + 1))
        chunk = chunk.parent

    return chunk_positions[::-1] + xml_positions[::-1]
1247
1248
def get_id(node):
    """Return the id to use for the anchor of a tree node.

    The 'id' attribute of node.xml is used when it is set and not empty.
    Otherwise the missing id is logged and 'id-' followed by the dot-joined
    result of get_id_path() is returned.
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1262
1263
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk in ctx['node'] to a full page with a table of contents.

    Args:
      ctx: conversion context; 'node' is the tree node to convert
      div_class: css class of the '<div>' wrapping the page content
      title_tag: html tag used for the page title (e.g. 'h1')

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="%s">' % div_class)

    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<%s class="title"><a name="%s"></a>%s</%s>\n'
            '</div>'
        )
        page.append(titlepage % (title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append(
            '<p><b>Table of Contents</b></p>\n'
            '    <div class="toc">\n'
            '      <dl class="toc">\n'
            '    ')
        page += toc_entries
        page.append(
            '</dl>\n'
            '    </div>\n'
            '    ')

    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1295
1296
1297 # docbook chunks
1298
1299
def convert_book(ctx):
    """Convert the 'book' chunk in ctx['node'] to the index page.

    The page consists of the title, the converted first 'bookinfo' child and
    the table of contents generated from the root of the chunked-tree.

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    title_table = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">\n'
        '    <tr><th valign="middle"><p class="title">%s</p></th></tr>\n'
        '</table>\n'
        '<div class="book">\n'
    )
    page = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        title_table % node.title,
    ]

    first_bookinfo = node.xml.findall('bookinfo')[0]
    page += convert_bookinfo(ctx, first_bookinfo)

    page.append('<div class="toc">\n  <dl class="toc">\n')
    page += generate_toc(ctx, node.root)
    page.append('</dl>\n</div>\n')

    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1324
1325
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a page with toc, 'chapter' div, 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1328
1329
def convert_glossary(ctx):
    """Convert the 'glossary' chunk in ctx['node'] to a page.

    The navigation gets one shortcut per 'glossdiv' child and each 'glossdiv'
    is converted in document order.

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    titlepage = (
        '<div class="glossary">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    page.append(titlepage % (node.depth, get_id(node), node.title, node.depth))

    for glossdiv in glossdivs:
        page += convert_glossdiv(ctx, glossdiv)
    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1349
1350
def convert_index(ctx):
    """Convert the 'index' chunk in ctx['node'] to a page.

    The navigation gets one shortcut per nested 'indexdiv' and each of them
    is converted in document order.

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    # Get all indexdivs under the first top-level indexdiv
    outer_indexdiv = node.xml.find('indexdiv')
    indexdivs = outer_indexdiv.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    titlepage = (
        '<div class="index">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_alpha_nav(ctx, indexdivs, 'idx', 'index'))
    page.append(titlepage % (node.depth, get_id(node), node.title, node.depth))

    for indexdiv in indexdivs:
        page += convert_indexdiv(ctx, indexdiv)
    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1371
1372
def convert_part(ctx):
    """Convert a 'part' chunk: a page with toc, 'part' div and 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1375
1376
def convert_preface(ctx):
    """Convert the 'preface' chunk in ctx['node'] to a page without toc.

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="preface">')

    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<h2 class="title"><a name="%s"></a>%s</h2>\n'
            '</div>'
        )
        page.append(titlepage % (get_id(node), node.title))

    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1395
1396
def convert_reference(ctx):
    """Convert a 'reference' chunk: a page with toc, 'reference' div, 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1399
1400
def convert_refentry(ctx):
    """Convert the 'refentry' chunk in ctx['node'] to a page.

    The page has the refentry navigation, a header with title, subtitle and
    an optional gallery image, followed by all converted 'refsect1' children.
    The gallery image is the 'inlinegraphic' found by following the first
    'refmeta', its first 'refmiscinfo' and that element's first
    'inlinegraphic'.

    Returns:
      list of html fragments that make up the complete page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # descend refmeta/refmiscinfo/inlinegraphic, stop at the first missing one
    graphic = node.xml
    for child_tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(child_tag)
        if graphic is None:
            break
    if graphic is None:
        gallery = ''
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, page)

    header = """
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
"""
    page.append(header % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        page += convert_refsect1(ctx, refsect1)
    page += generate_footer(ctx)
    page.append('</div>\n</body>\n</html>')
    return page
1440
1441
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a page with toc, 'sect1' div and 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1444
1445
1446 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: maps the name of a chunk (tree node) to the function that
# converts it into the html fragments of a complete page. Each function takes
# the conversion context as its only argument; convert() looks chunks up here
# by node.name.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1458
1459
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a node.

    Args:
      files: list of nodes; the neighbours of node in this list become the
        'prev' and 'next' targets
      node: the current tree node, must be in files

    Returns:
      dict with 'nav_home' (the root) and, where available, 'nav_up' (the
      parent), 'nav_prev' and 'nav_next'
    """
    # nav params: home, up, prev, next
    nav = {'nav_home': node.root}
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    last = len(files) - 1
    if position >= 1:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
1473
1474
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is created even when there is no converter for the chunk;
    in that case a warning is logged and the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            for fragment in converter(ctx):
                html.write(fragment)
1499
1500
def create_devhelp2_toc(node):
    """Generate the nested '<sub>' entries for the children of node.

    Args:
      node: tree node; every child provides 'title', 'filename' and
        'children'

    Returns:
      list of xml lines; children without own children become self-closing
      '<sub/>' tags
    """
    lines = []
    for child in node.children:
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        lines.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
1511
1512
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into xml attributes.

    The condition holds '|' separated entries of the form 'key:value' or just
    'key' (since, deprecated, ...). Double quotes are replaced by '&quot;'.

    Returns:
      the attributes as ' key="value" ...' with a leading space, or '' when
      node has no 'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    escaped = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in escaped.split('|'):
        # split at the first ':' only; deprecated can have no description and
        # then gets an empty value
        key, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
1527
1528
def create_devhelp2_refsect2_keyword(node, base_link):
    """Generate the devhelp2 '<keyword>' line for a 'refsect2' element.

    Args:
      node: element with 'role' and 'id' attributes; its title is the name
      base_link: link prefix that the 'id' is appended to

    Returns:
      the keyword line, including condition attributes if there are any
    """
    role = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, name, link, conditions)
1533
1534
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Generate the devhelp2 '<keyword>' line for an enum or struct member.

    Args:
      node: element with a 'role' attribute (and optional 'condition')
      base_link: link prefix that 'name' is appended to
      title: value for the 'name' attribute of the keyword
      name: anchor name that completes the link

    Returns:
      the keyword line, including condition attributes if there are any
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
1539
1540
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the chapters (from the chunked-tree) and one keyword for
    each 'refsect2' with a 'role', plus keywords for the enum and struct
    members documented below them.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the document, queried for '/book/bookinfo'
      files: list of tree nodes; each provides 'filename' and 'xml', the
        first one also the 'root' of the chunked-tree
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        # xpath() returns a (possibly empty) list for these queries and never
        # None. Test for emptiness and keep empty attribute values when the
        # bookinfo, its title text or the online-location link are missing,
        # instead of failing with an IndexError or an unbound 'online_url'.
        title = ''
        online_url = ''
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            title_texts = bookinfo.xpath('./title/text()')
            if title_texts:
                title = title_texts[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the 'para'
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the 'id' serves as keyword name and anchor
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        for line in result:
            idx.write(line)
1590
1591
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, the dirs are derived from the location of the
        running script (or $ABS_TOP_SRCDIR), otherwise from config.datadir

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    found_next_to_script = os.path.exists(gtkdocdir + '/gtk-doc.xsl')
    # try 'srcdir' (set from makefiles) too
    if not found_next_to_script and os.path.exists(
            os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1605
1606
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into chunked html and a devhelp2 index.

    The work is done in numbered steps; the time each step took is logged.

    Args:
      module: the module name, handed on to the chunking and output steps
      index_file: the main xml document, loaded with xincludes resolved
      out_dir: directory that receives the data files and all output
      uninstalled: handed to get_dirs() to select where data files come from

    Returns:
      None; there is no explicit return value
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the html formatter to the copy
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep the nodes without an anchor, in pre-order
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
1681
1682
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line; 'args' holds the module name and the
        path of the main document, 'uninstalled' is handed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, other errors propagate
        pass

    exit_status = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_status)