gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
   29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
   34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - more chunk converters
  39 - more tag converters:
  40   - footnote: maybe track those in ctx and write them out at the end of the chunk
  41   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  42     attr on the <img> tag of the 'imageobject'
  43 - check each docbook tag if it can contain #PCDATA, if not don't check for
  44   xml.text
  45 - consider some perf-warnings flag
  46   - see 'No "id" attribute on'
  47
  48 OPTIONAL:
  49 - minify html: https://pypi.python.org/pypi/htmlmin/
  50
  51 Requirements:
  52 sudo pip3 install anytree lxml pygments
  53
  54 Example invocation:
  55 cd tests/bugs/docs/
  56 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  57 xdg-open db2html/index.html
  58 meld html db2html
  59
  60 Benchmarking:
  61 cd tests/bugs/docs/;
  62 rm html-build.stamp; time make html-build.stamp
  63 """
  64
  65 import argparse
  66 import errno
  67 import logging
  68 import os
  69 import shutil
  70 import sys
  71
  72 from anytree import Node, PreOrderIter
  73 from copy import deepcopy
  74 from glob import glob
  75 from lxml import etree
  76 from pygments import highlight
  77 from pygments.lexers import CLexer
  78 from pygments.formatters import HtmlFormatter
  79
  80 from . import config, fixxref
  81
# pygments setup
# Lexer cache keyed by lower-cased language name. Only the C lexer is created
# up front; convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# Shared formatter for highlighted listings. convert_programlisting() places
# the formatter output inside its own <pre> element.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  88
# Docbook tags that start a new chunk (chunk() gives each one its own output
# filename). The list follows the chunking rules described at
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
 108
 109
 110 class ChunkParams(object):
 111     def __init__(self, prefix, parent=None):
 112         self.prefix = prefix
 113         self.parent = None
 114         self.count = 0
 115
 116
# Naming parameters per chunk tag, used by gen_chunk_name() for chunks that
# have no 'id' attribute. Each entry gives the filename prefix and the tag of
# the enclosing chunk type.
#
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If a tag is not defined here, we can just create an example without an 'id'
# attr and see what docbook xsl does.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
 133
# Maps a chunk tag to a (title, subtitle) pair of compiled XPath expressions
# that get_chunk_titles() evaluates relative to the chunk element. The '_'
# entry is the fallback for tags without their own entry; a subtitle of None
# means the chunk type has no subtitle.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects all 'id' attributes; used by add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects all glossary entries; used by build_glossary().
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# Glossary term -> definition text, filled by build_glossary() and read by
# convert_acronym().
glossary = {}
 147
 148
 149 def gen_chunk_name(node):
 150     if 'id' in node.attrib:
 151         return node.attrib['id']
 152
 153     tag = node.tag
 154     if tag not in CHUNK_PARAMS:
 155         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 156         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 157
 158     naming = CHUNK_PARAMS[tag]
 159     naming.count += 1
 160     name = ('%s%02d' % (naming.prefix, naming.count))
 161     # handle parents to make names of nested tags unique
 162     # TODO: we only need to prepend the parent if there are > 1 of them in the
 163     #       xml
 164     # while naming.parent:
 165     #     parent = naming.parent
 166     #     if parent not in CHUNK_PARAMS:
 167     #         break;
 168     #     naming = CHUNK_PARAMS[parent]
 169     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 170     return name
 171
 172
 173 def get_chunk_titles(node):
 174     tag = node.tag
 175     if tag not in TITLE_XPATHS:
 176         # Use defaults
 177         (title, subtitle) = TITLE_XPATHS['_']
 178     else:
 179         (title, subtitle) = TITLE_XPATHS[tag]
 180
 181     xml = title(node)[0]
 182     result = {
 183         'title': xml.text
 184     }
 185     if xml.tag != 'title':
 186         result['title_tag'] = xml.tag
 187     else:
 188         result['title_tag'] = tag
 189
 190     if subtitle:
 191         xml = subtitle(node)[0]
 192         result['subtitle'] = xml.text
 193         result['subtitle_tag'] = xml.tag
 194     else:
 195         result['subtitle'] = None
 196         result['subtitle_tag'] = None
 197     return result
 198
 199
def chunk(xml_node, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Every element whose tag is in CHUNK_TAGS becomes a tree Node that carries
    the element ('xml'), the output 'filename' and the title fields returned
    by get_chunk_titles(). All children are visited recursively.

    Args:
        xml_node: the xml element to process.
        parent: the tree Node of the enclosing chunk, or None for the first
            call.

    Returns:
        The Node created for xml_node if it starts a chunk, otherwise the
        parent that was passed in.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # remove the xml-node from the parent
            # The chunk continues to live as a detached deep copy, so the
            # enclosing chunk's xml no longer contains the sub-chunk.
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        chunk_name = gen_chunk_name(xml_node)
        # From here on, the new node is the parent for everything below.
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    for child in xml_node:
        chunk(child, parent)

    return parent
 222
 223
 224 def add_id_links(files, links):
 225     for node in files:
 226         chunk_name = node.filename[:-5]
 227         chunk_base = node.filename + '#'
 228         for attr in ID_XPATH(node.xml):
 229             if attr == chunk_name:
 230                 links[attr] = node.filename
 231             else:
 232                 links[attr] = chunk_base + attr
 233
 234
 235 def build_glossary(files):
 236     for node in files:
 237         if node.xml.tag != 'glossary':
 238             continue
 239         for term in GLOSSENTRY_XPATH(node.xml):
 240             # TODO: there can be all kind of things in a glossary. This only supports
 241             # what we commonly use
 242             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 243             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 244             glossary[key] = value
 245             # logging.debug('glosentry: %s:%s', key, value)
 246
 247
 248 # conversion helpers
 249
 250
 251 def convert_inner(ctx, xml, result):
 252     for child in xml:
 253         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 254
 255
 256 def convert_ignore(ctx, xml):
 257     result = []
 258     convert_inner(ctx, xml, result)
 259     return result
 260
 261
 262 def convert_skip(ctx, xml):
 263     return ['']
 264
 265
 266 missing_tags = {}
 267
 268
 269 def convert__unknown(ctx, xml):
 270     # don't recurse on subchunks
 271     if xml.tag in CHUNK_TAGS:
 272         return []
 273     # warn only once
 274     if xml.tag not in missing_tags:
 275         logging.warning('Add tag converter for "%s"', xml.tag)
 276         missing_tags[xml.tag] = True
 277     result = ['<!-- ' + xml.tag + '-->\n']
 278     convert_inner(ctx, xml, result)
 279     result.append('<!-- /' + xml.tag + '-->\n')
 280     return result
 281
 282
 283 def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
 284     result = ['<div class="%s">\n' % xml.tag]
 285     title = xml.find('title')
 286     if title is not None:
 287         if 'id' in xml.attrib:
 288             result.append('<a name="%s"></a>' % xml.attrib['id'])
 289         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 290         xml.remove(title)
 291     if xml.text:
 292         result.append(xml.text)
 293     inner_func(ctx, xml, result)
 294     result.append('</div>')
 295     if xml.tail:
 296         result.append(xml.tail)
 297     return result
 298
 299
 300 def xml_get_title(xml):
 301     title = xml.find('title')
 302     if title is not None:
 303         return title.text
 304     else:
 305         # TODO(ensonic): any way to get the file (inlcudes) too?
 306         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 307         return ''
 308
 309
 310 # docbook tags
 311
 312
 313 def convert_acronym(ctx, xml):
 314     key = xml.text
 315     title = glossary.get(key, '')
 316     # TODO: print a sensible warning if missing
 317     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 318     if xml.tail:
 319         result.append(xml.tail)
 320     return result
 321
 322
 323 def convert_bookinfo(ctx, xml):
 324     result = ['<div class="titlepage">']
 325     convert_inner(ctx, xml, result)
 326     result.append("""<hr>
 327 </div>""")
 328     if xml.tail:
 329         result.append(xml.tail)
 330     return result
 331
 332
 333 def convert_blockquote(ctx, xml):
 334     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 335     if xml.text:
 336         result.append(xml.text)
 337     convert_inner(ctx, xml, result)
 338     result.append('</blockquote>\n</div>')
 339     if xml.tail:
 340         result.append(xml.tail)
 341     return result
 342
 343
 344 def convert_colspec(ctx, xml):
 345     result = ['<col']
 346     a = xml.attrib
 347     if 'colname' in a:
 348         result.append(' class="%s"' % a['colname'])
 349     if 'colwidth' in a:
 350         result.append(' width="%s"' % a['colwidth'])
 351     result.append('>\n')
 352     # is in tgroup and there can be no 'text'
 353     return result
 354
 355
 356 def convert_corpauthor(ctx, xml):
 357     result = ['<div><h3 class="corpauthor">\n']
 358     if xml.text:
 359         result.append(xml.text)
 360     convert_inner(ctx, xml, result)
 361     result.append('</h3></div>\n')
 362     if xml.tail:
 363         result.append(xml.tail)
 364     return result
 365
 366
 367 def convert_div(ctx, xml):
 368     result = ['<div class="%s">\n' % xml.tag]
 369     if xml.text:
 370         result.append(xml.text)
 371     convert_inner(ctx, xml, result)
 372     result.append('</div>')
 373     if xml.tail:
 374         result.append(xml.tail)
 375     return result
 376
 377
 378 def convert_em_class(ctx, xml):
 379     result = ['<em class="%s"><code>' % xml.tag]
 380     if xml.text:
 381         result.append(xml.text)
 382     convert_inner(ctx, xml, result)
 383     result.append('</code></em>')
 384     if xml.tail:
 385         result.append(xml.tail)
 386     return result
 387
 388
 389 def convert_entry(ctx, xml):
 390     result = ['<td']
 391     if 'role' in xml.attrib:
 392         result.append(' class="%s">' % xml.attrib['role'])
 393     else:
 394         result.append('>')
 395     if xml.text:
 396         result.append(xml.text)
 397     convert_inner(ctx, xml, result)
 398     result.append('</td>')
 399     if xml.tail:
 400         result.append(xml.tail)
 401     return result
 402
 403
 404 def convert_glossdef(ctx, xml):
 405     result = ['<dd class="glossdef">']
 406     convert_inner(ctx, xml, result)
 407     result.append('</dd>\n')
 408     return result
 409
 410
 411 def convert_glossdiv(ctx, xml):
 412     title_tag = xml.find('title')
 413     title = title_tag.text
 414     xml.remove(title_tag)
 415     result = [
 416         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 417     ]
 418     convert_inner(ctx, xml, result)
 419     return result
 420
 421
 422 def convert_glossentry(ctx, xml):
 423     result = []
 424     convert_inner(ctx, xml, result)
 425     return result
 426
 427
 428 def convert_glossterm(ctx, xml):
 429     glossid = ''
 430     text = ''
 431     anchor = xml.find('anchor')
 432     if anchor is not None:
 433         glossid = anchor.attrib.get('id', '')
 434         text += anchor.tail or ''
 435     text += xml.text or ''
 436     if glossid == '':
 437         glossid = 'glossterm-' + text
 438     return [
 439         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 440             glossid, text)
 441     ]
 442
 443
 444 def convert_imageobject(ctx, xml):
 445     imagedata = xml.find('imagedata')
 446     if imagedata is not None:
 447         # TODO(ensonic): warn on missing fileref attr?
 448         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 449     else:
 450         return []
 451
 452
 453 def convert_indexdiv(ctx, xml):
 454     title_tag = xml.find('title')
 455     title = title_tag.text
 456     xml.remove(title_tag)
 457     result = [
 458         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 459     ]
 460     convert_inner(ctx, xml, result)
 461     return result
 462
 463
 464 def convert_informaltable(ctx, xml):
 465     result = ['<div class="informaltable"><table class="informaltable"']
 466     a = xml.attrib
 467     if 'pgwide' in a and a['pgwide'] == '1':
 468         result.append(' width="100%"')
 469     if 'frame' in a and a['frame'] == 'none':
 470         result.append(' border="0"')
 471     result.append('>\n')
 472     convert_inner(ctx, xml, result)
 473     result.append('</table></div>')
 474     if xml.tail:
 475         result.append(xml.tail)
 476     return result
 477
 478
 479 def convert_itemizedlist(ctx, xml):
 480     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 481     convert_inner(ctx, xml, result)
 482     result.append('</ul></div>')
 483     if xml.tail:
 484         result.append(xml.tail)
 485     return result
 486
 487
 488 def convert_link(ctx, xml):
 489     linkend = xml.attrib['linkend']
 490     if linkend in fixxref.NoLinks:
 491         linkend = None
 492     result = []
 493     if linkend:
 494         link_text = []
 495         convert_inner(ctx, xml, link_text)
 496         if xml.text:
 497             link_text.append(xml.text)
 498         # TODO: fixxref does some weird checks in xml.text
 499         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 500     if xml.tail:
 501         result.append(xml.tail)
 502     return result
 503
 504
 505 def convert_listitem(ctx, xml):
 506     result = ['<li class="listitem">']
 507     convert_inner(ctx, xml, result)
 508     result.append('</li>')
 509     # is in itemizedlist and there can be no 'text'
 510     return result
 511
 512
 513 def convert_literal(ctx, xml):
 514     result = ['<code class="%s">' % xml.tag]
 515     if xml.text:
 516         result.append(xml.text)
 517     convert_inner(ctx, xml, result)
 518     result.append('</code>')
 519     if xml.tail:
 520         result.append(xml.tail)
 521     return result
 522
 523
 524 def convert_orderedlist(ctx, xml):
 525     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 526     convert_inner(ctx, xml, result)
 527     result.append('</ol></div>')
 528     if xml.tail:
 529         result.append(xml.tail)
 530     return result
 531
 532
 533 def convert_para(ctx, xml):
 534     result = []
 535     if 'id' in xml.attrib:
 536         result.append('<a name="%s"></a>' % xml.attrib['id'])
 537     result.append('<p>')
 538     if xml.text:
 539         result.append(xml.text)
 540     convert_inner(ctx, xml, result)
 541     result.append('</p>')
 542     if xml.tail:
 543         result.append(xml.tail)
 544     return result
 545
 546
 547 def convert_para_like(ctx, xml):
 548     result = []
 549     if 'id' in xml.attrib:
 550         result.append('<a name="%s"></a>' % xml.attrib['id'])
 551     result.append('<p class="%s">' % xml.tag)
 552     if xml.text:
 553         result.append(xml.text)
 554     convert_inner(ctx, xml, result)
 555     result.append('</p>')
 556     if xml.tail:
 557         result.append(xml.tail)
 558     return result
 559
 560
 561 def convert_phrase(ctx, xml):
 562     result = ['<span']
 563     if 'role' in xml.attrib:
 564         result.append(' class="%s">' % xml.attrib['role'])
 565     else:
 566         result.append('>')
 567     if xml.text:
 568         result.append(xml.text)
 569     convert_inner(ctx, xml, result)
 570     result.append('</span>')
 571     if xml.tail:
 572         result.append(xml.tail)
 573     return result
 574
 575
 576 def convert_primaryie(ctx, xml):
 577     result = ['<dt>\n']
 578     convert_inner(ctx, xml, result)
 579     result.append('\n</dt>\n<dd></dd>\n')
 580     return result
 581
 582
 583 def convert_pre(ctx, xml):
 584     result = ['<pre class="%s">\n' % xml.tag]
 585     if xml.text:
 586         result.append(xml.text)
 587     convert_inner(ctx, xml, result)
 588     result.append('</pre>')
 589     if xml.tail:
 590         result.append(xml.tail)
 591     return result
 592
 593
 594 def convert_programlisting(ctx, xml):
 595     result = []
 596     if xml.attrib.get('role', '') == 'example':
 597         if xml.text:
 598             lang = xml.attrib.get('language', 'c').lower()
 599             if lang not in LEXERS:
 600                 LEXERS[lang] = get_lexer_by_name(lang)
 601             lexer = LEXERS.get(lang, None)
 602             if lexer:
 603                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 604
 605                 # we do own line-numbering
 606                 line_count = highlighted.count('\n')
 607                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 608                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 609   <tbody>
 610     <tr>
 611       <td class="listing_lines" align="right"><pre>%s</pre></td>
 612       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 613     </tr>
 614   </tbody>
 615 </table>
 616 """ % (source_lines, highlighted))
 617             else:
 618                 logging.warn('No pygments lexer for language="%s"', lang)
 619                 result.append('<pre class="programlisting">')
 620                 result.append(xml.text)
 621                 result.append('</pre>')
 622     else:
 623         result.append('<pre class="programlisting">')
 624         if xml.text:
 625             result.append(xml.text)
 626         convert_inner(ctx, xml, result)
 627         result.append('</pre>')
 628     if xml.tail:
 629         result.append(xml.tail)
 630     return result
 631
 632
 633 def convert_refsect1(ctx, xml):
 634     # Add a divider between two consequitive refsect2
 635     def convert_inner(ctx, xml, result):
 636         prev = None
 637         for child in xml:
 638             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 639                 result.append('<hr>\n')
 640             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 641             prev = child
 642     return convert_refsect(ctx, xml, 'h2', convert_inner)
 643
 644
 645 def convert_refsect2(ctx, xml):
 646     return convert_refsect(ctx, xml, 'h3')
 647
 648
 649 def convert_refsect3(ctx, xml):
 650     return convert_refsect(ctx, xml, 'h4')
 651
 652
 653 def convert_row(ctx, xml):
 654     result = ['<tr>\n']
 655     convert_inner(ctx, xml, result)
 656     result.append('</tr>\n')
 657     return result
 658
 659
 660 def convert_simpara(ctx, xml):
 661     result = ['<p>']
 662     if xml.text:
 663         result.append(xml.text)
 664     result.append('</p>')
 665     if xml.tail:
 666         result.append(xml.tail)
 667     return result
 668
 669
 670 def convert_span(ctx, xml):
 671     result = ['<span class="%s">' % xml.tag]
 672     if xml.text:
 673         result.append(xml.text)
 674     convert_inner(ctx, xml, result)
 675     result.append('</span>')
 676     if xml.tail:
 677         result.append(xml.tail)
 678     return result
 679
 680
 681 def convert_tbody(ctx, xml):
 682     result = ['<tbody>']
 683     convert_inner(ctx, xml, result)
 684     result.append('</tbody>')
 685     # is in tgroup and there can be no 'text'
 686     return result
 687
 688
 689 def convert_tgroup(ctx, xml):
 690     # tgroup does not expand to anything, but the nested colspecs need to
 691     # be put into a colgroup
 692     cols = xml.findall('colspec')
 693     result = []
 694     if cols:
 695         result.append('<colgroup>\n')
 696         for col in cols:
 697             result.extend(convert_colspec(ctx, col))
 698             xml.remove(col)
 699         result.append('</colgroup>\n')
 700     convert_inner(ctx, xml, result)
 701     # is in informaltable and there can be no 'text'
 702     return result
 703
 704
 705 def convert_ulink(ctx, xml):
 706     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 707     if xml.tail:
 708         result.append(xml.tail)
 709     return result
 710
 711
# Dispatch table: docbook tag -> converter function. Each converter is called
# as converter(ctx, xml) and returns a list of html fragments. Tags without an
# entry are handled by convert__unknown (see convert_inner).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
 763
# html generation helpers

# Start of every generated html page. It takes two '%s' arguments: the page
# title and the <link> tags for the head (see generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 775
 776
 777 def generate_head_links(ctx):
 778     n = ctx['nav_home']
 779     result = [
 780         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 781     ]
 782     if 'nav_up' in ctx:
 783         n = ctx['nav_up']
 784         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 785     if 'nav_prev' in ctx:
 786         n = ctx['nav_prev']
 787         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 788     if 'nav_next' in ctx:
 789         n = ctx['nav_next']
 790         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 791     return ''.join(result)
 792
 793
 794 def generate_nav_links(ctx):
 795     n = ctx['nav_home']
 796     result = [
 797         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 798     ]
 799     if 'nav_up' in ctx:
 800         n = ctx['nav_up']
 801         result.append(
 802             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 803     else:
 804         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 805     if 'nav_prev' in ctx:
 806         n = ctx['nav_prev']
 807         result.append(
 808             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
 809     else:
 810         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
 811     if 'nav_next' in ctx:
 812         n = ctx['nav_next']
 813         result.append(
 814             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
 815     else:
 816         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
 817
 818     return ''.join(result)
 819
 820
 821 def generate_toc(ctx, node):
 822     result = []
 823     for c in node.children:
 824         # TODO: urlencode the filename: urllib.parse.quote_plus()
 825         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
 826             c.title_tag, c.filename, c.title))
 827         if c.subtitle:
 828             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
 829         result.append('</dt>\n')
 830         if c.children:
 831             result.append('<dd><dl>')
 832             result.extend(generate_toc(ctx, c))
 833             result.append('</dl></dd>')
 834     return result
 835
 836
 837 def generate_basic_nav(ctx):
 838     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 839   <tr valign="middle">
 840     <td width="100%%" align="left" class="shortcuts"></td>
 841     %s
 842   </tr>
 843 </table>
 844     """ % generate_nav_links(ctx)
 845
 846
 847 def generate_alpha_nav(ctx, divs, prefix):
 848     ix_nav = []
 849     for s in divs:
 850         title = xml_get_title(s)
 851         ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))
 852
 853     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 854   <tr valign="middle">
 855     <td width="100%%" align="left" class="shortcuts">
 856       <span id="nav_index">
 857         %s
 858       </span>
 859     </td>
 860     %s
 861   </tr>
 862 </table>
 863     """ % ('\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
 864
 865
 866 def generate_refentry_nav(ctx, refsect1s, result):
 867     result.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 868   <tr valign="middle">
 869     <td width="100%%" align="left" class="shortcuts">
 870       <a href="#" class="shortcut">Top</a>""")
 871
 872     for s in refsect1s:
 873         # don't list TOC sections (role="xxx_proto")
 874         if s.attrib.get('role', '').endswith("_proto"):
 875             continue
 876
 877         title = xml_get_title(s)
 878         result.append("""
 879           <span id="nav_description">
 880             <span class="dim">|</span>
 881             <a href="#%s" class="shortcut">%s</a>
 882           </span>""" % (s.attrib['id'], title))
 883     result.append("""
 884     </td>
 885     %s
 886   </tr>
 887 </table>
 888 """ % generate_nav_links(ctx))
 889
 890
 891 def get_id(node):
 892     xml = node.xml
 893     node_id = xml.attrib.get('id', None)
 894     if node_id:
 895         return node_id
 896
 897     logging.info('%d: No "id" attribute on "%s", generating one',
 898                  xml.sourceline, xml.tag)
 899     ix = []
 900     # Generate the 'id'. We need to walk up the xml-tree and check the positions
 901     # for each sibling.
 902     parent = xml.getparent()
 903     while parent is not None:
 904         children = parent.getchildren()
 905         ix.insert(0, str(children.index(xml) + 1))
 906         xml = parent
 907         parent = xml.getparent()
 908     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
 909     return 'id-1.' + '.'.join(ix)
 910
 911
 912 def convert_chunk_with_toc(ctx, div_class, title_tag):
 913     node = ctx['node']
 914     result = [
 915         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 916         generate_basic_nav(ctx),
 917         '<div class="%s">' % div_class,
 918     ]
 919     title = node.xml.find('title')
 920     if title is not None:
 921         result.append("""
 922 <div class="titlepage">
 923 <%s class="title"><a name="%s"></a>%s</%s>
 924 </div>""" % (
 925             title_tag, get_id(node), title.text, title_tag))
 926         node.xml.remove(title)
 927     convert_inner(ctx, node.xml, result)
 928     result.append("""<p>
 929   <b>Table of Contents</b>
 930 </p>
 931 <div class="toc">
 932   <dl class="toc">
 933 """)
 934     result.extend(generate_toc(ctx, node))
 935     result.append("""</dl>
 936 </div>
 937 </div>
 938 </body>
 939 </html>""")
 940     return result
 941
 942
 943 # docbook chunks
 944
 945
 946 def convert_book(ctx):
 947     node = ctx['node']
 948     result = [
 949         HTML_HEADER % (node.title, generate_head_links(ctx)),
 950         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
 951     <tr><th valign="middle"><p class="title">%s</p></th></tr>
 952 </table>
 953 <div class="book">
 954 """ % node.title
 955     ]
 956     bookinfo = node.xml.findall('bookinfo')[0]
 957     # we already used the title
 958     title = bookinfo.find('title')
 959     if title is not None:
 960         bookinfo.remove(title)
 961     result.extend(convert_bookinfo(ctx, bookinfo))
 962     result.append("""<div class="toc">
 963   <dl class="toc">
 964 """)
 965     result.extend(generate_toc(ctx, node.root))
 966     result.append("""</dl>
 967 </div>
 968 </div>
 969 </body>
 970 </html>""")
 971     return result
 972
 973
 974 def convert_chapter(ctx):
 975     return convert_chunk_with_toc(ctx, 'chapter', 'h2')
 976
 977
 978 def convert_glossary(ctx):
 979     node = ctx['node']
 980     glossdivs = node.xml.findall('glossdiv')
 981
 982     result = [
 983         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 984         generate_alpha_nav(ctx, glossdivs, 'gls'),
 985         """<div class="index">
 986 <div class="titlepage"><h1 class="title">
 987 <a name="%s"></a>%s</h1>
 988 </div>""" % (get_id(node), node.title)
 989     ]
 990
 991     for i in glossdivs:
 992         result.extend(convert_glossdiv(ctx, i))
 993
 994     result.append("""</div>
 995 </body>
 996 </html>""")
 997     return result
 998
 999
def convert_index(ctx):
    """Render an 'index' chunk from the indexdivs nested in its indexdiv.

    An index without any 'indexdiv' child is rendered as a page with just
    the navigation and the title instead of failing.

    Args:
      ctx: conversion context; the index node is read from ctx['node']

    Returns:
      list of html fragments that make up the page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. find() returns None if there is no
    # outer indexdiv. Compare against None explicitly, since an element
    # without children evaluates to False.
    outer_indexdiv = node.xml.find('indexdiv')
    if outer_indexdiv is not None:
        indexdivs = outer_indexdiv.findall('indexdiv')
    else:
        indexdivs = []

    # NOTE(review): the div class is 'glossary' here while convert_glossary()
    # emits 'index' - this looks swapped, confirm against the stylesheet.
    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        """<div class="glossary">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for indexdiv in indexdivs:
        result.extend(convert_indexdiv(ctx, indexdiv))
    result.append("""</div>
</body>
</html>""")
    return result
1019
1020
def convert_part(ctx):
    """Render a 'part' chunk: 'part' div, 'h1' title, trailing toc."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1023
1024
def convert_preface(ctx):
    """Render a 'preface' chunk: navigation, optional title and the content.

    Args:
      ctx: conversion context; the preface node is read from ctx['node']

    Returns:
      list of html fragments that make up the page
    """
    preface = ctx['node']
    page_title = preface.title + ": " + preface.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]

    title_elem = preface.xml.find('title')
    if title_elem is not None:
        anchor = get_id(preface)
        result.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (anchor, title_elem.text))
        # The title is written out above; take it out of the xml before the
        # rest of the chunk gets converted.
        preface.xml.remove(title_elem)

    # convert_inner() adds its output to 'result' in place
    convert_inner(ctx, preface.xml, result)
    result.append("""</div>
</body>
</html>""")
    return result
1044
1045
def convert_reference(ctx):
    """Render a 'reference' chunk: 'reference' div, 'h1' title, trailing toc."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1048
1049
def convert_refentry(ctx):
    """Render a 'refentry' chunk: navigation, name block and all refsect1s.

    Args:
      ctx: conversion context; the refentry node is read from ctx['node']

    Returns:
      list of html fragments that make up the page
    """
    refentry = ctx['node']
    anchor = get_id(refentry)
    sections = refentry.xml.findall('refsect1')

    page_title = refentry.title + ": " + refentry.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    # the navigation is added to 'result' in place
    generate_refentry_nav(ctx, sections, result)

    # NOTE(review): the purpose text after the dash is hard-coded; it looks
    # like a placeholder - confirm where the real text should come from.
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — module for gtk-doc unit test</p>
    </td>
    <td class="gallery_image" valign="top" align="right"></td>
  </tr></table>
</div>
""" % (anchor, anchor, refentry.title, refentry.title))

    for section in sections:
        result += convert_refsect1(ctx, section)
    result.append("""</div>
</body>
</html>""")
    return result
1079
1080
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Maps the name of a chunk node to the function that renders that chunk;
# convert() looks the converter up by node.name.
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    glossary=convert_glossary,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
)
1092
1093
def generate_nav_nodes(files, node):
    """Collect the navigation targets (home, up, prev, next) for a node.

    Args:
      files: list of nodes; prev/next are the neighbours of node in this list
      node: the node to compute the navigation for, must be in files

    Returns:
      dict that always has 'nav_home' and, where applicable, 'nav_up',
      'nav_prev' and 'nav_next'
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    is_first = position <= 0
    is_last = position >= len(files) - 1
    if not is_first:
        nav['nav_prev'] = files[position - 1]
    if not is_last:
        nav['nav_next'] = files[position + 1]
    return nav
1107
1108
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The output file is created even if there is no converter for the chunk;
    in that case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, handed to the converters as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    html_path = os.path.join(out_dir, node.filename)
    with open(html_path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            **generate_nav_nodes(files, node),
        }

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
            return
        html.writelines(converter(ctx))
1133
1134
def create_devhelp2_toc(node):
    """Build the nested '<sub>' entries for all descendants of node.

    Args:
      node: tree node; each child needs 'title', 'filename' and 'children'

    Returns:
      list of xml lines; children that have children themselves become an
      opening and a closing tag around their own entries, the others a
      single self-closing tag
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append(
                '<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append(
            '<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1145
1146
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into extra xml attributes.

    The condition value is a list of 'key:value' entries separated by '|'
    (since, deprecated, ...). An entry without a ':' (e.g. a plain
    'deprecated') yields an attribute with an empty value; empty entries
    are skipped.

    Args:
      node: object with a dict-like 'attrib'

    Returns:
      the attributes as a string with a leading space, e.g. ' since="1.0"',
      or '' if there is no condition
    """
    condition = node.attrib.get('condition')
    if not condition:
        return ''

    attribs = []
    # Escape double quotes, since the values end up in a quoted attribute.
    for entry in condition.replace('"', '&quot;').split('|'):
        if not entry:
            continue
        # Split at the first ':' only, the value itself may contain colons.
        # partition() leaves the value empty if there is no ':' at all.
        key, _, value = entry.partition(':')
        attribs.append('%s="%s"' % (key, value))
    if not attribs:
        return ''
    return ' ' + ' '.join(attribs)
1154
1155
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 '<keyword>' line for a refsect2 node.

    Args:
      node: xml node with 'role' and 'id' attributes
      base_link: link prefix that the node id gets appended to

    Returns:
      the keyword line, terminated with a newline
    """
    keyword_type = node.attrib['role']
    keyword_name = xml_get_title(node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, keyword_name, link, conditions)
1160
1161
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 '<keyword>' line for an entry inside a refsect3.

    Args:
      node: xml node with a 'role' attribute
      base_link: link prefix that name gets appended to
      title: value for the 'name' attribute of the keyword
      name: anchor name, appended to base_link to form the link

    Returns:
      the keyword line, terminated with a newline
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1166
1167
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' file with the toc and the keywords.

    The content is built completely before the file is opened, so a failure
    while collecting the data does not leave an empty file behind.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the docbook document
      files: list of chunk nodes; files[0].root is used as the toc root and
        the xml of each node is searched for keywords

    A missing bookinfo or book title results in an empty title. The 'online'
    attribute is left out if there is no online-location ulink.
    """
    title = ''
    online_attr = ''
    # xpath() returns a list that is empty if nothing matches, so check for
    # emptiness instead of comparing against None.
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # NOTE(review): title and url are inserted without xml-escaping - confirm
    # whether they can contain '&', '<' or '"'.
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the details node
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the id is used as both keyword name and anchor
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1217
1218
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, derive the dirs from the location of the script
        (or from the ABS_TOP_SRCDIR environment variable) instead of using
        the configured datadir

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if not uninstalled:
        data_dir = os.path.join(config.datadir, 'gtk-doc/data')
        return (data_dir, data_dir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    found_next_to_script = os.path.exists(gtkdocdir + '/gtk-doc.xsl')
    if not found_next_to_script:
        # try 'srcdir' (set from makefiles) too
        srcdir_xsl = os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'
        if os.path.exists(srcdir_xsl):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1232
1233
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into chunked html plus a devhelp2 file.

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: output dir; the images, the stylesheet and all generated
        files are written there
      uninstalled: passed to get_dirs() to select where the style files are
        taken from

    Returns:
      None (there is no explicit return value)
    """
    tree = etree.parse(index_file)
    # resolve the xml-includes, so that we work on the complete document
    tree.xinclude()

    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the formatter to the copy of
    # the stylesheet (the file is opened in append mode)
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    # NOTE(review): the path of the installed html docs is hard-coded here
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    files = chunk(tree.getroot())
    files = list(PreOrderIter(files))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1269
1270
def run(options):
    """Entry point: create the output dir, run main() and exit the process.

    Args:
      options: parsed command line options; options.args holds the module
        name and the path of the main document, options.uninstalled is
        handed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # the dir is already there, every other error is propagated
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))