fixxref: only add glib types to nolinks if we don't have them
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob4ea6bcffaee87b0a75dfcd9c6bae372aa933d3d0
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - we're missing local anchors in refsect
43 - we should create id attrs on the docbook xml, instead of injecting anchors
44 in our xsl layer
45 - check each docbook tag if it can contain #PCDATA, if not don't check for
46 xml.text
47 - consider some perf-warnings flag
48 - see 'No "id" attribute on'
49 - find a better way to print context for warnings
50 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
52 DIFFERENCES:
53 - titles
54 - we add the chunk label to both title in toc and title on the page
55 - docbook xsl only sometimes adds the label to the titles and when it does it
56 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
57 - navigation
58 - we always add an up-link except on the first page
60 OPTIONAL:
61 - minify html: https://pypi.python.org/pypi/htmlmin/
63 Requirements:
64 sudo pip3 install anytree lxml pygments
66 Example invocation:
67 cd tests/bugs/docs/
68 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
69 xdg-open db2html/index.html
70 meld html db2html
72 Benchmarking:
73 cd tests/bugs/docs/;
74 rm html-build.stamp; time make html-build.stamp
75 """
77 import argparse
78 import errno
79 import logging
80 import os
81 import shutil
82 import sys
84 from anytree import Node, PreOrderIter
85 from copy import deepcopy
86 from glob import glob
87 from lxml import etree
88 from pygments import highlight
89 from pygments.lexers import CLexer
90 from pygments.formatters import HtmlFormatter
91 from timeit import default_timer as timer
93 from . import config, fixxref
# Pygments setup.
# Cache of lexer instances keyed by lower-case language name; only the C lexer
# is created up front.
LEXERS = {
    'c': CLexer(),
}
# nowrap=True: the formatter emits only the highlighted spans; the code using
# it supplies its own wrapping markup.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams:
    """Per-tag chunking parameters.

    Attributes:
      prefix: file name prefix used when a chunk has no 'id' attribute
      parent: tag name of the parent chunk type (or None)
      min_idx: a node is only written to its own file if its index among the
        sibling chunks is >= this value
      idx: running sequence number used for generated chunk names
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.prefix, self.parent, self.min_idx = prefix, parent, min_idx
        # generated names start counting at 1
        self.idx = 1
# A min_idx no index can reach: such tags are never written to their own file.
DONT_CHUNK = float('inf')

# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# (tag, ChunkParams constructor arguments); every tag gets its own instance
# and thereby its own sequence counter.
_CHUNK_SPECS = (
    ('appendix', ('app', 'book')),
    ('book', ('bk',)),
    ('chapter', ('ch', 'book')),
    ('glossary', ('go', 'book')),
    ('index', ('ix', 'book')),
    ('part', ('pt', 'book')),
    ('preface', ('pr', 'book')),
    ('refentry', ('re', 'book')),
    ('reference', ('rn', 'book')),
    ('sect1', ('s', 'chapter', 1)),
    ('section', ('s', 'chapter', 1)),
    ('sect2', ('s', 'sect1', DONT_CHUNK)),
    ('sect3', ('s', 'sect2', DONT_CHUNK)),
    ('sect4', ('s', 'sect3', DONT_CHUNK)),
    ('sect5', ('s', 'sect4', DONT_CHUNK)),
)
CHUNK_PARAMS = {tag: ChunkParams(*args) for tag, args in _CHUNK_SPECS}

# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Per tag: (title xpath, subtitle xpath or None). The '_' entry is the
# fallback for tags without a dedicated entry.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose'),
    ),
}

# selects every element carrying an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

# selects every glossary entry
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# document-wide footnote counter
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}
def gen_chunk_name(node, chunk_params):
    """Generate a chunk file name.

    The 'id' attribute of the node is used when present. Otherwise the name
    is built from the prefix in `chunk_params` plus its two-digit sequence
    number, and the sequence number is advanced.
    """
    explicit_id = node.attrib.get('id')
    if explicit_id is not None:
        return explicit_id

    name = '{}{:02d}'.format(chunk_params.prefix, chunk_params.idx)
    chunk_params.idx += 1

    # TODO: handle parents to make names of nested tags like in docbook.
    # We'd only need to prepend the parent if there are > 1 of them in the
    # xml. Note, the parents we have are not sufficient, e.g. 'index' can be
    # in 'book' or 'part' or ... Maybe we can track the chunk_parents when we
    # chunk explicitly and on each level maintain the 'idx'.

    logging.info('Gen chunk name: "%s"', name)
    return name
def get_chunk_titles(module, node):
    """Collect title and subtitle information for a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'; entries stay None when the xpath from TITLE_XPATHS finds
    nothing. A 'label' attribute on the node is prepended to the title.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    # minimal conversion context for convert_title()
    ctx = {'module': module, 'files': []}
    info = dict.fromkeys(('title', 'title_tag', 'subtitle', 'subtitle_tag'))

    found = title_xpath(node)
    if found:
        title_xml = found[0]
        # handle chunk label for tocs
        label = node.attrib.get('label')
        prefix = label + '. ' if label else ''
        info['title'] = prefix + ''.join(convert_title(ctx, title_xml))
        # titles that come from another tag (e.g. refentrytitle) keep that tag
        info['title_tag'] = tag if title_xml.tag == 'title' else title_xml.tag

    if subtitle_xpath:
        found = subtitle_xpath(node)
        if found:
            subtitle_xml = found[0]
            info['subtitle'] = ''.join(convert_title(ctx, subtitle_xml))
            info['subtitle_tag'] = subtitle_xml.tag
    return info
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.
    """
    tag = xml_node.tag
    params = CHUNK_PARAMS.get(tag)
    if params:
        title_args = get_chunk_titles(module, xml_node)
        name = gen_chunk_name(xml_node, params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx < params.min_idx:
            # stays on the parent's page and is addressed through an anchor
            filename = parent.filename
            anchor = '#' + name
        else:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent and continue with a
                # detached copy
                detached = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = detached
            filename = name + '.html'
            anchor = None

        parent = Node(tag, parent=parent, xml=xml_node, depth=depth, idx=idx,
                      filename=filename, anchor=anchor, **title_args)

        depth += 1
        idx = 0

    for child in xml_node:
        chunk(child, module, depth, idx, parent)
        # only chunk tags advance the sibling index
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Register link targets and titles for all ids in the given chunks.

    For each element with an 'id' attribute an entry is added to `links`: the
    plain file name if the id equals the chunk name, 'file#id' otherwise. If
    the element has a title, it is also recorded in the global `titles`.
    """
    for node in files:
        filename = node.filename
        # strip the 5 trailing characters ('.html') to get the chunk name
        chunk_name = filename[:-5]
        for elem in ID_XPATH(node.xml):
            elem_id = elem.attrib['id']
            links[elem_id] = (
                filename if elem_id == chunk_name
                else filename + '#' + elem_id)

            title_xpath = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
            found = title_xpath(elem)
            if not found:
                continue
            title_xml = found[0]
            # TODO: consider to eval those lazily
            titles[elem_id] = {
                'title': etree.tostring(title_xml, method="text", encoding=str).strip(),
                'xml': title_xml,
                'tag': elem.tag,
            }
def build_glossary(files):
    """Fill the global `glossary` from all chunks whose root tag is 'glossary'.

    Maps the stripped text of each 'glossterm' to the stripped text of the
    matching 'glossdef'.
    """
    def plain_text(elem):
        return etree.tostring(elem, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This
                # only supports what we commonly use
                term = plain_text(entry.find('glossterm'))
                definition = plain_text(entry.find('glossdef'))
                glossary[term] = definition
                # logging.debug('glosentry: %s:%s', term, definition)
311 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of `xml` and append their output to `result`.

    Tags without an entry in `convert_tags` go through convert__unknown().
    """
    for node in xml:
        converter = convert_tags.get(node.tag, convert__unknown)
        result.extend(converter(ctx, node))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the converted children."""
    children = []
    convert_inner(ctx, xml, children)
    return children
def convert_skip(ctx, xml):
    """Drop the tag and everything inside it."""
    return list()
def append_text(ctx, text, result):
    """Append `text`, escaped for use as html content, to `result`.

    Nothing is appended for None/empty text. Whitespace-only text is only
    kept if 'no-strip' is set in `ctx` (e.g. inside <pre>).

    The characters '&', '<' and '>' are replaced by their entities. The text
    coming from the parsed xml has its entities already resolved, so a literal
    '&' has to be escaped again to produce valid html.
    """
    if not text:
        return
    if 'no-strip' not in ctx and not text.strip():
        return
    # '&' goes first, otherwise the '&' of the entities added below would be
    # escaped a second time
    result.append(
        text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in `convert_tags`.

    Chunk tags yield nothing, xml comments are passed through as html
    comments, and any other tag is wrapped in a pair of html comments around
    its converted children. A warning is logged once per unknown tag.
    """
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    html = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, html)
    html.append('<!-- /' + tag + '-->\n')
    return html
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a <div> with a heading.

    `h_tag` is the html heading tag used for the title. `inner_func` converts
    the children and can be replaced to customize that step. If there is a
    title and the section has an 'id', an anchor is put before the heading.
    """
    html = ['<div class="%s">\n' % xml.tag]
    title = xml.find('title')
    if title is not None:
        if 'id' in xml.attrib:
            html.append('<a name="%s"></a>' % xml.attrib['id'])
        heading = ''.join(convert_title(ctx, title))
        html.append('<%s>%s</%s>' % (h_tag, heading, h_tag))
    append_text(ctx, xml.text, html)
    inner_func(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def xml_get_title(ctx, xml):
    """Return the converted 'title' child of `xml` as a string.

    Logs a warning and returns '' if there is no title.
    """
    title = xml.find('title')
    if title is None:
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title))
378 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a div with a fixed 'Abstract' title."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert 'acronym', using the glossary entry (if any) as tooltip."""
    term = xml.text
    # TODO: print a sensible warning if missing
    description = glossary.get(term, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (description, term)]
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_anchor(ctx, xml):
    """Convert 'anchor' into a named html anchor using its 'id' attribute."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the title page div, closed by a rule."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a div wrapping an html blockquote."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert a tag into <code>, with the docbook tag name as css class."""
    html = ['<code class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert 'colspec' into a <col> tag.

    'colname' becomes the class and 'colwidth' the width attribute.
    """
    attrs = xml.attrib
    html = ['<col']
    for name, template in (('colname', ' class="%s"'),
                           ('colwidth', ' width="%s"')):
        if name in attrs:
            html.append(template % attrs[name])
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert 'command' into bold code with the 'userinput' class."""
    html = ['<strong class="userinput"><code>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></strong>')
    append_text(ctx, xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into an <h3> heading inside a div."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(ctx, xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a tag into a <div>, with the docbook tag name as css class."""
    html = ['<div class="%s">\n' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert 'emphasis'.

    With a 'role' attribute the role becomes the class of a plain <span>;
    without one, the text is wrapped in <em>.
    """
    role = xml.attrib.get('role')
    if role is None:
        opening, closing = '<span class="emphasis"><em>', '</em></span>'
    else:
        opening, closing = '<span class="%s">' % role, '</span>'
    html = [opening]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append(closing)
    append_text(ctx, xml.tail, html)
    return html
def convert_em_class(ctx, xml):
    """Convert a tag into <em><code>, with the tag name as css class."""
    html = ['<em class="%s"><code>' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table 'entry' into a table cell.

    The html tag is read from ctx['table.entry'] (set by the thead/tbody
    converters). 'role' becomes the class and 'morerows' the rowspan.
    """
    cell_tag = ctx['table.entry']
    attrs = xml.attrib
    html = ['<' + cell_tag]
    if 'role' in attrs:
        html.append(' class="%s"' % attrs['role'])
    if 'morerows' in attrs:
        # morerows counts the additional rows, rowspan the total
        rowspan = 1 + int(attrs['morerows'])
        html.append(' rowspan="%s"' % rowspan)
    html.append('>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</' + cell_tag + '>')
    append_text(ctx, xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference mark and queue the footnote body.

    The footnote body is appended to ctx['footnotes'] (a list of html fragment
    lists). The returned fragments hold the superscript link to that body,
    followed by the text that comes after the footnote in the parent.

    Only the leading text of a 'para'/'simpara' child is used as body; other
    content is reported with a warning.
    """
    # footnotes idx is not per page, but per doc
    global footnote_idx
    footnotes = ctx.get('footnotes', [])
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # para.text is None if the para starts with a child tag; append_text
        # skips that case and escapes the text like everywhere else
        append_text(ctx, para.text, inner)
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes

    result = ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
    # keep the text following the footnote inside the parent paragraph
    append_text(ctx, xml.tail, result)
    return result
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a paragraph that starts with a bold title.

    The 'title' child becomes the bold lead-in and the content of the 'para'
    child follows within the same <p>. Either child may be missing.
    """
    title_tag = xml.find('title')
    if title_tag is not None:
        result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
    else:
        result = ['<p>']
    para_tag = xml.find('para')
    if para_tag is not None:
        # append_text() takes the ctx as first argument
        append_text(ctx, para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(ctx, para_tag.tail, result)
    result.append('</p>')
    append_text(ctx, xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into a definition list description (<dd>)."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv' into an anchored <h3> followed by its entries.

    The 'title' child is removed from `xml` after its text was read.
    """
    title_node = xml.find('title')
    heading = title_node.text
    xml.remove(title_node)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert 'glossentry': only the converted children are emitted."""
    children = []
    convert_inner(ctx, xml, children)
    return children
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into a definition list term (<dt>) with an anchor.

    The anchor name is the 'id' of a nested 'anchor' tag if there is one,
    otherwise 'glossterm-' plus the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, text = '', ''
    else:
        glossid = anchor.attrib.get('id', '')
        # the term text may follow the anchor
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    return ['<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
        glossid, text)]
def convert_imageobject(ctx, xml):
    """Convert 'imageobject' into an <img> tag for its 'imagedata' child.

    Returns no output if there is no 'imagedata' child.
    """
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    src = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv' into an anchored <h3> followed by its entries.

    The 'title' child is removed from `xml` after its text was read.
    """
    title_node = xml.find('title')
    heading = title_node.text
    xml.remove(title_node)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into an html table inside a div.

    pgwide="1" makes the table full width and frame="none" removes the border.
    """
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into an <img> tag using its 'fileref'."""
    # TODO(ensonic): warn on missing fileref attr?
    src = xml.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into an unordered list inside a div."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert 'link' into an html link to the target of 'linkend'.

    The target is looked up with fixxref.GetXRef(). If it resolves, the link
    is made relative to ctx['module'] and gets the target's title (if known)
    as tooltip. Otherwise the bad reference is reported and only the link
    text is emitted. With an empty 'linkend' the content is emitted as is.
    """
    linkend = xml.attrib['linkend']
    if not linkend:
        result = []
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
    else:
        parts = []
        append_text(ctx, xml.text, parts)
        convert_inner(ctx, xml, parts)
        text = ''.join(parts)

        tid, href = fixxref.GetXRef(linkend)
        if not href:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
        else:
            title = titles.get(tid)
            title_attr = ' title="%s"' % title['title'] if title else ''
            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
    append_text(ctx, xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert 'listitem' into an html list item."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html.append('</li>')
    # is in itemizedlist and there can be no 'text'
    return html
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a paragraph inside a div."""
    html = ['<div class="literallayout"><p><br>\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into a numbered list inside a div."""
    html = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, html)
    html.append('</ol></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert 'para' into <p>.

    'role' becomes the css class; an 'id' adds an anchor at the start of the
    paragraph.
    """
    attrs = xml.attrib
    if 'role' in attrs:
        html = ['<p class="%s">' % attrs['role']]
    else:
        html = ['<p>']
    if 'id' in attrs:
        html.append('<a name="%s"></a>' % attrs['id'])
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert a tag into <p>, with the docbook tag name as css class.

    An 'id' adds an anchor in front of the paragraph.
    """
    html = []
    if 'id' in xml.attrib:
        html.append('<a name="%s"></a>' % xml.attrib['id'])
    html.append('<p class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert 'phrase' into <span>, with 'role' (if set) as css class."""
    attrs = xml.attrib
    html = ['<span']
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert a tag into <pre>, with the docbook tag name as css class.

    While the content is converted, 'no-strip' is set in `ctx` so that
    whitespace-only text is kept.
    """
    html = ['<pre class="%s">' % xml.tag]
    # Since we're inside <pre> don't skip newlines
    ctx['no-strip'] = True
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</pre>')
    # the tail is outside of the <pre> again
    del ctx['no-strip']
    append_text(ctx, xml.tail, html)
    return html
def convert_programlisting(ctx, xml):
    """Convert 'programlisting' into a <pre> block.

    Listings with role="example" are syntax highlighted with pygments
    according to the 'language' attribute (default: 'c') and get a line number
    column. Lexers are created on first use and cached in LEXERS. If pygments
    has no lexer for the language, a warning is logged and the text is
    emitted without highlighting. Other listings are converted like normal
    content.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            lexer = LEXERS.get(lang)
            if lexer is None:
                # local import: the module only imports CLexer at the top
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    lexer = LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    lexer = None
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join(
                    str(i) for i in range(1, line_count + 1))
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # escape the raw source, it has not passed through pygments
                append_text(ctx, xml.text, result)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(ctx, xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into a span with the content in double quotes."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with an <h2> heading.

    A horizontal rule is inserted between two consecutive 'refsect2' children.
    """
    def inner_with_dividers(ctx, xml, result):
        previous = None
        for node in xml:
            follows_refsect2 = previous is not None and previous.tag == node.tag
            if node.tag == 'refsect2' and follows_refsect2:
                result.append('<hr>\n')
            converter = convert_tags.get(node.tag, convert__unknown)
            result.extend(converter(ctx, node))
            previous = node

    return convert_sect(ctx, xml, 'h2', inner_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with an <h3> heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with an <h4> heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_row(ctx, xml):
    """Convert 'row' into a table row."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_sect1_tag(ctx, xml):
    """Convert 'sect1' into a section with an <h2> heading."""
    heading_tag = 'h2'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with an <h3> heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with an <h4> heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_simpara(ctx, xml):
    """Convert 'simpara' into a plain <p>."""
    html = ['<p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert a tag into <span>, with the docbook tag name as css class."""
    html = ['<span class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert 'table' into a titled html table inside a div.

    An 'id' adds an anchor and a 'title' child becomes a bold caption
    paragraph in front of the table.
    """
    html = ['<div class="table">']
    if 'id' in xml.attrib:
        html.append('<a name="%s"></a>' % xml.attrib['id'])
    title = xml.find('title')
    if title is not None:
        html.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        html.extend(convert_title(ctx, title))
        html.append('</b></p>')
    html.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')

    convert_inner(ctx, xml, html)

    html.append('</table></div></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert 'tbody'; entries below it are rendered as <td>."""
    # read by convert_entry()
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert 'tgroup'.

    tgroup does not expand to anything, but the nested colspecs need to be put
    into a colgroup. The colspecs are removed from `xml` once converted, so
    that they are not emitted again with the other children.
    """
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert 'thead'; entries below it are rendered as <th>."""
    # read by convert_entry()
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html.append('</thead>')
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert the content of a title tag without any wrapping markup.

    This is always explicitly called from some context (in `convert_tags`
    the 'title' tag itself is mapped to convert_skip).
    """
    fragments = []
    append_text(ctx, xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(ctx, xml.tail, fragments)
    return fragments
def convert_ulink(ctx, xml):
    """Convert 'ulink' into an html link to its 'url' attribute.

    The url itself is used as link text if the tag has no text.
    """
    url = xml.attrib['url']
    label = xml.text if xml.text else url
    html = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(ctx, xml.tail, html)
    return html
def convert_userinput(ctx, xml):
    """Convert 'userinput' into bold text with the 'command' class."""
    html = ['<span class="command"><strong>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</strong></span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a two column table inside a div."""
    html = [
        '<div class="variablelist"><table border="0" class="variablelist">\n'
        '<colgroup>\n'
        '<col align="left" valign="top">\n'
        '<col>\n'
        '</colgroup>\n'
        '<tbody>'
    ]
    convert_inner(ctx, xml, html)
    html.append('</tbody>\n</table></div>')
    return html
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into one row of the variablelist table.

    The 'term' child goes into the first cell and the content of the
    'listitem' child into the second one.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row; a second opening tag here would leave the row unclosed
    result.append('</tr>')
    return result
def convert_xref(ctx, xml):
    """Convert 'xref' into a link labelled with the title of its target.

    The target is looked up with fixxref.GetXRef() and its title in the global
    `titles`. If no title is known for the target, a warning is logged and
    only the text following the xref is emitted.
    """
    linkend = xml.attrib['linkend']
    (tid, href) = fixxref.GetXRef(linkend)
    result = []
    title = titles.get(tid)
    if title is None:
        logging.warning('%s: No title found for xref linkend "%s"',
                        xml.sourceline, linkend)
    else:
        # all sectN need to become 'section'
        tag = title['tag']
        tag = {
            'sect1': 'section',
            'sect2': 'section',
            'sect3': 'section',
            'sect4': 'section',
            'sect5': 'section',
        }.get(tag, tag)
        result.append(
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, title['title'], tag, ''.join(convert_title(ctx, title['xml']))))

    append_text(ctx, xml.tail, result)
    return result
# Dispatch table: docbook tag name -> converter function. Each converter takes
# (ctx, xml) and returns a list of html fragments. Tags that are not listed
# here are handled by convert__unknown().
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly by the code handling their parent
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1075 # conversion helpers
# Start of every generated page. Takes two '%s' arguments: the page title and
# a block of extra head markup that is placed in front of the stylesheet link.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Generate the <link rel=...> tags for the html head.

    ctx['nav_home'] is required; 'nav_up', 'nav_prev' and 'nav_next' are
    only emitted if present in `ctx`.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Render the table cells with the home/up/prev/next navigation icons.

    Args:
      ctx: conversion context; must contain 'nav_home' and may contain
           'nav_up', 'nav_prev' and 'nav_next'. Each value needs a
           'filename' attribute.

    Returns:
      A string of ``<td>`` cells. A missing target is rendered as a cell
      with the matching '*-insensitive.png' image and no link.
    """
    home = ctx['nav_home']
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % home.filename
    ]
    # (context key, cell when the target exists, cell when it does not)
    optional = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    for key, active, inactive in optional:
        if key in ctx:
            cells.append(active % ctx[key].filename)
        else:
            cells.append(inactive)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Render the table-of-contents entries below 'node'.

    Args:
      ctx: conversion context, only passed on to the recursive calls
      node: tree node whose 'children' are listed; every child needs
            'filename', 'anchor', 'title', 'title_tag', 'subtitle',
            'subtitle_tag' and 'children'

    Returns:
      A list of html fragments (``<dt>`` entries, nested levels wrapped in
      ``<dd><dl>``); empty if the node has no children.
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = (child.filename + child.anchor) if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            # nested levels become a definition list inside the parent entry
            entries += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Render the top navigation table without any shortcut links.

    Args:
      ctx: conversion context, passed to generate_nav_links()

    Returns:
      The html for the navigation table as one string.
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
"""
    return template % nav_cells
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Render the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context
      divs: xml nodes (e.g. glossdiv/indexdiv) whose titles become shortcuts
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the 'nav_' id of the span holding the shortcuts

    Returns:
      The html for the navigation table as one string.
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    joined = '\n<span class="dim">|</span>\n'.join(shortcuts)

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
"""
    return template % (span_id, joined, generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to 'result'.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1
    that has an 'id' containing a '.' and whose role does not end in
    '_proto'.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 xml nodes of the refentry
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto")
        if attrs.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections (no '.')
        ref_id = attrs.get('id', '')
        if '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
<span id="nav_%s">
   <span class="dim">|</span> 
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))
    nav_cells = generate_nav_links(ctx)
    result.append("""
</td>
%s
</tr>
</table>
""" % nav_cells)
def generate_footer(ctx):
    """Render the footnotes block of a page.

    Args:
      ctx: conversion context; 'footnotes', if present, is an iterable of
           html fragment sequences

    Returns:
      A list of html fragments, empty when the context has no footnotes.
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    # flatten the per-footnote fragment sequences into the output
    footer += [fragment for note in ctx['footnotes'] for fragment in note]
    footer.append('</div>\n')
    return footer
def get_id_path(node):
    """Generate the index path used to build an 'id'.

    We walk up the xml-tree and record the 1-based position of each element
    among its siblings. When reaching the top of the xml tree we collect the
    remaining index entries from the chunked-tree (via 'idx' and 'parent').

    Args:
      node: chunk tree node with 'xml', 'idx' and 'parent' attributes

    Returns:
      A list of strings, outermost position first.
    """
    # Collected innermost-first and reversed once at the end, instead of
    # repeatedly inserting at the front of the list.
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # index() gives the child position directly; the deprecated
        # getchildren() copied the whole child list at every level.
        ix.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
def get_id(node):
    """Return the 'id' for a chunk node, generating one if it has none.

    Args:
      node: chunk tree node; 'node.xml' is the xml element to inspect

    Returns:
      The non-empty 'id' attribute of the element, or else a generated id
      of the form 'id-<n>.<n>...' built from get_id_path().
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a page with title, table of contents and body.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to convert
      div_class: css class for the div wrapping the page content
      title_tag: html tag name used for the title (e.g. 'h1')

    Returns:
      A list of html fragments making up the whole page.
    """
    chunk_node = ctx['node']
    page_title = chunk_node.title + ": " + chunk_node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if chunk_node.title:
        titlepage = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, get_id(chunk_node), chunk_node.title, title_tag)
        page.append(titlepage)

    toc_entries = generate_toc(ctx, chunk_node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
""")
        page += toc_entries
        page.append("""</dl>
</div>
""")
    # convert_inner() adds the converted child content to 'page' itself
    convert_inner(ctx, chunk_node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1297 # docbook chunks
def convert_book(ctx):
    """Convert the 'book' chunk into the index page.

    The page consists of the title banner, the converted 'bookinfo' (if the
    book has one), the table of contents for the whole tree and the footer.

    Args:
      ctx: conversion context; ctx['node'] is the book chunk

    Returns:
      A list of html fragments making up the whole page.
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # A book without <bookinfo> used to raise IndexError here; now the
    # bookinfo part of the page is simply left out.
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a toc page in a 'chapter' div with h2 title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert a 'glossary' chunk into a page.

    Args:
      ctx: conversion context; ctx['node'] is the glossary chunk

    Returns:
      A list of html fragments: header, alphabetic navigation over the
      'glossdiv' children, title, the converted glossdivs and the footer.
    """
    chunk_node = ctx['node']
    sections = chunk_node.xml.findall('glossdiv')

    page_title = chunk_node.title + ": " + chunk_node.root.title
    level = chunk_node.depth
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, sections, 'gls', 'glossary'),
        """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (level, get_id(chunk_node), chunk_node.title, level)
    ]
    for section in sections:
        page += convert_glossdiv(ctx, section)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_index(ctx):
    """Convert an 'index' chunk into a page.

    Args:
      ctx: conversion context; ctx['node'] is the index chunk

    Returns:
      A list of html fragments: header, alphabetic navigation over the
      nested 'indexdiv' elements, title, the converted indexdivs and the
      footer.
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. An index without any content has no
    # outer 'indexdiv'; treat that as an empty index instead of failing with
    # AttributeError on None.
    outer_indexdiv = node.xml.find('indexdiv')
    if outer_indexdiv is not None:
        indexdivs = outer_indexdiv.findall('indexdiv')
    else:
        indexdivs = []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a 'part' chunk: a toc page in a 'part' div with h1 title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert a 'preface' chunk into a page.

    Args:
      ctx: conversion context; ctx['node'] is the preface chunk

    Returns:
      A list of html fragments: header, basic navigation, optional title,
      the converted content and the footer.
    """
    chunk_node = ctx['node']
    page_title = chunk_node.title + ": " + chunk_node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]
    if chunk_node.title:
        titlepage = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(chunk_node), chunk_node.title)
        page.append(titlepage)
    # convert_inner() adds the converted child content to 'page' itself
    convert_inner(ctx, chunk_node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_reference(ctx):
    """Convert a 'reference' chunk: a toc page in a 'reference' div with h1 title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a page.

    Args:
      ctx: conversion context; ctx['node'] is the refentry chunk

    Returns:
      A list of html fragments: header, section navigation, the title block
      (with the optional gallery image), all converted 'refsect1' sections
      and the footer.
    """
    chunk_node = ctx['node']
    anchor_id = get_id(chunk_node)
    sections = chunk_node.xml.findall('refsect1')

    # The gallery image is refmeta/refmiscinfo/inlinegraphic, always taking
    # the first match on each level; any missing level means no image.
    gallery = ''
    element = chunk_node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        element = element.find(tag)
        if element is None:
            break
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, element))

    page_title = chunk_node.title + ": " + chunk_node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, sections, page)
    page.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (anchor_id, anchor_id, chunk_node.title, chunk_node.title, chunk_node.subtitle, gallery))

    for section in sections:
        page += convert_refsect1(ctx, section)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a toc page in a 'sect1' div with h2 title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: chunk name -> function that renders this chunk as a page.
# Each function takes the conversion context and returns the html fragments.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of nodes in the tree in pre-order
      node: current tree node, must be contained in 'files'

    Returns:
      A dict with 'nav_home' and, where available, 'nav_up', 'nav_prev' and
      'nav_next'.
    """
    targets = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        targets['nav_up'] = node.parent
    position = files.index(node)
    if position >= 1:
        targets['nav_prev'] = files[position - 1]
    if position + 1 < len(files):
        targets['nav_next'] = files[position + 1]
    return targets
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The output file is created even when there is no converter for the
    chunk type; in that case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Render the nested ``<sub>`` entries of the devhelp2 table of contents.

    Args:
      node: tree node whose 'children' are listed; every child needs
            'title', 'filename' and 'children'

    Returns:
      A list of xml fragments; children without children of their own are
      written as self-closing tags.
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition holds '|' separated entries of the form 'key:value' or
    just 'key'. Double quotes are replaced by '&quot;' first.

    Args:
      node: object with an 'attrib' mapping

    Returns:
      A string starting with a space and holding one key="value" pair per
      entry, or '' if the node has no 'condition'.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    escaped = node.attrib['condition'].replace('"', '&quot;')
    pairs = []
    for entry in escaped.split('|'):
        # deprecated can have no description; partition() then leaves the
        # value empty
        key, _, value = entry.partition(':')
        pairs.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(pairs)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Render the devhelp2 ``<keyword>`` entry for a refsect2 node.

    Args:
      node: xml node with 'role' and 'id' attributes
      base_link: link prefix that the node's 'id' is appended to

    Returns:
      The keyword tag as a string.
    """
    role = node.attrib['role']
    title = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Render the devhelp2 ``<keyword>`` entry for a refsect3 member row.

    Args:
      node: xml node with a 'role' attribute
      base_link: link prefix that 'name' is appended to
      title: value for the keyword's 'name' attribute
      name: anchor name used for the link

    Returns:
      The keyword tag as a string.
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file.

    The file contains the book header, the table of contents and one
    keyword entry for each refsect2 with a role, plus entries for its enum
    and struct members.

    Args:
      out_dir: already created output dir
      module: module name, used for the file name and the book name
      xml: root element of the document
      files: list of nodes in the tree in pre-order
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        # xpath() returns a (possibly empty) list, never None. Index only
        # after checking that there is a match, so that a missing bookinfo,
        # title or online-location link yields an empty attribute instead
        # of an IndexError (or an unbound 'online_url').
        title = ''
        online_url = ''
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
            # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
<functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the para
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the 'id' is used as both keyword name and anchor
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        for line in result:
            idx.write(line)
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, look next to the running script (or in
                   $ABS_TOP_SRCDIR) instead of the configured data dir

    Returns:
      A tuple (gtkdocdir, styledir).
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    # try 'srcdir' (set from makefiles) too, if the script dir lacks the xsl
    srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
    if (not os.path.exists(gtkdocdir + '/gtk-doc.xsl')
            and os.path.exists(srcdir + '/gtk-doc.xsl')):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into chunked html plus a devhelp2 index.

    The steps are numbered below; the time taken by each step is logged.

    Args:
      module: module name
      index_file: path of the main xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to pick the data file location
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the formatter to the copied css
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep only the nodes without an anchor
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed options; 'args' holds the module name and the path of
               the main document, 'uninstalled' is passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, other errors propagate
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))