mkhtml2: Specify the line-endings and the encoding
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob5f8e7fe6fb976b05124e0928e1cb13cbce9fc831
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - more chunk converters
39 - more tag converters:
40 - footnote: maybe track those in ctx and write them out at the end of the chunk
41 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
42 attr on the <img> tag of the 'imageobject'
43 - check each docbook tag if it can contain #PCDATA, if not don't check for
44 xml.text
45 - consider some perf-warnings flag
46 - see 'No "id" attribute on'
48 OPTIONAL:
49 - minify html: https://pypi.python.org/pypi/htmlmin/
51 Requirements:
52 sudo pip3 install anytree lxml pygments
54 Example invocation:
55 cd tests/bugs/docs/
56 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
57 xdg-open db2html/index.html
58 meld html db2html
60 Benchmarking:
61 cd tests/bugs/docs/;
62 rm html-build.stamp; time make html-build.stamp
63 """
65 import argparse
66 import errno
67 import logging
68 import os
69 import shutil
70 import sys
72 from anytree import Node, PreOrderIter
73 from copy import deepcopy
74 from glob import glob
75 from lxml import etree
76 from pygments import highlight
77 from pygments.lexers import CLexer
78 from pygments.formatters import HtmlFormatter
80 from . import config, fixxref
# pygments setup
# Lexer cache keyed by lower-cased language name. Only the C lexer is created
# up front; other languages are added on first use by convert_programlisting().
LEXERS = {
    'c': CLexer()
}
# nowrap=True: the formatter emits only the highlighted spans, the surrounding
# <pre> is written by convert_programlisting().
HTML_FORMATTER = HtmlFormatter(nowrap=True)

# Docbook tags that start a new output file (chunk).
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
class ChunkParams(object):
    """Naming parameters for chunks that have no 'id' attribute.

    Attributes are documented inline below.
    """

    def __init__(self, prefix, parent=None):
        # Short prefix used to build the generated file name (e.g. 'ch').
        self.prefix = prefix
        # Tag name of the enclosing chunk type, or None for top-level chunks.
        # Previously this was hard-coded to None and the argument was ignored.
        self.parent = parent
        # Number of names generated so far for this tag.
        self.count = 0
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}

# Per chunk tag: (xpath of the title, xpath of the subtitle or None).
# The '_' entry is the fallback used for tags that are not listed.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

ID_XPATH = etree.XPath('//@id')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# Maps glossterm text to glossdef text; filled by build_glossary().
glossary = {}
def gen_chunk_name(node):
    """Return the base file name (without extension) for a chunk node.

    The 'id' attribute wins if present. Otherwise a name is built from the
    per-tag prefix in CHUNK_PARAMS and a running counter; unknown tags get an
    entry created on the fly from the first two letters of the tag.
    """
    node_id = node.attrib.get('id')
    if node_id is not None:
        return node_id

    tag = node.tag
    try:
        params = CHUNK_PARAMS[tag]
    except KeyError:
        params = CHUNK_PARAMS[tag] = ChunkParams(tag[:2])
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    params.count += 1
    # TODO: to make names of nested tags unique we would have to prepend the
    # name of the parent chunk(s); this is only needed if there is more than
    # one parent of that kind in the xml.
    return '%s%02d' % (params.prefix, params.count)
def get_chunk_titles(node):
    """Collect title and subtitle information for a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'. The xpaths come from TITLE_XPATHS, falling back to the
    '_' entry for tags without a dedicated one.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    title_xml = title_xpath(node)[0]
    titles = {
        'title': title_xml.text,
        # A plain <title> is reported under the name of the chunk tag itself.
        'title_tag': tag if title_xml.tag == 'title' else title_xml.tag,
        'subtitle': None,
        'subtitle_tag': None,
    }
    if subtitle_xpath:
        subtitle_xml = subtitle_xpath(node)[0]
        titles['subtitle'] = subtitle_xml.text
        titles['subtitle_tag'] = subtitle_xml.tag
    return titles
def chunk(xml_node, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Args:
      xml_node: the xml element to inspect (and recurse into)
      parent: the tree Node of the enclosing chunk, or None for the first call

    Returns:
      The Node created for xml_node if its tag is in CHUNK_TAGS, otherwise the
      parent that was passed in.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # remove the xml-node from the parent
            # The chunk continues to live as a detached deep copy, so the
            # enclosing chunk's xml no longer contains it.
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        chunk_name = gen_chunk_name(xml_node)
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    # NOTE(review): the recursive call may remove 'child' from xml_node while
    # we iterate over it; this relies on the element iterator tolerating the
    # removal of the current child - confirm before restructuring this loop.
    for child in xml_node:
        chunk(child, parent)

    return parent
def add_id_links(files, links):
    """Register a link target in *links* for every 'id' found in the chunks.

    The id that names the chunk itself maps to the plain file name, all other
    ids map to 'file.html#id'.
    """
    for node in files:
        filename = node.filename
        # strip the '.html' extension to get the chunk's own id
        own_id = filename[:-5]
        for attr in ID_XPATH(node.xml):
            links[attr] = filename if attr == own_id else filename + '#' + attr
def build_glossary(files):
    """Fill the module-level glossary from all 'glossary' chunks in *files*."""

    def plain_text(element):
        return etree.tostring(element, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This
                # only supports what we commonly use
                term = plain_text(entry.find('glossterm'))
                definition = plain_text(entry.find('glossdef'))
                glossary[term] = definition
248 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of *xml* and append their html to *result*."""
    for element in xml:
        converter = convert_tags.get(element.tag, convert__unknown)
        result.extend(converter(ctx, element))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the html of its children."""
    fragments = []
    convert_inner(ctx, xml, fragments)
    return fragments
def convert_skip(ctx, xml):
    """Drop the tag and everything below it; returns a single empty string."""
    nothing = ''
    return [nothing]
# Tags for which the "missing converter" warning was already logged.
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter: wrap the children in html comments naming the tag."""
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_TAGS:
        return []
    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    fragments = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, fragments)
    fragments.append('<!-- /' + tag + '-->\n')
    return fragments
def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a refsect* element into a <div> with an optional heading.

    Args:
      h_tag: html heading tag to use for the title (e.g. 'h2')
      inner_func: function used to convert the children

    Note: the <title> child is removed from *xml* once it has been emitted.
    """
    html = ['<div class="%s">\n' % xml.tag]
    heading = xml.find('title')
    if heading is not None:
        anchor = xml.attrib.get('id')
        if anchor is not None:
            html.append('<a name="%s"></a>' % anchor)
        html.append('<%s>%s</%s>' % (h_tag, heading.text, h_tag))
        xml.remove(heading)
    text = xml.text
    if text:
        html.append(text)
    inner_func(ctx, xml, html)
    html.append('</div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def xml_get_title(xml):
    """Return the text of the <title> child, or '' (with a warning) if missing."""
    heading = xml.find('title')
    if heading is None:
        # TODO(ensonic): any way to get the file (includes) too?
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return heading.text
310 # docbook tags
def convert_acronym(ctx, xml):
    """Convert <acronym>, using the glossary entry (if any) as tooltip."""
    acronym = xml.text
    # TODO: print a sensible warning if missing
    explanation = glossary.get(acronym, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (explanation, acronym)]
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_bookinfo(ctx, xml):
    """Convert <bookinfo> into the title page block, closed by a rule."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append("""<hr>
</div>""")
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert <blockquote> including its text, children and tail."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    if tail:
        html.append(tail)
    return html
def convert_colspec(ctx, xml):
    """Convert <colspec> into a <col>; colname becomes class, colwidth width."""
    attrs = xml.attrib
    html = ['<col']
    for name, template in (('colname', ' class="%s"'), ('colwidth', ' width="%s"')):
        if name in attrs:
            html.append(template % attrs[name])
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_corpauthor(ctx, xml):
    """Convert <corpauthor> into an <h3 class="corpauthor"> heading."""
    html = ['<div><h3 class="corpauthor">\n']
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    if tail:
        html.append(tail)
    return html
def convert_div(ctx, xml):
    """Convert a block tag into a <div> whose class is the docbook tag name."""
    html = ['<div class="%s">\n' % xml.tag]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    if tail:
        html.append(tail)
    return html
def convert_em_class(ctx, xml):
    """Convert an inline tag into <em class="tag"><code>...</code></em>."""
    html = ['<em class="%s"><code>' % xml.tag]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    if tail:
        html.append(tail)
    return html
def convert_entry(ctx, xml):
    """Convert a table <entry> into a <td>; 'role' becomes the css class."""
    role = xml.attrib.get('role')
    html = ['<td', '>' if role is None else ' class="%s">' % role]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</td>')
    if tail:
        html.append(tail)
    return html
def convert_glossdef(ctx, xml):
    """Convert <glossdef> into a <dd>; own text and tail are not emitted."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert <glossdiv>: an anchored heading followed by its entries.

    The <title> child is removed from *xml* after its text has been read.
    """
    heading = xml.find('title')
    label = heading.text
    xml.remove(heading)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (label, label)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert <glossentry>: no markup of its own, just the children."""
    entries = []
    convert_inner(ctx, xml, entries)
    return entries
def convert_glossterm(ctx, xml):
    """Convert <glossterm> into a <dt> with a named anchor.

    The anchor name is the id of a nested <anchor> if there is one, otherwise
    'glossterm-' followed by the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        name = ''
        term = ''
    else:
        name = anchor.attrib.get('id', '')
        term = anchor.tail or ''
    term += xml.text or ''
    if name == '':
        name = 'glossterm-' + term
    markup = '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (name, term)
    return [markup]
def convert_imageobject(ctx, xml):
    """Convert <imageobject> into an <img>; empty if there is no <imagedata>."""
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    source = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % source]
def convert_indexdiv(ctx, xml):
    """Convert <indexdiv>: an anchored heading followed by its entries.

    The <title> child is removed from *xml* after its text has been read.
    """
    heading = xml.find('title')
    label = heading.text
    xml.remove(heading)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (label, label)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert <informaltable>; honours pgwide="1" and frame="none"."""
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_itemizedlist(ctx, xml):
    """Convert <itemizedlist> into a bulleted <ul>."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert <link linkend="..."> into a cross reference.

    The link text is the element's own text followed by its converted
    children. If the target is listed in fixxref.NoLinks no markup is emitted
    for the element itself, only its tail.

    Raises:
      KeyError: if the element has no 'linkend' attribute.
    """
    linkend = xml.attrib['linkend']
    if linkend in fixxref.NoLinks:
        linkend = None
    result = []
    if linkend:
        link_text = []
        # The leading text comes before any child element in document order,
        # so it has to be emitted first (it used to be appended after them).
        if xml.text:
            link_text.append(xml.text)
        convert_inner(ctx, xml, link_text)
        # TODO: fixxref does some weird checks in xml.text
        result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_listitem(ctx, xml):
    """Convert <listitem> into an <li> wrapping the converted children."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    # is in itemizedlist and there can be no 'text'
    html.append('</li>')
    return html
def convert_literal(ctx, xml):
    """Convert an inline tag into <code class="tag">...</code>."""
    html = ['<code class="%s">' % xml.tag]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    if tail:
        html.append(tail)
    return html
def convert_orderedlist(ctx, xml):
    """Convert <orderedlist> into a numbered <ol>.

    The css class is the docbook tag name, matching convert_itemizedlist();
    it used to be misspelled as "orderedlistlist".
    """
    result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, result)
    result.append('</ol></div>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_para(ctx, xml):
    """Convert <para> into <p>, preceded by a named anchor if it has an id."""
    html = []
    anchor = xml.attrib.get('id')
    if anchor is not None:
        html.append('<a name="%s"></a>' % anchor)
    html.append('<p>')
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    if tail:
        html.append(tail)
    return html
def convert_para_like(ctx, xml):
    """Convert a paragraph-like tag into <p class="tag"> with optional anchor."""
    html = []
    anchor = xml.attrib.get('id')
    if anchor is not None:
        html.append('<a name="%s"></a>' % anchor)
    html.append('<p class="%s">' % xml.tag)
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    if tail:
        html.append(tail)
    return html
def convert_phrase(ctx, xml):
    """Convert <phrase> into a <span>; 'role' becomes the css class."""
    role = xml.attrib.get('role')
    html = ['<span', '>' if role is None else ' class="%s">' % role]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    if tail:
        html.append(tail)
    return html
def convert_primaryie(ctx, xml):
    """Convert <primaryie> into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert a verbatim tag into <pre class="tag">...</pre>."""
    html = ['<pre class="%s">\n' % xml.tag]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</pre>')
    if tail:
        html.append(tail)
    return html
def convert_programlisting(ctx, xml):
    """Convert <programlisting>.

    Listings with role="example" are syntax highlighted with pygments and
    get a line-number column; the language is taken from the 'language'
    attribute and defaults to 'c'. If pygments has no lexer for the language
    the text is emitted in a plain <pre>. All other listings become a plain
    <pre> with their children converted as usual.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                # Imported here because the module only imports CLexer at the
                # top; without it this lookup raised a NameError.
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # Remember the miss so that we don't look it up again.
                    LEXERS[lang] = None
            lexer = LEXERS.get(lang, None)
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
  <tbody>
    <tr>
      <td class="listing_lines" align="right"><pre>%s</pre></td>
      <td class="listing_code"><pre class="programlisting">%s</pre></td>
    </tr>
  </tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                result.append(xml.text)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        if xml.text:
            result.append(xml.text)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_refsect1(ctx, xml):
    """Convert <refsect1> with an <h2> heading and rules between refsect2s."""

    def convert_children(ctx, xml, result):
        # Add a divider between two consecutive refsect2
        previous_tag = None
        for element in xml:
            tag = element.tag
            if tag == 'refsect2' and previous_tag == tag:
                result.append('<hr>\n')
            converter = convert_tags.get(tag, convert__unknown)
            result.extend(converter(ctx, element))
            previous_tag = tag

    return convert_refsect(ctx, xml, 'h2', convert_children)
def convert_refsect2(ctx, xml):
    """Convert <refsect2>; its title is rendered as <h3>."""
    heading_tag = 'h3'
    return convert_refsect(ctx, xml, heading_tag)
def convert_refsect3(ctx, xml):
    """Convert <refsect3>; its title is rendered as <h4>."""
    heading_tag = 'h4'
    return convert_refsect(ctx, xml, heading_tag)
def convert_row(ctx, xml):
    """Convert a table <row> into a <tr> wrapping the converted entries."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_simpara(ctx, xml):
    """Convert <simpara> into <p>; children are not converted, only text."""
    html = ['<p>']
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    html.append('</p>')
    if tail:
        html.append(tail)
    return html
def convert_span(ctx, xml):
    """Convert an inline tag into <span class="tag">...</span>."""
    html = ['<span class="%s">' % xml.tag]
    text, tail = xml.text, xml.tail
    if text:
        html.append(text)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    if tail:
        html.append(tail)
    return html
def convert_tbody(ctx, xml):
    """Convert <tbody>; wraps the converted rows."""
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    # is in tgroup and there can be no 'text'
    html.append('</tbody>')
    return html
def convert_tgroup(ctx, xml):
    """Convert <tgroup>.

    tgroup does not expand to anything itself, but the nested colspecs are
    collected into a <colgroup> (and removed from *xml*) before the remaining
    children are converted.
    """
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_ulink(ctx, xml):
    """Convert <ulink url="..."> into an html link.

    If the element has no text, the url itself is used as the link text
    (previously the literal string "None" ended up in the output).

    Raises:
      KeyError: if the element has no 'url' attribute.
    """
    url = xml.attrib['url']
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, xml.text or url)]
    if xml.tail:
        result.append(xml.tail)
    return result
# Maps a docbook tag name to the function that converts it to html. Tags that
# are not listed are handled by convert__unknown() (see convert_inner()).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}

# conversion helpers

# Page header template; takes (page title, extra <link> tags for the head).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Return the <link rel=...> tags for the html head as one string.

    'home' is always emitted; 'up', 'prev' and 'next' only if the matching
    'nav_*' entry is present in *ctx*.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Return the navigation-icon table cells (home/up/prev/next) as a string.

    For up/prev/next an "insensitive" icon without a link is emitted when the
    matching 'nav_*' entry is missing from *ctx*.
    """
    active = '<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" height="16" border="0" alt="%s"></a></td>'
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in (
            ('nav_up', 'u', 'up', 'Up'),
            ('nav_prev', 'p', 'left', 'Prev'),
            ('nav_next', 'n', 'right', 'Next')):
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % icon)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Return the nested table-of-contents entries below *node* as a list."""
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, child.filename, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Return the navigation table with only the home/up/prev/next icons."""
    nav_links = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
""" % nav_links
def generate_alpha_nav(ctx, divs, prefix):
    """Return the navigation table with one shortcut per div title.

    Args:
      divs: the glossdiv/indexdiv elements; each title becomes a shortcut
      prefix: anchor prefix, the shortcut links to '#<prefix><title>'
    """
    titles = [xml_get_title(div) for div in divs]
    shortcuts = ['<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
                 for title in titles]

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_index">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
""" % ('\n<span class="dim">|</span>\n'.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table for a refentry page to *result*.

    Besides the icons it has a "Top" shortcut plus one shortcut per refsect1,
    except for those whose role ends in "_proto".

    Raises:
      KeyError: if a listed refsect1 has no 'id' attribute.
    """
    # This literal is not %-formatted, so the percent sign must not be
    # doubled (it used to end up as width="100%%" in the html).
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue

        title = xml_get_title(s)
        result.append("""
      <span id="nav_description">
          <span class="dim">|</span> 
        <a href="#%s" class="shortcut">%s</a>
      </span>""" % (s.attrib['id'], title))
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % generate_nav_links(ctx))
def get_id(node):
    """Return the 'id' of the chunk's xml, generating one if it has none.

    A generated id has the form 'id-1.<i>.<j>...' where the numbers are the
    1-based positions of the element and its ancestors among their siblings.
    """
    element = node.xml
    existing = element.attrib.get('id', None)
    if existing:
        return existing

    logging.info('%d: No "id" attribute on "%s", generating one',
                 element.sourceline, element.tag)
    # Walk up the xml-tree and record the position among the siblings on
    # every level.
    positions = []
    ancestor = element.getparent()
    while ancestor is not None:
        siblings = ancestor.getchildren()
        positions.insert(0, str(siblings.index(element) + 1))
        element, ancestor = ancestor, ancestor.getparent()
    return 'id-1.' + '.'.join(positions)
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk that consists of content followed by a table of contents.

    Args:
      div_class: css class for the outer <div>
      title_tag: html heading tag used for the chunk title

    The <title> child is removed from the chunk's xml once it was emitted.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    heading = node.xml.find('title')
    if heading is not None:
        html.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, get_id(node), heading.text, title_tag))
        node.xml.remove(heading)
    convert_inner(ctx, node.xml, html)
    html.append("""<p>
  <b>Table of Contents</b>
</p>
<div class="toc">
  <dl class="toc">
""")
    html += generate_toc(ctx, node)
    html.append("""</dl>
</div>
</div>
</body>
</html>""")
    return html
943 # docbook chunks
def convert_book(ctx):
    """Convert the 'book' chunk: title, bookinfo and the full table of contents.

    The <title> is removed from <bookinfo> since it is shown in the header.
    """
    node = ctx['node']
    book_title = node.title
    html = [
        HTML_HEADER % (book_title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % book_title
    ]
    bookinfo = node.xml.findall('bookinfo')[0]
    # we already used the title
    heading = bookinfo.find('title')
    if heading is not None:
        bookinfo.remove(heading)
    html += convert_bookinfo(ctx, bookinfo)
    html.append("""<div class="toc">
  <dl class="toc">
""")
    html += generate_toc(ctx, node.root)
    html.append("""</dl>
</div>
</div>
</body>
</html>""")
    return html
def convert_chapter(ctx):
    """Convert a 'chapter' chunk (content plus toc, <h2> title)."""
    div_class, title_tag = 'chapter', 'h2'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
def convert_glossary(ctx):
    """Convert a 'glossary' chunk with an alphabetical shortcut bar."""
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')
    page_title = node.title + ": " + node.root.title

    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls'),
        """<div class="index">
<div class="titlepage"><h1 class="title">
<a name="%s"></a>%s</h1>
</div>""" % (get_id(node), node.title)
    ]

    for glossdiv in glossdivs:
        html += convert_glossdiv(ctx, glossdiv)

    html.append("""</div>
</body>
</html>""")
    return html
def convert_index(ctx):
    """Convert an 'index' chunk with an alphabetical shortcut bar."""
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')
    page_title = node.title + ": " + node.root.title

    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        """<div class="glossary">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for indexdiv in indexdivs:
        html += convert_indexdiv(ctx, indexdiv)
    html.append("""</div>
</body>
</html>""")
    return html
def convert_part(ctx):
    """Convert a 'part' chunk (content plus toc, <h1> title)."""
    div_class, title_tag = 'part', 'h1'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
def convert_preface(ctx):
    """Convert a 'preface' chunk.

    The <title> child is removed from the chunk's xml once it was emitted.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]
    heading = node.xml.find('title')
    if heading is not None:
        html.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), heading.text))
        node.xml.remove(heading)
    convert_inner(ctx, node.xml, html)
    html.append("""</div>
</body>
</html>""")
    return html
def convert_reference(ctx):
    """Convert a 'reference' chunk (content plus toc, <h1> title)."""
    div_class, title_tag = 'reference', 'h1'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
def convert_refentry(ctx):
    """Convert a 'refentry' chunk: header, navigation and all refsect1s.

    The line below the title shows "<title> — <subtitle>", where the subtitle
    is what get_chunk_titles() collected for the chunk (the refpurpose). It
    used to be a hard-coded text that was only correct for the unit tests.
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Leave out the dash if the chunk has no subtitle.
    if node.subtitle:
        refname = '%s — %s' % (node.title, node.subtitle)
    else:
        refname = node.title

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s</p>
</td>
<td class="gallery_image" valign="top" align="right"></td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, refname))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.append("""</div>
</body>
</html>""")
    return result
# Maps a chunk tag name to the function that renders the whole html page for
# it; convert() logs a warning for chunk tags that are not listed.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
}
def generate_nav_nodes(files, node):
    """Return the navigation targets for *node* as a dict.

    'nav_home' is always set; 'nav_up', 'nav_prev' and 'nav_next' only if the
    node has a parent resp. a predecessor/successor in *files*.
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    Args:
      out_dir: already created output dir
      module: the module name, passed on to the converters via ctx
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Return the nested <sub> entries for the devhelp2 index below *node*."""
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute into devhelp2 keyword attributes.

    The condition holds '|'-separated 'name:value' pairs (since, deprecated,
    ...). Each pair becomes ' name="value"'; double quotes are escaped. A
    pair without a ':' (a bare name) gets an empty value instead of raising a
    TypeError in the string formatting.

    Returns:
      The attributes with a leading space, or '' if there is no condition.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    cond = node.attrib['condition'].replace('"', '&quot;').split('|')
    attribs = []
    for c in cond:
        # partition() yields an empty value when there is no ':'
        name, _, value = c.partition(':')
        attribs.append('%s="%s"' % (name, value))
    return ' ' + ' '.join(attribs)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Return the devhelp2 <keyword> line for a refsect2 with a 'role'."""
    keyword_type = node.attrib['role']
    name = xml_get_title(node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 <keyword> line for an entry below a refsect3.

    Args:
      node: xml node; its 'role' attribute is required
      base_link: link prefix that 'name' gets appended to
      title: text for the keyword's name attribute
      name: link target that is appended to 'base_link'

    Returns:
      The <keyword .../> line, including the condition attributes.
    """
    fields = (
        node.attrib['role'],
        title,
        base_link + name,
        create_devhelp2_condition_attribs(node),
    )
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % fields
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into the output dir.

    The index consists of the book header, the table of contents and one
    keyword per refsect2 plus one per enum constant / struct member found
    below it. A missing bookinfo, title or online-location link results in
    an empty attribute value instead of an error.

    Args:
      out_dir: already created output dir
      module: module name; used for the file name and the book name
      xml: root element of the document
      files: list of nodes in the tree in pre-order
    """
    title = ''
    online_url = ''
    # xpath() returns an empty list (not None) when nothing matches, so test
    # for truthiness before indexing.
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if urls:
            online_url = urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    header = (
        '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n'
        '<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html"'
        ' author="" name="%s" version="2" language="c" online="%s">\n'
        '<chapters>\n'
    ) % (title, module, online_url)
    result = [header]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append(' </chapters>\n<functions>\n')

    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath(
        'refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath(
        'refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum constants: the keyword name is the text of the para
            for row in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(row)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(
                    row, base_link, details_node.text, name))
            # struct members: the id is used for both the name and the link
            for row in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(row)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(row, base_link, name, name))

    result.append(' </functions>\n</book>\n')

    # Only open the file once the content is complete, so that a failure
    # above does not leave a truncated index behind.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, locate the dirs relative to the running script
        (or ABS_TOP_SRCDIR) instead of using the configured data dir

    Returns:
      A tuple (gtkdocdir, styledir).
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    marker = '/gtk-doc.xsl'
    # this does not work from builddir != srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    found_next_to_script = os.path.exists(gtkdocdir + marker)
    # try 'srcdir' (set from makefiles) too
    if not found_next_to_script and os.path.exists(
            os.environ.get("ABS_TOP_SRCDIR", '') + marker):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into html chunks and a devhelp2 index.

    Args:
      module: module name
      index_file: path of the main docbook xml file
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    doc = etree.parse(index_file)
    doc.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of the html formatter to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    root = doc.getroot()
    files = list(PreOrderIter(chunk(root)))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the
    #    tree
    create_devhelp2(out_dir, module, root, files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed options; 'args' holds the module name and the path of
        the main document, 'uninstalled' is passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module = options.args[0]
    document = options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # An already existing output dir is fine, anything else is not.
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))