From d5c909da8a7409d24f6bb365273035c8e8c846ae Mon Sep 17 00:00:00 2001 From: Stefan Sauer Date: Sun, 10 Dec 2017 15:18:48 +0100 Subject: [PATCH] tools: add the start of an experiment to replace the xsl conversion The new tool will chunk to a 'db2html' dir. It does not yet convert anything, but should create the same files as one would have in the 'html' dir. --- doc/design-2.x.txt | 35 +++++++++-- tools/db2html.py | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/db2md.py | 9 +-- 3 files changed, 210 insertions(+), 8 deletions(-) create mode 100644 tools/db2html.py diff --git a/doc/design-2.x.txt b/doc/design-2.x.txt index abc0109..7a71a7b 100644 --- a/doc/design-2.x.txt +++ b/doc/design-2.x.txt @@ -93,7 +93,8 @@ We could change gtkdoc-mkpdf to use wkhtmltopdf/htmldoc. For man-pages we can use https://rtomayko.github.io/ronn/ronn.1.html. The devhelp2 files would be output directly from gtkdoc-mkhtml2. -We can enable such a toolchain via the configure flavors option. +We can enable this new toolchain via the configure flavors option (needs support +for cmake, meson, ...). These would be the steps to do this: 1.) [in progress] write the docbook comment migation tool: @@ -113,11 +114,12 @@ options for gtkdocize. Create a stub gtkdoc-mkhtml2 tool. - only have the parse there 4.) [unassigned] write gtkdoc/mkhtml2.py +- select a template engine (e.g. jinja) - create templates from the current html for the various page types (refentry, index, ...). -- we won't need content_files and expand_content_files in Makefile.am, mkhtml2 - would read $(DOC_MODULE)-docs.md (rename to index.md?) and find local links - from it +- we won't need content_files and expand_content_files in Makefile.am, as + mkhtml2 would read $(DOC_MODULE)-docs.md (rename to index.md?) 
and find local + links in there - convert all hand-written md files starting from the main-doc to html Open Issues: @@ -142,3 +144,28 @@ gdbus-codegen: gstreamer plugindocs: - generate markdown formatted files + +=== only drop docbook-xsl === +Since the processing with docbook-xsl is what is slow, we could also consider to +keep the whole gtkdoc-mkdb and have 2 codepaths in gtkdoc-mkhtml. The new code- +path would read the docbook with element-tree, replicate the chunking that +docbook-xsl does and use a templating system to generate the html files +(e.g. jinja). + +This is probably easier to achieve, but has less potential in the long run (e.g. +incremental doc updates). + +On the plus side, we can do rarely used output-formats (like pdf, man) the way +we do them right now. + +These would be the steps to do this: +1.) [done] write the chunker +- standalone tool to load the docbook xml and chunk it (just touch the resulting + *.html files) until we produce the same set of files + +2.) [unassigned] transform some docbook to html +- evaluate template engines +- we need to warn when not handling certain docbook tags + +3.) [unassigned] integrate this into the gtkdoc library +- add an option to gtkdoc-mkhtml (e.g. --engine={xslt,builtin} or just --noxslt) diff --git a/tools/db2html.py b/tools/db2html.py new file mode 100644 index 0000000..9bc5d86 --- /dev/null +++ b/tools/db2html.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# -*- python; coding: utf-8 -*- +# +# gtk-doc - GTK DocBook documentation generator. +# Copyright (C) 2017 Stefan Sauer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

"""Prototype for builtin docbook processing

The tool loads the main xml document (-docs.xml) and chunks it
like the xsl-stylesheets would do. For that it resolves all the xml-includes.

TODO: convert the docbook-xml to html

Examples:
python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
ll tests/gobject/docs/db2html
python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
ll tests/bugs/docs/db2html
"""

import argparse
import errno
import logging
import os
import sys

try:
    from lxml import etree
except ImportError:
    # Keep the chunk-naming helpers importable without lxml; main() reports
    # the missing dependency before it tries to parse anything.
    etree = None

# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]

# Per-tag naming state for chunks that carry no 'id' attribute: the filename
# prefix, a running counter (mutated by gen_chunk_name) and the parent tag.
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
CHUNK_NAMING = {
    'book': {
        'prefix': 'bk',
        'count': 0,
        'parent': None,
    },
    'chapter': {
        'prefix': 'ch',
        'count': 0,
        'parent': 'book',
    },
    'index': {
        'prefix': 'ix',
        'count': 0,
        'parent': 'book',
    },
    'sect1': {
        'prefix': 's',
        'count': 0,
        'parent': 'chapter',
    },
    'section': {
        'prefix': 's',
        'count': 0,
        'parent': 'chapter',
    },
}


def gen_chunk_name(node):
    """Return the chunk base name (no extension) for a chunked element.

    Args:
        node: element providing `tag` and a mapping-like `attrib`.

    Returns:
        The value of the 'id' attribute if the node has one. Otherwise a
        generated name made of the tag's prefix and a two-digit running
        number, e.g. 'ch01' for the first chapter without an id.

    Side effects:
        Increments the per-tag counter in the module-level CHUNK_NAMING, so
        generated names depend on how many id-less nodes of that tag have
        been named before. Tags missing from CHUNK_NAMING are registered on
        first use (prefix: first two letters of the tag) and a warning is
        logged.
    """
    if 'id' in node.attrib:
        return node.attrib['id']

    tag = node.tag
    naming = CHUNK_NAMING.get(tag)
    if naming is None:
        # Give auto-registered entries the same keys as the static ones so
        # that code consulting 'parent' does not hit a KeyError.
        naming = CHUNK_NAMING[tag] = {
            'prefix': tag[:2],
            'count': 0,
            'parent': None,
        }
        logging.warning('Add CHUNK_NAMING for "%s"', tag)

    naming['count'] += 1
    name = '%s%02d' % (naming['prefix'], naming['count'])
    # handle parents to make names of nested tags unique
    # TODO: we only need to prepend the parent if there are > 1 of them in the
    #       xml
    # while naming['parent']:
    #     parent = naming['parent']
    #     if parent not in CHUNK_NAMING:
    #         break
    #     naming = CHUNK_NAMING[parent]
    #     name = ('%s%02d' % (naming['prefix'], naming['count'])) + name
    return name


def convert(out_dir, node, out_file=None):
    """Walk the children of node and create one (empty) file per chunk.

    Every descendant whose tag is listed in CHUNK_TAGS gets its own
    '<name>.html' file in out_dir, named by gen_chunk_name(). Nothing is
    written to the files yet.

    Args:
        out_dir: existing directory that receives the chunk files.
        node: iterable element whose children are processed recursively.
        out_file: file object of the chunk that node belongs to, or None.
            It is handed down to non-chunk descendants and stays open; the
            frame that opened a file is the one that closes it.
    """
    # iterate and chunk
    # TODO: convert to HTML, need a template for each CHUNK_TAG

    for child in node:
        print('<%s %s>' % (child.tag, child.attrib))
        if child.tag in CHUNK_TAGS:
            base = gen_chunk_name(child) + '.html'
            out_filename = os.path.join(out_dir, base)
            # The chunk file lives exactly as long as the processing of its
            # subtree; 'with' also closes it if a nested call raises.
            with open(out_filename, 'wt', encoding='utf-8') as chunk_file:
                convert(out_dir, child, chunk_file)
        else:
            # Same chunk as the parent: share its file, do not close it here.
            convert(out_dir, child, out_file)


def main(index_file):
    """Chunk the docbook document index_file into a 'db2html' directory.

    The document is parsed with lxml, xincludes are resolved and the chunk
    files are created in a 'db2html' directory next to index_file (created
    if missing).

    Args:
        index_file: path of the main docbook xml file.

    Raises:
        SystemExit: if the lxml module is not available.
        OSError: if the output directory cannot be created.
    """
    if etree is None:
        sys.exit('db2html needs the python lxml module.')

    tree = etree.parse(index_file)
    tree.xinclude()

    dir_name = os.path.dirname(index_file)

    # for testing: dump to output file
    # out_file = os.path.join(dir_name, 'db2html.xml')
    # tree.write(out_file)

    # TODO: rename to 'html' later on
    out_dir = os.path.join(dir_name, 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as e:
        # An already existing output directory is fine, anything else is not.
        if e.errno != errno.EEXIST:
            raise
    convert(out_dir, tree.getroot())


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='db2html - chunk docbook')
    parser.add_argument('sources', nargs='*')
    options = parser.parse_args()
    if len(options.sources) != 1:
        sys.exit('Expect one source file argument.')

    # GTKDOC_TRACE selects the log level; set-but-empty means INFO, unset
    # leaves logging unconfigured.
    log_level = os.environ.get('GTKDOC_TRACE')
    if log_level == '':
        log_level = 'INFO'
    if log_level:
        logging.basicConfig(stream=sys.stdout,
                            level=logging.getLevelName(log_level.upper()),
                            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    sys.exit(main(options.sources[0]))
b/tools/db2md.py @@ -96,12 +96,12 @@ def convert_block(dry_run, filename, lines, beg, end): # check for historic non markdown compatible chars if re.search(r'\s\*\w+[\s.]', line): logging.warning("%s:%d: leading '*' needs escaping: '%s'", filename, ix, line) - #if re.search(r'\s\w+\*[\s.]', line): - # logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line) + # if re.search(r'\s\w+\*[\s.]', line): + # logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line) if re.search(r'\s_\w+[\s.]', line): logging.warning("%s:%d: leading '_' needs escaping: '%s'", filename, ix, line) - #if re.search(r'\s\w+_[\s.]', line): - # logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line) + # if re.search(r'\s\w+_[\s.]', line): + # logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line) # look for docbook for m in re.finditer(r'<([^>]*)>', line): @@ -190,6 +190,7 @@ def main(dry_run, files): found_docbook = found_docbook | convert_file(dry_run, f) return found_docbook + if __name__ == '__main__': parser = argparse.ArgumentParser( description='db2md - convert docbook in comment to markdown') -- 2.11.4.GIT