From d5c909da8a7409d24f6bb365273035c8e8c846ae Mon Sep 17 00:00:00 2001
From: Stefan Sauer <ensonic@users.sf.net>
Date: Sun, 10 Dec 2017 15:18:48 +0100
Subject: [PATCH] tools: add the start of an experiment to replace the xsl
 conversion

The new tool will chunk to a 'db2html' dir. It does not yet convert
anything, but should create the same files as one would have in the
'html' dir.
---
 doc/design-2.x.txt |  35 +++++++++--
 tools/db2html.py   | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/db2md.py     |   9 +--
 3 files changed, 210 insertions(+), 8 deletions(-)
 create mode 100644 tools/db2html.py

diff --git a/doc/design-2.x.txt b/doc/design-2.x.txt
index abc0109..7a71a7b 100644
--- a/doc/design-2.x.txt
+++ b/doc/design-2.x.txt
@@ -93,7 +93,8 @@ We could change gtkdoc-mkpdf to use wkhtmltopdf/htmldoc. For man-pages we
 can use https://rtomayko.github.io/ronn/ronn.1.html.
 The devhelp2 files would be output directly from gtkdoc-mkhtml2.
 
-We can enable such a toolchain via the configure flavors option.
+We can enable this new toolchain via the configure flavors option (needs support
+for cmake, meson, ...).
 
 These would be the steps to do this:
 1.) [in progress] write the docbook comment migation tool:
@@ -113,11 +114,12 @@ options for gtkdocize. Create a stub  gtkdoc-mkhtml2 tool.
   - only have the parse there
 
 4.) [unassigned] write gtkdoc/mkhtml2.py
+- select a template engine (e.g. jinja)
 - create templates from the current html for the various page types (refentry,
   index, ...).
-- we won't need content_files and expand_content_files in Makefile.am, mkhtml2
-  would read $(DOC_MODULE)-docs.md (rename to index.md?) and find local links
-  from it
+- we won't need content_files and expand_content_files in Makefile.am, as
+  mkhtml2 would read $(DOC_MODULE)-docs.md (rename to index.md?) and find local
+  links in there
 - convert all hand-written md files starting from the main-doc to html
 
 Open Issues:
@@ -142,3 +144,28 @@ gdbus-codegen:
 
 gstreamer plugindocs:
 - generate markdown formatted files
+
+=== only drop docbook-xsl ===
+Since the processing with docbook-xsl is what is slow, we could also consider to
+keep the whole gtkdoc-mkdb and have 2 codepaths in gtkdoc-mkhtml. The new code-
+path would read the docbook with element-tree, replicate the chunking that
+docbook-xsl does and use a templating system to generate the html files
+(e.g. jinja).
+
+This is probably easier to achieve, but has less potential in the long run (e.g.
+incremental doc updates).
+
+On the plus side, we can do rarely used output-formats (like pdf, man) the way
+we do them right now.
+
+These would be the steps to do this:
+1.) [done] write the chunker
+- standalone tool to load the docbook xml and chunk it (just touch the resulting
+  *.html files) until we produce the same set of files as the xsl conversion
+
+2.) [unassigned] transform some docbook to html
+- evaluate template engines
+- we need to warn about docbook elements that we do not handle yet
+
+3.) [unassigned] integrate this into the gtkdoc library
+- add an option to gtkdoc-mkhtml (e.g. --engine={xslt,builtin} or just --noxslt)
diff --git a/tools/db2html.py b/tools/db2html.py
new file mode 100644
index 0000000..9bc5d86
--- /dev/null
+++ b/tools/db2html.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+# -*- python; coding: utf-8 -*-
+#
+# gtk-doc - GTK DocBook documentation generator.
+# Copyright (C) 2017  Stefan Sauer
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+
+"""Prototype for builtin docbook processing
+
+The tool loads the main xml document (<module>-docs.xml) and chunks it
+like the xsl-stylesheets would do. For that it resolves all the xml-includes.
+
+TODO: convert the docbook-xml to html
+
+Examples:
+python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
+ll tests/gobject/docs/db2html
+python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
+ll tests/bugs/docs/db2html
+"""
+
+import argparse
+import errno
+import logging
+import os
+import sys
+
+from lxml import etree
+
+# http://www.sagehill.net/docbookxsl/Chunking.html
+CHUNK_TAGS = [
+    'appendix',
+    'article',
+    'bibliography',  # in article or book
+    'book',
+    'chapter',
+    'colophon',
+    'glossary',      # in article or book
+    'index',         # in article or book
+    'part',
+    'preface',
+    'refentry',
+    'reference',
+    'sect1',         # except first
+    'section',       # if equivalent to sect1
+    'set',
+    'setindex',
+]
+
+# TODO: look up the abbrevs and hierarchy for other tags
+# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
+CHUNK_NAMING = {
+    'book': {
+        'prefix': 'bk',
+        'count': 0,
+        'parent': None,
+    },
+    'chapter': {
+        'prefix': 'ch',
+        'count': 0,
+        'parent': 'book'
+    },
+    'index': {
+        'prefix': 'ix',
+        'count': 0,
+        'parent': 'book'
+    },
+    'sect1': {
+        'prefix': 's',
+        'count': 0,
+        'parent': 'chapter',
+    },
+    'section': {
+        'prefix': 's',
+        'count': 0,
+        'parent': 'chapter',
+    },
+}
+
+
+def gen_chunk_name(node):
+    if 'id' in node.attrib:
+        return node.attrib['id']
+
+    tag = node.tag
+    if tag not in CHUNK_NAMING:
+        CHUNK_NAMING[tag] = {
+            'prefix': node.tag[:2],
+            'count': 0
+        }
+        logging.warning('Add CHUNK_NAMING for "%s"', tag)
+
+    naming = CHUNK_NAMING[tag]
+    naming['count'] += 1
+    name = ('%s%02d' % (naming['prefix'], naming['count']))
+    # handle parents to make names of nested tags unique
+    # TODO: we only need to prepend the parent if there are > 1 of them in the
+    #       xml
+    # while naming['parent']:
+    #     parent = naming['parent']
+    #     if parent not in CHUNK_NAMING:
+    #         break;
+    #     naming = CHUNK_NAMING[parent]
+    #     name = ('%s%02d' % (naming['prefix'], naming['count'])) + name
+    return name
+
+
+def convert(out_dir, node, out_file=None):
+    # iterate and chunk
+    # TODO: convert to HTML, need a template for each CHUNK_TAG
+
+    for child in node:
+        print('<%s %s>' % (child.tag, child.attrib))
+        if child.tag in CHUNK_TAGS:
+            base = gen_chunk_name(child) + '.html'
+            out_filename = os.path.join(out_dir, base)
+            convert(out_dir, child, open(out_filename, 'wt'))
+        else:
+            convert(out_dir, child, out_file)
+    if out_file:
+        out_file.close()
+
+
+def main(index_file):
+    tree = etree.parse(index_file)
+    tree.xinclude()
+
+    dir_name = os.path.dirname(index_file)
+
+    # for testing: dump to output file
+    # out_file = os.path.join(dir_name, 'db2html.xml')
+    # tree.write(out_file)
+
+    # TODO: rename to 'html' later on
+    out_dir = os.path.join(dir_name, 'db2html')
+    try:
+        os.mkdir(out_dir)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+    convert(out_dir, tree.getroot())
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='db2html - chunk docbook')
+    parser.add_argument('sources', nargs='*')
+    options = parser.parse_args()
+    if len(options.sources) != 1:
+        sys.exit('Expect one source file argument.')
+
+    log_level = os.environ.get('GTKDOC_TRACE')
+    if log_level == '':
+        log_level = 'INFO'
+    if log_level:
+        logging.basicConfig(stream=sys.stdout,
+                            level=logging.getLevelName(log_level.upper()),
+                            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
+
+    sys.exit(main(options.sources[0]))
diff --git a/tools/db2md.py b/tools/db2md.py
index 8d4ad79..7a6c23f 100644
--- a/tools/db2md.py
+++ b/tools/db2md.py
@@ -96,12 +96,12 @@ def convert_block(dry_run, filename, lines, beg, end):
         # check for historic non markdown compatible chars
         if re.search(r'\s\*\w+[\s.]', line):
             logging.warning("%s:%d: leading '*' needs escaping: '%s'", filename, ix, line)
-        #if re.search(r'\s\w+\*[\s.]', line):
-        #    logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line)
+        # if re.search(r'\s\w+\*[\s.]', line):
+        #     logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line)
         if re.search(r'\s_\w+[\s.]', line):
             logging.warning("%s:%d: leading '_' needs escaping: '%s'", filename, ix, line)
-        #if re.search(r'\s\w+_[\s.]', line):
-        #    logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line)
+        # if re.search(r'\s\w+_[\s.]', line):
+        #     logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line)
 
         # look for docbook
         for m in re.finditer(r'<([^>]*)>', line):
@@ -190,6 +190,7 @@ def main(dry_run, files):
         found_docbook = found_docbook | convert_file(dry_run, f)
     return found_docbook
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description='db2md - convert docbook in comment to markdown')
-- 
2.11.4.GIT