bin/SConsDoc.py

   1 #!/usr/bin/env python
   2 #
   3 # SPDX-FileCopyrightText: Copyright The SCons Foundation (https://scons.org)
   4 # SPDX-License-Identifier: MIT
   5
   6 """Module for handling SCons documentation processing.
   7
   8 This module parses home-brew XML files that document important SCons
   9 components.  Currently it handles Builders, Environment functions/methods,
  10 Construction Variables, and Tools (further expansion is possible). These
  11 documentation snippets are turned into files with content and reference
  12 tags that can be included into the manpage and/or user guide, which
  13 prevents a lot of duplication.
  14
  15 In general, you can use any DocBook tag in the input, and this module
  16 just adds processing various home-brew tags to try to make life a
  17 little easier.
  18
  19 Builder example:
  20
  21     <builder name="BUILDER">
  22     <summary>
  23     <para>This is the summary description of an SCons Builder.
  24     It will get placed in the man page,
  25     and in the appropriate User's Guide appendix.
  26     The name of this builder may be interpolated
  27     anywhere in the document by specifying the
  28     &b-BUILDER; element. A link to this definition may be
  29     interpolated by specifying the &b-link-BUILDER; element.
  30     </para>
  31
  32     Unlike normal XML, blank lines are significant in these
  33     descriptions and serve to separate paragraphs.
  34     They'll get replaced in DocBook output with appropriate tags
  35     to indicate a new paragraph.
  36
  37     <example>
  38     print("this is example code, it will be offset and indented")
  39     </example>
  40     </summary>
  41     </builder>
  42
  43 Function example:
  44
  45     <scons_function name="FUNCTION">
  46     <arguments signature="SIGTYPE">
  47     (arg1, arg2, key=value)
  48     </arguments>
  49     <summary>
  50     <para>This is the summary description of an SCons function.
  51     It will get placed in the man page,
  52     and in the appropriate User's Guide appendix.
  53     If the "signature" attribute is specified, SIGTYPE may be one
  54     of "global", "env" or "both" (the default if omitted is "both"),
  55     to indicate the signature applies to the global form or the
  56     environment form, or to generate both with the same signature
  57     (excepting the insertion of "env.").
  58     This allows for the cases of
  59     describing that only one signature should be generated,
  60     or both signatures should be generated and they differ,
  61     or both signatures should be generated and they are the same.
  62     The name of this function may be interpolated
  63     anywhere in the document by specifying the
  64     &f-FUNCTION; element or the &f-env-FUNCTION; element.
  65     Links to this definition may be interpolated by specifying
  66     the &f-link-FUNCTION; or &f-link-env-FUNCTION; element.
  67     </para>
  68
  69     <example>
  70     print("this is example code, it will be offset and indented")
  71     </example>
  72     </summary>
  73     </scons_function>
  74
  75 Construction variable example:
  76
  77     <cvar name="VARIABLE">
  78     <summary>
  79     <para>This is the summary description of a construction variable.
  80     It will get placed in the man page,
  81     and in the appropriate User's Guide appendix.
  82     The name of this construction variable may be interpolated
  83     anywhere in the document by specifying the
  84     &cv-VARIABLE; element. A link to this definition may be
  85     interpolated by specifying the &cv-link-VARIABLE; element.
  86     </para>
  87
  88     <example>
  89     print("this is example code, it will be offset and indented")
  90     </example>
  91     </summary>
  92     </cvar>
  93
  94 Tool example:
  95
  96     <tool name="TOOL">
  97     <summary>
  98     <para>This is the summary description of an SCons Tool.
  99     It will get placed in the man page,
 100     and in the appropriate User's Guide appendix.
 101     The name of this tool may be interpolated
 102     anywhere in the document by specifying the
 103     &t-TOOL; element. A link to this definition may be
 104     interpolated by specifying the &t-link-TOOL; element.
 105     </para>
 106
 107     <example>
 108     print("this is example code, it will be offset and indented")
 109     </example>
 110     </summary>
 111     </tool>
 112 """
 113
 114 import os.path
 115 import re
 116 import sys
 117 import copy
 118 import importlib
 119
 120 try:
 121     from lxml import etree
 122 except ImportError:
 123     try:
 124         import xml.etree.ElementTree as etree
 125     except ImportError:
 126         raise ImportError("Failed to import ElementTree from any known place")
 127
# Patterns to help trim XML passed in as strings.
# re_entity matches an entity reference such as "&name;" and captures the
# bare name between the ampersand and the semicolon.
re_entity = re.compile(r"&([^;]+);")
# re_entity_header matches a "<!DOCTYPE sconsdoc ... ]>" declaration,
# including everything up to the closing "]>".
re_entity_header = re.compile(r"<!DOCTYPE\s+sconsdoc\s+[^\]]+\]>")

# Namespace for the SCons Docbook XSD
dbxsd = "http://www.scons.org/dbxsd/v1.0"
# Namespace pattern to help identify an scons-xml file read as bytes
dbxsdpat = b'xmlns="%s"' % dbxsd.encode('utf-8')
# Namespace map identifier for the SCons Docbook XSD
dbxid = "dbx"
# Namespace for schema instances
xsi = "http://www.w3.org/2001/XMLSchema-instance"

# Header comment with copyright (unused at present)
copyright_comment = """
SPDX-FileCopyrightText: Copyright The SCons Foundation (https://scons.org)
SPDX-License-Identifier: MIT
SPDX-FileType: DOCUMENTATION

This file is processed by the bin/SConsDoc.py module.
"""
 149
 150 def isSConsXml(fpath):
 151     """ Check whether the given file is an SCons XML file.
 152
 153     It is SCons XML if it contains the default target namespace definition
 154     described by dbxsdpat
 155
 156     """
 157     try:
 158         with open(fpath, 'rb') as f:
 159             content = f.read()
 160         if content.find(dbxsdpat) >= 0:
 161             return True
 162     except Exception:
 163         pass
 164
 165     return False
 166
 167 def remove_entities(content):
 168     # Cut out entity inclusions
 169     content = re_entity_header.sub("", content, re.M)
 170     # Cut out entities themselves
 171     content = re_entity.sub(lambda match: match.group(1), content)
 172
 173     return content
 174
# Relative path (doc/xsd/scons.xsd) used as the default schema file by
# validate_all_xml().
default_xsd = os.path.join('doc', 'xsd', 'scons.xsd')

# Value the Libxml2ValidityHandler callbacks require as their ``data``
# argument.
ARG = "dbscons"
 178
 179
 180 class Libxml2ValidityHandler:
 181
 182     def __init__(self):
 183         self.errors = []
 184         self.warnings = []
 185
 186     def error(self, msg, data):
 187         if data != ARG:
 188             raise Exception("Error handler did not receive correct argument")
 189         self.errors.append(msg)
 190
 191     def warning(self, msg, data):
 192         if data != ARG:
 193             raise Exception("Warning handler did not receive correct argument")
 194         self.warnings.append(msg)
 195
 196
 197 class DoctypeEntity:
 198     def __init__(self, name_, uri_):
 199         self.name = name_
 200         self.uri = uri_
 201
 202     def getEntityString(self):
 203         txt = """    <!ENTITY %(perc)s %(name)s SYSTEM "%(uri)s">
 204     %(perc)s%(name)s;
 205 """ % {'perc': perc, 'name': self.name, 'uri': self.uri}
 206
 207         return txt
 208
 209
 210 class DoctypeDeclaration:
 211     def __init__(self, name_=None):
 212         self.name = name_
 213         self.entries = []
 214         if self.name is None:
 215             # Add default entries
 216             self.name = "sconsdoc"
 217             self.addEntity("scons", "../scons.mod")
 218             self.addEntity("builders-mod", "builders.mod")
 219             self.addEntity("functions-mod", "functions.mod")
 220             self.addEntity("tools-mod", "tools.mod")
 221             self.addEntity("variables-mod", "variables.mod")
 222
 223     def addEntity(self, name, uri):
 224         self.entries.append(DoctypeEntity(name, uri))
 225
 226     def createDoctype(self):
 227         content = '<!DOCTYPE %s [\n' % self.name
 228         for e in self.entries:
 229             content += e.getEntityString()
 230         content += ']>\n'
 231
 232         return content
 233
 234 class TreeFactory:
 235     def __init__(self):
 236         pass
 237
 238     @staticmethod
 239     def newNode(tag, **kwargs):
 240         return etree.Element(tag, **kwargs)
 241
 242     @staticmethod
 243     def newSubNode(parent, tag, **kwargs):
 244         return etree.SubElement(parent, tag, **kwargs)
 245
 246     @staticmethod
 247     def newEtreeNode(tag, init_ns=False, **kwargs):
 248         if init_ns:
 249             NSMAP = {None: dbxsd,
 250                      'xsi' : xsi}
 251             return etree.Element(tag, nsmap=NSMAP, **kwargs)
 252
 253         return etree.Element(tag, **kwargs)
 254
 255     @staticmethod
 256     def copyNode(node):
 257         return copy.deepcopy(node)
 258
 259     @staticmethod
 260     def appendNode(parent, child):
 261         parent.append(child)
 262
 263     @staticmethod
 264     def hasAttribute(node, att):
 265         return att in node.attrib
 266
 267     @staticmethod
 268     def getAttribute(node, att):
 269         return node.attrib[att]
 270
 271     @staticmethod
 272     def setAttribute(node, att, value):
 273         node.attrib[att] = value
 274
 275     @staticmethod
 276     def getText(root):
 277         return root.text
 278
 279     @staticmethod
 280     def appendCvLink(root, key, lntail):
 281         linknode = etree.Entity('cv-link-' + key)
 282         linknode.tail = lntail
 283         root.append(linknode)
 284
 285     @staticmethod
 286     def setText(root, txt):
 287         root.text = txt
 288
 289     @staticmethod
 290     def getTail(root):
 291         return root.tail
 292
 293     @staticmethod
 294     def setTail(root, txt):
 295         root.tail = txt
 296
 297     @staticmethod
 298     def writeGenTree(root, fp):
 299         dt = DoctypeDeclaration()
 300         fp.write(etree.tostring(root, encoding="utf-8",
 301                                 pretty_print=True,
 302                                 doctype=dt.createDoctype()).decode('utf-8'))
 303
 304     @staticmethod
 305     def writeTree(root, fpath):
 306         with open(fpath, 'wb') as fp:
 307             fp.write(etree.tostring(root, encoding="utf-8",
 308                                     pretty_print=True))
 309
 310     @staticmethod
 311     def prettyPrintFile(fpath):
 312         with open(fpath,'rb') as fin:
 313             tree = etree.parse(fin)
 314             pretty_content = etree.tostring(tree, encoding="utf-8",
 315                                             pretty_print=True)
 316
 317         with open(fpath,'wb') as fout:
 318             fout.write(pretty_content)
 319
 320     @staticmethod
 321     def decorateWithHeader(root):
 322         root.attrib["{"+xsi+"}schemaLocation"] = "%s %s/scons.xsd" % (dbxsd, dbxsd)
 323         return root
 324
 325     def newXmlTree(self, root):
 326         """ Return a XML file tree with the correct namespaces set,
 327             the element root as top entry and the given header comment.
 328         """
 329         NSMAP = {None: dbxsd, 'xsi' : xsi}
 330         t = etree.Element(root, nsmap=NSMAP)
 331         return self.decorateWithHeader(t)
 332
 333     # singleton to cache parsed xmlschema..
 334     xmlschema = None
 335
 336     @staticmethod
 337     def validateXml(fpath, xmlschema_context):
 338
 339         if TreeFactory.xmlschema is None:
 340             TreeFactory.xmlschema = etree.XMLSchema(xmlschema_context)
 341         try:
 342             doc = etree.parse(fpath)
 343         except Exception as e:
 344             print("ERROR: %s fails to parse:"%fpath)
 345             print(e)
 346             return False
 347         doc.xinclude()
 348         try:
 349             TreeFactory.xmlschema.assertValid(doc)
 350         except etree.XMLSchemaValidateError as e:
 351             print("ERROR: %s fails to validate:" % fpath)
 352             print(e)
 353             print(e.error_log.last_error.message)
 354             print("In file: [%s]" % e.error_log.last_error.filename)
 355             print("Line   : %d" % e.error_log.last_error.line)
 356             return False
 357
 358         except Exception as e:
 359             print("ERROR: %s fails to validate:" % fpath)
 360             print(e)
 361
 362             return False
 363         return True
 364
 365     @staticmethod
 366     def findAll(root, tag, ns=None, xp_ctxt=None, nsmap=None):
 367         expression = ".//{%s}%s" % (nsmap[ns], tag)
 368         if not ns or not nsmap:
 369             expression = ".//%s" % tag
 370         return root.findall(expression)
 371
 372     @staticmethod
 373     def findAllChildrenOf(root, tag, ns=None, xp_ctxt=None, nsmap=None):
 374         expression = "./{%s}%s/*" % (nsmap[ns], tag)
 375         if not ns or not nsmap:
 376             expression = "./%s/*" % tag
 377         return root.findall(expression)
 378
 379     @staticmethod
 380     def convertElementTree(root):
 381         """ Convert the given tree of etree.Element
 382             entries to a list of tree nodes for the
 383             current XML toolkit.
 384         """
 385         return [root]
 386
# Shared TreeFactory instance used by validate_all_xml() and SConsDocHandler.
tf = TreeFactory()
 388
 389
 390 class SConsDocTree:
 391     def __init__(self):
 392         self.nsmap = {'dbx': dbxsd}
 393         self.doc = None
 394         self.root = None
 395         self.xpath_context = None
 396
 397     def parseContent(self, content, include_entities=True):
 398         """ Parses the given text content as XML
 399
 400         This is the setup portion, called from parseContent in
 401         an SConsDocHandler instance - see the notes there.
 402         """
 403         if not include_entities:
 404             content = remove_entities(content)
 405         # Create domtree from given content string
 406         self.root = etree.fromstring(content)
 407
 408     def parseXmlFile(self, fpath):
 409         # Create domtree from file
 410         parser = etree.XMLParser(load_dtd=True, resolve_entities=False)
 411         domtree = etree.parse(fpath, parser)
 412         self.root = domtree.getroot()
 413
 414     def __del__(self):
 415         if self.doc is not None:
 416             self.doc.freeDoc()
 417         if self.xpath_context is not None:
 418             self.xpath_context.xpathFreeContext()
 419
# Literal percent sign, passed as a value into %-format templates
# (DoctypeEntity.getEntityString and the progress line of validate_all_xml).
perc = "%"
 421
 422 def validate_all_xml(dpaths, xsdfile=default_xsd):
 423     xmlschema_context = etree.parse(xsdfile)
 424
 425     fpaths = []
 426     for dp in dpaths:
 427         if dp.endswith('.xml') and isSConsXml(dp):
 428             path = '.'
 429             fpaths.append(dp)
 430         else:
 431             for path, dirs, files in os.walk(dp):
 432                 for f in files:
 433                     if f.endswith('.xml'):
 434                         fp = os.path.join(path, f)
 435                         if isSConsXml(fp):
 436                             fpaths.append(fp)
 437
 438     fails = []
 439     fpaths = sorted(fpaths)
 440     for idx, fp in enumerate(fpaths):
 441         fpath = os.path.join(path, fp)
 442         print("%.2f%s (%d/%d) %s" % (float(idx + 1) * 100.0 /float(len(fpaths)),
 443                                      perc, idx + 1, len(fpaths), fp))
 444
 445         if not tf.validateXml(fp, xmlschema_context):
 446             fails.append(fp)
 447             continue
 448
 449     if fails:
 450         return False
 451
 452     return True
 453
 454
 455 class Item:
 456     def __init__(self, name):
 457         self.name = name
 458         self.sort_name = name.lower()
 459         if self.sort_name[0] == '_':
 460             self.sort_name = self.sort_name[1:]
 461         self.sets = []
 462         self.uses = []
 463         self.summary = None
 464         self.arguments = None
 465     def cmp_name(self, name):
 466         if name[0] == '_':
 467             name = name[1:]
 468         return name.lower()
 469     def __eq__(self, other):
 470         return self.sort_name == other.sort_name
 471     def __lt__(self, other):
 472         return self.sort_name < other.sort_name
 473
 474
class Builder(Item):
    """Item subclass used for ``builder`` elements; adds nothing to Item."""
    pass
 477
 478
class Function(Item):
    """Item subclass used for ``scons_function`` elements; adds nothing to Item."""
    pass
 481
 482
 483 class Tool(Item):
 484     def __init__(self, name):
 485         super().__init__(name)
 486         self.entity = self.name.replace('+', 'X')
 487
 488
class ConstructionVariable(Item):
    """Item subclass used for ``cvar`` elements; adds nothing to Item."""
    pass
 491
 492
 493 class Arguments:
 494     def __init__(self, signature, body=None):
 495         if not body:
 496             body = []
 497         self.body = body
 498         self.signature = signature
 499     def __str__(self):
 500         s = ''.join(self.body).strip()
 501         result = []
 502         for m in re.findall(r'([a-zA-Z/_]+|[^a-zA-Z/_]+)', s):
 503             if ' ' in m:
 504                 m = '"%s"' % m
 505             result.append(m)
 506         return ' '.join(result)
 507     def append(self, data):
 508         self.body.append(data)
 509
 510
 511 class SConsDocHandler:
 512     def __init__(self):
 513         self.builders = {}
 514         self.functions = {}
 515         self.tools = {}
 516         self.cvars = {}
 517
 518     def parseItems(self, domelem, xpath_context, nsmap):
 519         items = []
 520
 521         for i in tf.findAll(domelem, "item", dbxid, xpath_context, nsmap):
 522             txt = tf.getText(i)
 523             if txt is not None:
 524                 txt = txt.strip()
 525                 if len(txt):
 526                     items.append(txt.strip())
 527
 528         return items
 529
 530     def parseUsesSets(self, domelem, xpath_context, nsmap):
 531         uses = []
 532         sets = []
 533
 534         for u in tf.findAll(domelem, "uses", dbxid, xpath_context, nsmap):
 535             uses.extend(self.parseItems(u, xpath_context, nsmap))
 536         for s in tf.findAll(domelem, "sets", dbxid, xpath_context, nsmap):
 537             sets.extend(self.parseItems(s, xpath_context, nsmap))
 538
 539         return sorted(uses), sorted(sets)
 540
 541     def parseInstance(self, domelem, map, Class,
 542                         xpath_context, nsmap, include_entities=True):
 543         name = 'unknown'
 544         if tf.hasAttribute(domelem, 'name'):
 545             name = tf.getAttribute(domelem, 'name')
 546         try:
 547             instance = map[name]
 548         except KeyError:
 549             instance = Class(name)
 550             map[name] = instance
 551         uses, sets = self.parseUsesSets(domelem, xpath_context, nsmap)
 552         instance.uses.extend(uses)
 553         instance.sets.extend(sets)
 554         if include_entities:
 555             # Parse summary and function arguments
 556             for s in tf.findAllChildrenOf(domelem, "summary", dbxid, xpath_context, nsmap):
 557                 if instance.summary is None:
 558                     instance.summary = []
 559                 instance.summary.append(tf.copyNode(s))
 560             for a in tf.findAll(domelem, "arguments", dbxid, xpath_context, nsmap):
 561                 if instance.arguments is None:
 562                     instance.arguments = []
 563                 instance.arguments.append(tf.copyNode(a))
 564
 565     def parseDomtree(self, root, xpath_context=None, nsmap=None, include_entities=True):
 566         # Process Builders
 567         for b in tf.findAll(root, "builder", dbxid, xpath_context, nsmap):
 568             self.parseInstance(b, self.builders, Builder,
 569                                xpath_context, nsmap, include_entities)
 570         # Process Functions
 571         for f in tf.findAll(root, "scons_function", dbxid, xpath_context, nsmap):
 572             self.parseInstance(f, self.functions, Function,
 573                                xpath_context, nsmap, include_entities)
 574         # Process Tools
 575         for t in tf.findAll(root, "tool", dbxid, xpath_context, nsmap):
 576             self.parseInstance(t, self.tools, Tool,
 577                                xpath_context, nsmap, include_entities)
 578         # Process CVars
 579         for c in tf.findAll(root, "cvar", dbxid, xpath_context, nsmap):
 580             self.parseInstance(c, self.cvars, ConstructionVariable,
 581                                xpath_context, nsmap, include_entities)
 582
 583     def parseContent(self, content, include_entities=True):
 584         """Parse the given content as XML.
 585
 586         This method is used when we generate the basic lists of entities
 587         for the builders, tools and functions.  So we usually don't
 588         bother about namespaces and resolving entities here...
 589         this is handled in parseXmlFile below (step 2 of the overall process).
 590         """
 591         # Create doctree
 592         t = SConsDocTree()
 593         t.parseContent(content, include_entities)
 594         # Parse it
 595         self.parseDomtree(t.root, t.xpath_context, t.nsmap, include_entities)
 596
 597     def parseXmlFile(self, fpath):
 598         # Create doctree
 599         t = SConsDocTree()
 600         t.parseXmlFile(fpath)
 601         # Parse it
 602         self.parseDomtree(t.root, t.xpath_context, t.nsmap)
 603
 604 def importfile(path):
 605     """Import a Python source file or compiled file given its path."""
 606     from importlib.util import MAGIC_NUMBER
 607     with open(path, 'rb') as ifp:
 608         is_bytecode = MAGIC_NUMBER == ifp.read(len(MAGIC_NUMBER))
 609     filename = os.path.basename(path)
 610     name, ext = os.path.splitext(filename)
 611     if is_bytecode:
 612         loader = importlib._bootstrap_external.SourcelessFileLoader(name, path)
 613     else:
 614         loader = importlib._bootstrap_external.SourceFileLoader(name, path)
 615     # XXX We probably don't need to pass in the loader here.
 616     spec = importlib.util.spec_from_file_location(name, path, loader=loader)
 617     try:
 618         return importlib._bootstrap._load(spec)
 619     except ImportError:
 620         raise Exception(path, sys.exc_info())
 621
 622 # Local Variables:
 623 # tab-width:4
 624 # indent-tabs-mode:nil
 625 # End:
 626 # vim: set expandtab tabstop=4 shiftwidth=4: