xmlmerge.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # XML Merge 2.0.1
   5
   6 # Copyright 2008,2009  Felix Rabe  <public@felixrabe.net>
   7
   8
   9 # This file is part of XML Merge.
  10
  11 # XML Merge is free software: you can redistribute it and/or modify it
  12 # under the terms of the GNU Lesser General Public License as published by
  13 # the Free Software Foundation, either version 3 of the License, or (at
  14 # your option) any later version.
  15
  16 # XML Merge is distributed in the hope that it will be useful, but
  17 # WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU Lesser General Public License for more details.
  20
  21 # You should have received a copy of the GNU Lesser General Public License
  22 # along with XML Merge.  If not, see <http://www.gnu.org/licenses/>.
  23
  24
  25 # Developed (i.e. tested) mainly on Win32 using Python 2.6.4 + lxml 2.2.2,
  26 # and to a lesser extent also with Python 2.5.4 + lxml 2.1.1.
  27
  28 """
  29 The purpose of XML Merge is to preprocess any kind of XML file with great
  30 flexibility.
  31
  32 XML Merge performs (among other things) recursive XML file inclusion and
  33 XML element and attribute modification.
  34
  35 XML Merge is a Python module. It is normally invoked as a program from the
  36 command line, but can equally well be used from within another Python
  37 program or module.
  38 """
  39
  40 __version_info__ = (2, 0, 1)
  41 __version__ = ".".join(str(n) for n in __version_info__[:2])
  42
  43 ## IMPORTS AND CONSTANTS
  44
  45 import copy
  46 import itertools
  47 import optparse
  48 import os
  49 import re
  50 import sys
  51 import textwrap
  52
  53 import lxml.etree as ET
  54
  55 # Namespace mapping (can be directly used for lxml nsmap arguments):
  56 xmns = {"xm":   "urn:felixrabe:xmlns:xmlmerge:preprocess",
  57         "xmt":  "urn:felixrabe:xmlns:xmlmerge:inctrace"}
  58
  59
  60 ## COMMAND LINE OPTION PARSING
  61
  62 class OptionParser(optparse.OptionParser):
  63
  64     def __init__(self, *a, **kw):
  65         optparse.OptionParser.__init__(self, *a, **kw)
  66         self.add_option("-i", "--input",
  67                         help=("(REQUIRED) input XML file"))
  68         self.add_option("-o", "--output",
  69                         help=("output XML file (.out.xml if not given)"))
  70         self.add_option("-s", "--xml-schema",
  71                         help=("XML Schema (.xsd) to validate output " +
  72                               "against"))
  73         self.add_option("-r", "--reference",
  74                         help=("reference XML file to compare output " +
  75                               "against"))
  76         self.add_option("-d", "--html-diff", action="store_true",
  77                         help=("only with -r; if output and reference " +
  78                               "differ, produce a HTML file showing the " +
  79                               "differences"))
  80         self.add_option("-t", "--trace-includes", action="store_true",
  81                         help=("add tracing information to included " +
  82                               "XML fragments"))
  83         self.add_option("-v", "--verbose", action="store_const",
  84                         dest="verbose", const=3,
  85                         help=("show debugging messages"))
  86         self.add_option("-q", "--quiet", action="store_const",
  87                         dest="verbose", const=1,
  88                         help=("only show error messages"))
  89         self.set_defaults(verbose=2)
  90
  91         # Explanation: levels of verbosity
  92         # --quiet   -> self.verbose == 1  # only show error messages
  93         #           -> self.verbose == 2  # no verbosity option given
  94         # --verbose -> self.verbose == 3  # show debugging messages
  95
  96     def error(self, *a, **kw):
  97         self.print_help()
  98         return optparse.OptionParser.error(self, *a, **kw)
  99
 100
 101 def parse_command_line(argv):
 102     """
 103     parse_command_line(argv) -> optparse.Values
 104
 105     Parse argv and return an optparse.Values object containing the options.
 106
 107     This function performs all the necessary checks and conversions to make
 108     sure all necessary options are given, and that all options are
 109     available in a normalized format.
 110
 111     It also tries to create the containing directory for the output file if
 112     it does not exist already.
 113     """
 114     # Parse options using OptionParser:
 115     option_parser = OptionParser()
 116     options, args = option_parser.parse_args(argv[1:])
 117
 118     # Make sure only options, and no other arguments, are passed on the
 119     # command line:
 120     try:
 121         assert args == []
 122         assert options.input is not None
 123     except:
 124         option_parser.error("Error: invalid argument list")
 125
 126     # If the output option has been omitted, build the output filename from
 127     # the input filename, resulting in the file extension ".out.xml":
 128     if options.output is None:
 129         if options.input.lower().endswith(".xml"):
 130             options.output = options.input[:-4] + ".out.xml"
 131         else:
 132             options.output = options.input      + ".out.xml"
 133
 134     # Convert all filename options to normalized absolutized pathnames:
 135     for n in "input output reference".split():
 136         if getattr(options, n) is None: continue  # if "-r" was not given
 137         setattr(options, n, os.path.abspath(getattr(options, n)))
 138
 139     # When --verbose, print all filename options:
 140     if options.verbose >= 3:
 141         print "Input:     %s" % options.input
 142         print "Output:    %s" % options.output
 143         print "Reference: %s" % options.reference
 144
 145     # Make sure there is a directory where the output XML file should go:
 146     try:
 147         os.makedirs(os.path.dirname(options.output))
 148     except:
 149         pass  # fail later if there still is no output directory now
 150
 151     return options
 152
 153
 154 ## XML PROCESSING AND COMPARISON
 155
 156 def read_input_file(input_filename):
 157     """
 158     read_input_file(input_filename) -> ET._Element
 159
 160     Read the input file, and return the corresponding XML Element object,
 161     the element tree root.
 162     """
 163     input_xml = ET.parse(input_filename).getroot()
 164     return input_xml
 165
 166 def postprocess_xml(output_xml):
 167     """
 168     postprocess_xml(output_xml) -> ET._Element
 169
 170     Remove unnecessary namespace declarations and whitespace. Returns a
 171     modified copy of output_xml. The argument may be modified by calling
 172     this function.
 173     """
 174     # Remove unused namespace declarations:
 175     # (http://codespeak.net/pipermail/lxml-dev/2009-September/004888.html)
 176     ns_root = ET.Element("NS_ROOT", nsmap=xmns)
 177     ns_root.append(output_xml)
 178     ns_root.remove(output_xml)
 179     # If you don't perform this copy, each output_xml element's
 180     # getroottree() will report the temporary tree containing the empty
 181     # NS_ROOT element. This is not a hack, this is about how lxml works.
 182     output_xml = ET.ElementTree(copy.copy(output_xml)).getroot()
 183
 184     # Make pretty-printing work by removing unnecessary whitespace:
 185     for el in output_xml.iter():
 186         if el.text and not el.text.strip():
 187             el.text = None
 188         if el.tail and not el.tail.strip():
 189             el.tail = None
 190
 191     return output_xml
 192
 193 def write_output_file(output_xml, output_filename):
 194     """
 195     Write the output XML Element to the specified output filename.
 196     """
 197     output_xmltree = output_xml.getroottree()
 198     output_xmltree.write(output_filename, pretty_print=True,
 199                          xml_declaration=True, encoding="utf-8")
 200
 201 def read_xml_schema_file(xml_schema_filename):
 202     """
 203     read_xml_schema_file(xml_schema_filename) -> ET.XMLSchema
 204
 205     Read the XML Schema file, and return the corresponding XML Schema
 206     object.
 207     """
 208     xml_schema_xmltree = ET.parse(xml_schema_filename)
 209     xml_schema = ET.XMLSchema(xml_schema_xmltree)
 210     return xml_schema
 211
 212 def match_against_schema(options, output_xml, xml_schema):
 213     """
 214     match_against_schema(options, output_xml, xml_schema) -> bool
 215
 216     Validate output against XML Schema.
 217
 218     The result is True if the output XML Element (tree) matches the XML
 219     Schema, otherwise the result is False.
 220     """
 221     is_valid = xml_schema.validate(output_xml.getroottree())
 222     if options.verbose >= 2:
 223         if is_valid:
 224             print "Output matches XML Schema."
 225         else:
 226             print "Output invalid according to XML Schema."
 227             print xml_schema.error_log.last_error
 228     return is_valid
 229
 230 def match_against_reference(options, output_xml):
 231     """
 232     match_against_reference(options, output_xml) -> bool
 233
 234     Compare the output string (read from file options.output) to the
 235     reference string (read from options.reference). If they are not the
 236     same (bytewise), and if options.html_diff is True, create an HTML file
 237     showing the differences.
 238
 239     The result is True if output and reference are the same (bytewise),
 240     otherwise the result is False.
 241     """
 242     reference_filename = options.reference
 243     output_filename = options.output
 244     do_html_diff = options.html_diff
 245
 246     reference_str = file(reference_filename, "rb").read()
 247     output_str = file(output_filename, "rb").read()
 248     is_valid = (reference_str == output_str)
 249     if options.verbose >= 2:
 250         if is_valid:
 251             print "Output matches reference."
 252         elif not do_html_diff:
 253             print "Output and reference differ."
 254     if do_html_diff and not is_valid:
 255         html_filename = "%s.diff.html" % output_filename
 256         if options.verbose >= 2:
 257             print ("Output and reference differ - " +
 258                    "generating '%s'..." % html_filename)
 259         create_reference_diff_html(html_filename, reference_str,
 260                                    output_str)
 261     return is_valid
 262
 263 def create_reference_diff_html(html_filename, reference_str, output_str):
 264     """
 265     Create an HTML file (created at html_filename) showing the differrences
 266     between the reference string and the output string side-by-side.
 267     """
 268     reference_lines = reference_str.splitlines()
 269     output_lines    = output_str   .splitlines()
 270
 271     import difflib
 272     html_diff = difflib.HtmlDiff(wrapcolumn=75)
 273     html_str = html_diff.make_file(reference_lines, output_lines,
 274                                    "Reference",     "Output")
 275     file(html_filename, "w").write(html_str)
 276
 277
 278 ## XML ERROR REPORTING
 279
 280 def print_xml_error(xml_element, code=None):
 281     print >>sys.stderr, "*** XML ERROR ***"
 282     tree = xml_element.getroottree()
 283     print >>sys.stderr, "File URL:", tree.docinfo.URL
 284     xpath = tree.getpath(xml_element)
 285     print >>sys.stderr, "Line:", xml_element.sourceline, " XPath:", xpath
 286     if code is not None:
 287         print >>sys.stderr, "Offending Python code / expression:"
 288         print >>sys.stderr, "    %s" % code.replace("\n", "\n    ")
 289
 290
 291 ## XML PREPROCESS CLASS
 292
 293 class XMLPreprocess(object):
 294     """
 295     Use:
 296
 297     >>> proc = XMLPreprocess()
 298     >>> output_xml = proc(options, input_xml)  # input_xml may change
 299     """
 300
 301     def __init__(self, initial_namespace={}):
 302         super(XMLPreprocess, self).__init__()
 303         self._namespace_stack = [initial_namespace]
 304
 305     def __call__(self, xml_element, namespace=None,
 306                  trace_includes=False, xml_filename=None):
 307         """
 308         XMLPreprocess()(...)
 309
 310         Preprocess the input XML Element, xml_element. The element tree of
 311         xml_element will be modified in-place.
 312
 313         The namespace given should be a dict that can be used as a Python
 314         namespace. This namespace will be used in XML attribute
 315         substitution.
 316
 317         If trace_includes is True, the output will contain tags that
 318         surround included sections of the file. The xml_filename argument
 319         is then required.
 320
 321         Processing tags will recursively call this method (__call__) for
 322         preprocessing the included file and for recursive inclusion.
 323         """
 324         if namespace is not None:
 325             self._namespace_stack.append(namespace)
 326         self.namespace = self._namespace_stack[-1]
 327         self.trace_includes = trace_includes
 328         self.xml_filename = xml_filename
 329
 330         ns = "{%s}" % xmns["xm"]
 331         len_ns = len(ns)
 332
 333         # Evaluate Python expressions in the attributes of xml_element:
 334         for attr_name, attr_value in xml_element.items():  # attr map
 335             v = self._eval_substitution(attr_value, xml_element)
 336             xml_element.set(attr_name, v)
 337
 338         # If xml_element has xmns["xm"] as its namespace, proceed with the
 339         # appropriate method of this class:
 340         if xml_element.nsmap.get(xml_element.prefix) == xmns["xm"]:
 341             tag = xml_element.tag[len_ns:]  # just the tag without namespc
 342             method = "_xm_" + tag.lower()  # tolerate any case
 343             if not hasattr(self, method):
 344                 raise Exception, "cannot process <xm:%s/>" % tag
 345             getattr(self, method)(xml_element)  # call the method
 346             # Preserve tail text:
 347             tail = xml_element.tail
 348             if tail is not None:
 349                 prev = xml_element.getprevious()
 350                 parent = xml_element.getparent()
 351                 if prev is not None:
 352                     prev.tail = (prev.tail or "") + tail
 353                 else:
 354                     parent.text = (parent.text or "") + tail
 355             xml_element.getparent().remove(xml_element)
 356
 357         # If not, recurse:
 358         else:
 359             self._recurse_into(xml_element)
 360
 361         return None
 362
 363     def _recurse_into(self, xml_element, namespace=None):
 364         if namespace is not None:
 365             self._namespace_stack.append(namespace)
 366         for xml_sub_element in xml_element.xpath("*"):
 367             self(xml_sub_element, None,
 368                  self.trace_includes, self.xml_filename)
 369         if namespace is not None:
 370             self._namespace_stack.pop()
 371             self.namespace = self._namespace_stack[-1]
 372
 373     _eval_substitution_regex = re.compile(r"\{(.*?)\}")
 374
 375     def _eval_substitution(self, string, xml_element=None):
 376         """
 377         Evaluate Python expressions within strings.
 378
 379         Internal method to perform substitution of Python expressions
 380         within attribute values, {x} -> str(eval(x)).  Example:
 381
 382         >>> self._eval_substitution("3 + 5 = {3 + 5} in Python")
 383         '3 + 5 = 8 in Python'
 384
 385         Multiple Python expressions in one string are supported as well.
 386         """
 387         new_str = []  # faster than continuously concatenating strings
 388         last_index = 0
 389         for match in self._eval_substitution_regex.finditer(string):
 390             new_str.append(string[last_index:match.start()])
 391             expression = match.group(1)
 392             try:
 393                 result = str(eval(expression, self.namespace))
 394             except:
 395                 if xml_element is not None:
 396                     print_xml_error(xml_element, code=expression)
 397                     print >>sys.stderr
 398                 raise
 399             new_str.append(result)
 400             last_index = match.end()
 401         new_str.append(string[last_index:])
 402         return "".join(new_str)
 403
 404     def _xm_addelements(self, xml_element):
 405         """
 406         Add subelements to, before, or after the element selected by XPath
 407         (@to, @before or @after).
 408
 409         Exactly one of (@to, @before, @after) must be specified.  And the
 410         XPath expression must return exactly one element.  These conditions
 411         are checked by assertions and will raise an exception if not met.
 412         """
 413         to     = xml_element.get("to")
 414         before = xml_element.get("before")
 415         after  = xml_element.get("after")
 416
 417         assert sum((to is None, before is None, after is None)) == 2
 418         select = to or before or after
 419
 420         selected_context_nodes = xml_element.xpath(select)
 421         assert len(selected_context_nodes) == 1
 422
 423         context_node = selected_context_nodes[0]
 424         replace_context_node = False
 425
 426         if to is not None:
 427             f = "append"
 428         if before is not None:
 429             f = "addprevious"
 430         if after is not None:
 431             f = "addnext"
 432             replace_context_node = True
 433
 434         for xml_sub_element in xml_element:
 435             getattr(context_node, f)(xml_sub_element)
 436             if replace_context_node:
 437                 context_node = xml_sub_element
 438
 439     def _xm_block(self, xml_element):
 440         """
 441         Create a scope to contain visibility of newly assigned Python
 442         variables.  This works the same way that Python itself scopes
 443         variables, i.e. by creating a shallow copy of the Python namespace.
 444         E.g. assignments to list items will be visible to outside scopes!
 445         """
 446         self._recurse_into(xml_element, self.namespace.copy())
 447         for xml_sub_node in xml_element[::-1]:  # get children reversed
 448             xml_element.addnext(xml_sub_node)
 449
 450     def _xm_comment(self, xml_element):
 451         """
 452         A comment that is removed by XML Merge.
 453         """
 454         pass  # that's it
 455
 456     def _xm_defaultvar(self, xml_element):
 457         """
 458         Set (zero or more) variables in the active Python namespace, if not
 459         already set.
 460         """
 461         ns = self.namespace
 462         for attr_name, attr_value in xml_element.items():  # attr map
 463             if not attr_name in ns:
 464                 try:
 465                     ns[attr_name] = eval(attr_value, ns)
 466                 except:
 467                     print_xml_error(xml_element, code=attr_value)
 468                     print >>sys.stderr
 469                     raise
 470
 471     def _xm_include(self, xml_element):
 472         """
 473         Include from the specified file (@file) the elements selected by
 474         XPath (@select) after preprocessing said file.
 475
 476         The @file attribute is the only required attribute.
 477
 478         Items can be imported from the included (and preprocessed) file's
 479         Python namespace into the current file's namespace using the
 480         @import attribute, which may either be a comma-separated list of
 481         identifiers, or '*' to import the complete namespace.
 482
 483         Remaining attributes will be treated as variable assignments and
 484         put in the Python namespace used for processing the included file.
 485         """
 486         attrib = xml_element.attrib
 487         file_   = attrib.pop("file", None)
 488         select  = attrib.pop("select", None)
 489         import_ = attrib.pop("import", None)
 490         assert file_ is not None
 491         remaining_attribs = dict(attrib.items())
 492
 493         # Load the to-be-included file:
 494         p = os.path
 495
 496         xml_input_dirname = p.dirname(self.xml_filename)
 497         xml_incl_filename = p.join(xml_input_dirname, file_)
 498         xml_incl_filename = p.normpath(xml_incl_filename)
 499         # Always use '/' for normalized tracing information:
 500         xml_incl_filename = xml_incl_filename.replace("\\", "/")
 501
 502         xml_incl = ET.parse(xml_incl_filename).getroot()
 503
 504         # Build the initial namespace from a copy of the current namespace
 505         # plus the remaining attributes of the <xm:Include/> element:
 506         current_ns = self.namespace
 507         initial_namespace = current_ns.copy()
 508         for attr_name, attr_value in remaining_attribs.items():  # attr map
 509             try:
 510                 initial_namespace[attr_name] = eval(attr_value, current_ns)
 511             except:
 512                 print_xml_error(xml_element, code=attr_value)
 513                 print >>sys.stderr
 514                 raise
 515
 516         # Preprocess the to-be-included file:
 517         proc = XMLPreprocess(initial_namespace=initial_namespace)
 518         proc(xml_incl, trace_includes=self.trace_includes,
 519              xml_filename=xml_incl_filename)
 520
 521         # Select elements to include:
 522         included_elements = []
 523         if select is not None:
 524             included_elements = xml_incl.xpath(select)
 525
 526         # Include the elements:
 527         context_node = xml_element
 528         for inc_elem in included_elements:
 529             context_node.addnext(inc_elem)
 530             context_node = inc_elem
 531
 532         # Import from included namespace:
 533         imported_namespace = {}
 534         if import_ is not None:
 535             import_ = [x.strip() for x in import_.split(",")]
 536             if "*" in import_:  # import all
 537                 imported_namespace = proc.namespace
 538             else:
 539                 ns = proc.namespace
 540                 imported_namespace = dict((x, ns[x]) for x in import_)
 541         self.namespace.update(imported_namespace)
 542
 543     def _xm_loop(self, xml_element):
 544         """
 545         Loop over a range of integer values.
 546
 547         The first attribute is evaluated as the loop counter.  Example:
 548
 549             i="range(5, 9)"  =>  iterates with i being 5, 6, 7, 8
 550
 551         WARNING: The loop counter attribute, as well as all substitutions
 552         in subelement attributes (XPath ".//@*": "...{foo_bar}...") will
 553         (wholly or partially) be evaluated as Python expressions using
 554         eval().
 555         """
 556         # Get the loop counter name and list:
 557         loop_counter_name = xml_element.keys()[0]
 558         loop_counter_expr = xml_element.get(loop_counter_name)
 559         try:
 560             loop_counter_list = eval(loop_counter_expr, self.namespace)
 561         except:
 562             print_xml_error(xml_element, code=loop_counter_expr)
 563             print >>sys.stderr
 564             raise
 565
 566         # Loop:
 567         context_node = xml_element  # for new elements
 568         for loop_counter_value in loop_counter_list:
 569             self.namespace[loop_counter_name] = loop_counter_value
 570             tailtext = xml_element.tail
 571             xml_element.tail = None  # xml_element regarded as document
 572             # xml_element_copy = copy.copy(xml_element)  # CRASH
 573             # The following line is the workaround for the preceeding one:
 574             xml_element_copy = ET.XML(ET.tostring(xml_element))
 575             xml_element.addnext(xml_element_copy)  # temporarily
 576             xml_element.tail = xml_element_copy.tail = tailtext
 577             self._recurse_into(xml_element_copy)
 578             xml_element_copy.getparent().remove(xml_element_copy)
 579             if xml_element_copy.text is not None:
 580                 if context_node.tail is None:
 581                     context_node.tail = u""
 582                 context_node.tail += xml_element_copy.text
 583             for xml_sub_node in xml_element_copy[:]:
 584                 context_node.addnext(xml_sub_node)
 585                 context_node = xml_sub_node
 586
 587     def _xm_pythoncode(self, xml_element):
 588         """
 589         Execute Python code in the current namespace.
 590
 591         'self' and 'xml_element' are supplied temporarily. They are added
 592         to the current namespace before the 'exec' statement, and removed
 593         again afterwards.
 594         """
 595         code = textwrap.dedent(xml_element.text).strip()
 596         self.namespace["self"] = self
 597         self.namespace["xml_element"] = xml_element
 598         try:
 599             exec code in self.namespace
 600         except:
 601             print_xml_error(xml_element, code=code)
 602             print >>sys.stderr
 603             raise
 604         del self.namespace["self"], self.namespace["xml_element"]
 605
 606     def _xm_removeattributes(self, xml_element):
 607         """
 608         Remove the attributes (@name) from the (zero or more) elements
 609         selected by XPath (@from or @select).
 610
 611         It is not considered an error if an attribute cannot be found on a
 612         selected element.
 613         """
 614         attr_name = xml_element.get("name")
 615         select_xpath = xml_element.get("from") or xml_element.get("select")
 616         for xml_element_selected in xml_element.xpath(select_xpath):
 617             # Can't find another way to remove an attribute than by using
 618             # 'attrib':
 619             attrib = xml_element_selected.attrib
 620             if attr_name in attrib:
 621                 del xml_element_selected.attrib[attr_name]
 622
 623     def _xm_removeelements(self, xml_element):
 624         """
 625         Remove (zero or more) elements selected by XPath (@select).
 626         """
 627         select = xml_element.get("select")
 628         assert select is not None
 629         elements = xml_element.xpath(select)
 630         for el in elements:
 631             el.getparent().remove(el)
 632
 633     def _xm_setattribute(self, xml_element):
 634         """
 635         Assign the value (@value) to the attribute (@name) of the element
 636         selected by XPath (@of or @select).
 637
 638         Example:
 639             <Object index="0x1234"/>
 640             <xm:SetAttribute of="../Object" name="otherattr" value="hallo"/>
 641
 642         Leads to:
 643             <Object index="0x1234" otherattr="hello"/>
 644         """
 645         select  = xml_element.get("select", xml_element.get("of"))
 646         name    = xml_element.get("name")
 647         value   = xml_element.get("value")
 648         assert sum((select is None, name is None, value is None)) == 0
 649         elements = xml_element.xpath(select)
 650         for el in elements:
 651             el.set(name, value)
 652
 653     def _xm_text(self, xml_element):
 654         """
 655         Perform '{}' substitution on text.
 656         """
 657         text = xml_element.text
 658         if text is None: return
 659         tail = self._eval_substitution(text, xml_element)
 660         tail += xml_element.tail or ""
 661         xml_element.tail = tail
 662
 663     def _xm_var(self, xml_element):
 664         """
 665         Set (zero or more) variables in the active Python namespace.
 666         """
 667         ns = self.namespace
 668         for attr_name, attr_value in xml_element.items():  # attr map
 669             try:
 670                 ns[attr_name] = eval(attr_value, ns)
 671             except:
 672                 print_xml_error(xml_element, code=attr_value)
 673                 print >>sys.stderr
 674                 raise
 675
 676
 677 ## MAIN FUNCTION
 678
 679 def main(argv, **kargs):
 680     """
 681     main(argv, **kargs) -> int
 682
 683     Process input to produce output according to the command line options
 684     (given in argv).  These keyword arguments (**kargs) are recognized:
 685
 686     initial_namespace
 687       Gets passed on as the initial Python namespace to XMLPreprocess().
 688
 689     After the XML Merge Manual, this is the first piece of the code a new
 690     developer will read. Keep this code as simple as possible if you change
 691     it in any way.
 692
 693     These are all possible exit status codes returned or raised (using
 694     SystemExit) by main or the functions it calls:
 695         - On success, and if all requested validations (-s, -r) match:
 696             return 0
 697         - On error, e.g. wrong options (see parse_command_line()):
 698             return 1
 699         - On mismatch (either XML Schema (-s) or reference (-r)):
 700             return mismatch_bitmap  # see end of main()
 701         - To aid understanding the bitmap: If N matching functions are
 702           provided, and all are requested and all fail to match the output
 703           file:
 704             return (2 ** N - 1) * 2  # mismatch_bitmap
 705     """
 706     # Parse command line to get options:
 707     options = parse_command_line(argv)
 708
 709     # Input file => preprocessing => output file:
 710     xml = read_input_file(options.input)
 711     proc = XMLPreprocess(**kargs)
 712     proc(xml, trace_includes=options.trace_includes,
 713          xml_filename=options.input)
 714     xml = postprocess_xml(xml)
 715     write_output_file(xml, options.output)
 716
 717     # If -s: Compare output to XML Schema file:
 718     matches_schema = True  # False means: match requested and negative
 719     if options.xml_schema is not None:
 720         xml_schema = read_xml_schema_file(options.xml_schema)
 721         matches_schema = match_against_schema(options, xml, xml_schema)
 722
 723     # If -r: Compare output to reference:
 724     matches_reference = True  # False means: match requested and negative
 725     if options.reference is not None:
 726         matches_reference = match_against_reference(options, xml)
 727
 728     # Calculate and return the mismatch bitmap:
 729     mismatch_bitmap = 0
 730     mismatch_bitmap |= int(not matches_schema)    << 1  # 2 on mismatch
 731     mismatch_bitmap |= int(not matches_reference) << 2  # 4 on mismatch
 732     return mismatch_bitmap
 733
 734
 735 if __name__ == "__main__":
 736     sys.exit(main(sys.argv))