xmlmerge.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # XML Merge 2.0.1.git
   5
   6 # Copyright 2008,2009  Felix Rabe  <public@felixrabe.net>
   7
   8 # The main() function (search "def main") is a good starting point for
   9 # understanding the code.
  10
  11
  12 # This file is part of XML Merge.
  13
  14 # XML Merge is free software: you can redistribute it and/or modify it
  15 # under the terms of the GNU Lesser General Public License as published by
  16 # the Free Software Foundation, either version 3 of the License, or (at
  17 # your option) any later version.
  18
  19 # XML Merge is distributed in the hope that it will be useful, but
  20 # WITHOUT ANY WARRANTY; without even the implied warranty of
  21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 # GNU Lesser General Public License for more details.
  23
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with XML Merge.  If not, see <http://www.gnu.org/licenses/>.
  26
  27
  28 # Developed (i.e. tested) mainly on Win32 using Python 2.6.4 + lxml 2.2.2,
  29 # and to a lesser extent also with Python 2.5.4 + lxml 2.1.1.
  30
  31 """
  32 The purpose of XML Merge is to preprocess any kind of XML file with great
  33 flexibility.
  34
  35 XML Merge performs (among other things) recursive XML file inclusion and
  36 XML element and attribute modification.
  37
  38 XML Merge is a Python module. It is normally invoked as a program from the
  39 command line, but can equally well be used from within another Python
  40 program or module.
  41 """
  42
# Version information of this module as a tuple.
# NOTE(review): the header comment of this file says "2.0.1.git" while this
# tuple says (2, 0, 99, "git") -- confirm which one is current.
__version_info__ = (2, 0, 99, "git")
# Short version string built from the first two tuple items only ("2.0").
__version__ = ".".join(str(n) for n in __version_info__[:2])
  45
  46 ## IMPORTS AND CONSTANTS
  47
  48 import copy
  49 import itertools
  50 import optparse
  51 import os
  52 import re
  53 import sys
  54 import textwrap
  55
  56 import lxml.etree as ET
  57
# Namespace mapping (can be directly used for lxml nsmap arguments).
# Elements whose namespace is xmns["xm"] are treated as processing
# instructions by XMLPreprocess.  The "xmt" entry is presumably reserved for
# include tracing output -- TODO confirm, it is only used as part of nsmap
# in this file.
xmns = {"xm":   "tag:felixrabe.net,2011:xmlns:xmlmerge:preprocess",
        "xmt":  "tag:felixrabe.net,2011:xmlns:xmlmerge:inctrace"}
  61
  62
  63 ## COMMAND LINE OPTION PARSING
  64
  65 class OptionParser(optparse.OptionParser):
  66
  67     def __init__(self, *a, **kw):
  68         optparse.OptionParser.__init__(self, *a, **kw)
  69         self.add_option("-i", "--input",
  70                         help=("(REQUIRED) input XML file"))
  71         self.add_option("-o", "--output",
  72                         help=("output XML file (.out.xml if not given)"))
  73         self.add_option("-s", "--xml-schema",
  74                         help=("XML Schema (.xsd) to validate output " +
  75                               "against"))
  76         self.add_option("-r", "--reference",
  77                         help=("reference XML file to compare output " +
  78                               "against"))
  79         self.add_option("-d", "--html-diff", action="store_true",
  80                         help=("only with -r; if output and reference " +
  81                               "differ, produce a HTML file showing the " +
  82                               "differences"))
  83         self.add_option("-t", "--trace-includes", action="store_true",
  84                         help=("add tracing information to included " +
  85                               "XML fragments"))
  86         self.add_option("-v", "--verbose", action="store_const",
  87                         dest="verbose", const=3,
  88                         help=("show debugging messages"))
  89         self.add_option("-q", "--quiet", action="store_const",
  90                         dest="verbose", const=1,
  91                         help=("only show error messages"))
  92         self.set_defaults(verbose=2)
  93
  94         # Explanation: levels of verbosity
  95         # --quiet   -> self.verbose == 1  # only show error messages
  96         #           -> self.verbose == 2  # no verbosity option given
  97         # --verbose -> self.verbose == 3  # show debugging messages
  98
  99     def error(self, *a, **kw):
 100         self.print_help()
 101         return optparse.OptionParser.error(self, *a, **kw)
 102
 103
 104 def parse_command_line(argv):
 105     """
 106     parse_command_line(argv) -> optparse.Values
 107
 108     Parse argv and return an optparse.Values object containing the options.
 109
 110     This function performs all the necessary checks and conversions to make
 111     sure all necessary options are given.  The resulting options are provided in
 112     a normalized format.
 113
 114     It also tries to create the containing directory for the output file if
 115     it does not exist already.
 116     """
 117     # Parse options using OptionParser:
 118     option_parser = OptionParser()
 119     options, args = option_parser.parse_args(argv[1:])
 120
 121     # Make sure only options, and no other arguments, are passed on the
 122     # command line:
 123     try:
 124         assert args == []
 125         assert options.input is not None
 126     except:
 127         option_parser.error("Error: invalid argument list")
 128
 129     # If the output option has been omitted, build the output filename from
 130     # the input filename, resulting in the file extension ".out.xml":
 131     if options.output is None:
 132         if options.input.lower().endswith(".xml"):
 133             options.output = options.input[:-4] + ".out.xml"
 134         else:
 135             options.output = options.input      + ".out.xml"
 136
 137     # Convert all filename options to normalized absolutized pathnames:
 138     for n in "input output reference".split():
 139         if getattr(options, n) is None: continue  # if "-r" was not given
 140         setattr(options, n, os.path.abspath(getattr(options, n)))
 141
 142     # When --verbose, print all filename options:
 143     if options.verbose >= 3:
 144         print "Input:     %s" % options.input
 145         print "Output:    %s" % options.output
 146         print "Reference: %s" % options.reference
 147
 148     # Make sure there is a directory where the output XML file should go:
 149     try:
 150         os.makedirs(os.path.dirname(options.output))
 151     except:
 152         pass  # fail later if there still is no output directory now
 153
 154     return options
 155
 156
 157 ## XML PROCESSING AND COMPARISON
 158
 159 def read_input_file(input_filename):
 160     """
 161     read_input_file(input_filename) -> ET._Element
 162
 163     Read the input file, and return the corresponding XML Element object,
 164     the element tree root.
 165     """
 166     input_xml = ET.parse(input_filename).getroot()
 167     return input_xml
 168
 169 def postprocess_xml(output_xml):
 170     """
 171     postprocess_xml(output_xml) -> ET._Element
 172
 173     Remove unnecessary namespace declarations and whitespace. Returns a
 174     modified copy of output_xml. The argument may be modified by calling
 175     this function.
 176     """
 177     # Remove unused namespace declarations:
 178     # (http://codespeak.net/pipermail/lxml-dev/2009-September/004888.html)
 179     ns_root = ET.Element("NS_ROOT", nsmap=xmns)
 180     ns_root.append(output_xml)
 181     ns_root.remove(output_xml)
 182     # If you don't perform this copy, each output_xml element's
 183     # getroottree() will report the temporary tree containing the empty
 184     # NS_ROOT element. This is not a hack, this is about how lxml works.
 185     output_xml = ET.ElementTree(copy.copy(output_xml)).getroot()
 186
 187     # Make pretty-printing work by removing unnecessary whitespace:
 188     for el in output_xml.iter():
 189         if el.text and not el.text.strip():
 190             el.text = None
 191         if el.tail and not el.tail.strip():
 192             el.tail = None
 193
 194     return output_xml
 195
 196 def write_output_file(output_xml, output_filename):
 197     """
 198     Write the output XML Element to the specified output filename.
 199     """
 200     output_xmltree = output_xml.getroottree()
 201     output_xmltree.write(output_filename, pretty_print=True,
 202                          xml_declaration=True, encoding="utf-8")
 203
 204 def read_xml_schema_file(xml_schema_filename):
 205     """
 206     read_xml_schema_file(xml_schema_filename) -> ET.XMLSchema
 207
 208     Read the XML Schema file, and return the corresponding XML Schema
 209     object.
 210     """
 211     xml_schema_xmltree = ET.parse(xml_schema_filename)
 212     xml_schema = ET.XMLSchema(xml_schema_xmltree)
 213     return xml_schema
 214
 215 def match_against_schema(options, output_xml):
 216     """
 217     match_against_schema(options, output_xml) -> bool
 218
 219     Validate output against XML Schema (file options.xml_schema).
 220
 221     The result is True if the output XML Element (tree) matches the XML
 222     Schema, otherwise the result is False.
 223     """
 224     xml_schema = read_xml_schema_file(options.xml_schema)
 225     is_valid = xml_schema.validate(output_xml.getroottree())
 226     if options.verbose >= 2:
 227         if is_valid:
 228             print "Output matches XML Schema."
 229         else:
 230             print "Output invalid according to XML Schema."
 231             print xml_schema.error_log.last_error
 232     return is_valid
 233
 234 def match_against_reference(options, output_xml):
 235     """
 236     match_against_reference(options, output_xml) -> bool
 237
 238     Compare the output string (read from file options.output) to the
 239     reference string (read from options.reference). If they are not the
 240     same (bytewise), and if options.html_diff is True, create an HTML file
 241     showing the differences.
 242
 243     The result is True if output and reference are the same (bytewise),
 244     otherwise the result is False.
 245     """
 246     reference_filename = options.reference
 247     output_filename = options.output
 248     do_html_diff = options.html_diff
 249
 250     reference_str = file(reference_filename, "rb").read()
 251     output_str = file(output_filename, "rb").read()
 252     is_valid = (reference_str == output_str)
 253     if options.verbose >= 2:
 254         if is_valid:
 255             print "Output matches reference."
 256         elif not do_html_diff:
 257             print "Output and reference differ."
 258     if do_html_diff and not is_valid:
 259         html_filename = "%s.diff.html" % output_filename
 260         if options.verbose >= 2:
 261             print ("Output and reference differ - " +
 262                    "generating '%s'..." % html_filename)
 263         create_reference_diff_html(html_filename, reference_str,
 264                                    output_str)
 265     return is_valid
 266
 267 def create_reference_diff_html(html_filename, reference_str, output_str):
 268     """
 269     Create an HTML file (created at html_filename) showing the differrences
 270     between the reference string and the output string side-by-side.
 271     """
 272     reference_lines = reference_str.splitlines()
 273     output_lines    = output_str   .splitlines()
 274
 275     import difflib
 276     html_diff = difflib.HtmlDiff(wrapcolumn=75)
 277     html_str = html_diff.make_file(reference_lines, output_lines,
 278                                    "Reference",     "Output")
 279     file(html_filename, "w").write(html_str)
 280
 281
 282 ## VARIOUS FUNCTIONS
 283
 284 def print_xml_error(xml_element, code=None):
 285     print >>sys.stderr, "*** XML ERROR ***"
 286     tree = xml_element.getroottree()
 287     print >>sys.stderr, "File URL:", tree.docinfo.URL
 288     xpath = tree.getpath(xml_element)
 289     print >>sys.stderr, "Line:", xml_element.sourceline, " XPath:", xpath
 290     if code is not None:
 291         print >>sys.stderr, "Offending Python code / expression:"
 292         print >>sys.stderr, "    %s" % code.replace("\n", "\n    ")
 293
 294
 295 _brace_substitution_regex = re.compile(r"\{(.*?)\}")
 296
 297 def brace_substitution(string, xml_element=None, namespace=None):
 298     """
 299     Evaluate Python expressions within strings.
 300
 301     This internal method substitutes Python expressions embedded in strings for
 302     their evaluated (string) values, like {x} -> str(eval(x)).  Example:
 303
 304     >>> self._eval_substitution("3 + 5 = {3 + 5} in Python")
 305     '3 + 5 = 8 in Python'
 306
 307     Multiple Python expressions in one string are supported as well.  Nested
 308     Python expressions are not supported.
 309     """
 310     if namespace is None: namespace = {}
 311     new_str = []  # faster than continuously concatenating strings
 312     last_index = 0
 313     for match in _brace_substitution_regex.finditer(string):
 314         new_str.append(string[last_index:match.start()])
 315         expression = match.group(1)
 316         try:
 317             result = str(eval(expression, namespace))
 318         except:
 319             if xml_element is not None:
 320                 print_xml_error(xml_element, code=expression)
 321                 print >>sys.stderr
 322             raise
 323         new_str.append(result)
 324         last_index = match.end()
 325     new_str.append(string[last_index:])
 326     return "".join(new_str)
 327
 328
 329 ## XML PREPROCESS CLASS
 330
 331 class XMLPreprocess(object):
 332     """
 333     Use:
 334
 335     >>> proc = XMLPreprocess()
 336     >>> output_xml = proc(options, input_xml)  # input_xml may change
 337     """
 338
 339     def __init__(self, initial_namespace={}):
 340         super(XMLPreprocess, self).__init__()
 341         self._namespace_stack = [initial_namespace]
 342
 343     def __call__(self, xml_element, namespace=None,
 344                  trace_includes=False, xml_filename=None):
 345         """
 346         XMLPreprocess()(...)
 347
 348         Preprocess the input XML Element, xml_element. The element tree of
 349         xml_element will be modified in-place.
 350
 351         The namespace given should be a dict that can be used as a Python
 352         namespace. This namespace will be used in XML attribute
 353         substitution.
 354
 355         If trace_includes is True, the output will contain tags that
 356         surround included sections of the file. The xml_filename argument
 357         is then required.
 358
 359         Processing tags will recursively call this method (__call__) for
 360         preprocessing the included file and for recursive inclusion.
 361         """
 362         if namespace is not None:
 363             self._namespace_stack.append(namespace)
 364         self.namespace = self._namespace_stack[-1]
 365         self.trace_includes = trace_includes
 366         self.xml_filename = xml_filename
 367
 368         ns = "{%s}" % xmns["xm"]
 369         len_ns = len(ns)
 370
 371         # Evaluate Python expressions in the attributes of xml_element:
 372         for attr_name, attr_value in xml_element.items():  # attr map
 373             v = brace_substitution(attr_value, xml_element, self.namespace)
 374             xml_element.set(attr_name, v)
 375
 376         # If xml_element has xmns["xm"] as its namespace, proceed with the
 377         # appropriate method of this class:
 378         if xml_element.nsmap.get(xml_element.prefix) == xmns["xm"]:
 379             tag = xml_element.tag[len_ns:]  # just the tag without namespc
 380             method = "_xm_" + tag.lower()  # tolerate any case
 381             if not hasattr(self, method):
 382                 raise Exception, "cannot process <xm:%s/>" % tag
 383             getattr(self, method)(xml_element)  # call the method
 384             # Preserve tail text:
 385             tail = xml_element.tail
 386             if tail is not None:
 387                 prev = xml_element.getprevious()
 388                 parent = xml_element.getparent()
 389                 if prev is not None:
 390                     prev.tail = (prev.tail or "") + tail
 391                 else:
 392                     parent.text = (parent.text or "") + tail
 393             xml_element.getparent().remove(xml_element)
 394
 395         # If not, recurse:
 396         else:
 397             self._recurse_into(xml_element)
 398
 399         return None
 400
 401     def _recurse_into(self, xml_element, namespace=None):
 402         if namespace is not None:
 403             self._namespace_stack.append(namespace)
 404         for xml_sub_element in xml_element.xpath("*"):
 405             self(xml_sub_element, None,
 406                  self.trace_includes, self.xml_filename)
 407         if namespace is not None:
 408             self._namespace_stack.pop()
 409             self.namespace = self._namespace_stack[-1]
 410
 411     def _xm_addelements(self, xml_element):
 412         """
 413         Add subelements to, before, or after the element selected by XPath
 414         (@to, @before or @after).
 415
 416         Exactly one of (@to, @before, @after) must be specified.  And the
 417         XPath expression must return exactly one element.  These conditions
 418         are checked by assertions and will raise an exception if not met.
 419         """
 420         to     = xml_element.get("to")
 421         before = xml_element.get("before")
 422         after  = xml_element.get("after")
 423
 424         assert sum((to is None, before is None, after is None)) == 2
 425         select = to or before or after
 426
 427         selected_context_nodes = xml_element.xpath(select)
 428         assert len(selected_context_nodes) == 1
 429
 430         context_node = selected_context_nodes[0]
 431         replace_context_node = False
 432
 433         if to is not None:
 434             f = "append"
 435         if before is not None:
 436             f = "addprevious"
 437         if after is not None:
 438             f = "addnext"
 439             replace_context_node = True
 440
 441         for xml_sub_element in xml_element:
 442             getattr(context_node, f)(xml_sub_element)
 443             if replace_context_node:
 444                 context_node = xml_sub_element
 445
 446     def _xm_block(self, xml_element):
 447         """
 448         Create a scope to contain visibility of newly assigned Python
 449         variables.  This works the same way that Python itself scopes
 450         variables, i.e. by creating a shallow copy of the Python namespace.
 451         E.g. assignments to list items will be visible to outside scopes!
 452         """
 453         self._recurse_into(xml_element, self.namespace.copy())
 454         for xml_sub_node in xml_element[::-1]:  # get children reversed
 455             xml_element.addnext(xml_sub_node)
 456
 457     def _xm_comment(self, xml_element):
 458         """
 459         A comment that is removed by XML Merge.
 460         """
 461         pass  # that's it
 462
 463     def _xm_defaultvar(self, xml_element):
 464         """
 465         Set (zero or more) variables in the active Python namespace, if not
 466         already set.
 467         """
 468         ns = self.namespace
 469         for attr_name, attr_value in xml_element.items():  # attr map
 470             if not attr_name in ns:
 471                 try:
 472                     ns[attr_name] = eval(attr_value, ns)
 473                 except:
 474                     print_xml_error(xml_element, code=attr_value)
 475                     print >>sys.stderr
 476                     raise
 477
 478     def _xm_include(self, xml_element):
 479         """
 480         Include from the specified file (@file) the elements selected by
 481         XPath (@select) after preprocessing said file.
 482
 483         The @file attribute is the only required attribute.
 484
 485         Items can be imported from the included (and preprocessed) file's
 486         Python namespace into the current file's namespace using the
 487         @import attribute, which may either be a comma-separated list of
 488         identifiers, or '*' to import the complete namespace.
 489
 490         Remaining attributes will be treated as variable assignments and
 491         put in the Python namespace used for processing the included file.
 492         """
 493         attrib = xml_element.attrib
 494         file_   = attrib.pop("file", None)
 495         select  = attrib.pop("select", None)
 496         import_ = attrib.pop("import", None)
 497         assert file_ is not None
 498         remaining_attribs = dict(attrib.items())
 499
 500         # Load the to-be-included file:
 501         p = os.path
 502
 503         xml_input_dirname = p.dirname(self.xml_filename)
 504         xml_incl_filename = p.join(xml_input_dirname, file_)
 505         xml_incl_filename = p.normpath(xml_incl_filename)
 506         # Always use '/' for normalized tracing information:
 507         xml_incl_filename = xml_incl_filename.replace("\\", "/")
 508
 509         xml_incl = ET.parse(xml_incl_filename).getroot()
 510
 511         # Build the initial namespace from a copy of the current namespace
 512         # plus the remaining attributes of the <xm:Include/> element:
 513         current_ns = self.namespace
 514         initial_namespace = current_ns.copy()
 515         for attr_name, attr_value in remaining_attribs.items():  # attr map
 516             try:
 517                 initial_namespace[attr_name] = eval(attr_value, current_ns)
 518             except:
 519                 print_xml_error(xml_element, code=attr_value)
 520                 print >>sys.stderr
 521                 raise
 522
 523         # Preprocess the to-be-included file:
 524         proc = XMLPreprocess(initial_namespace=initial_namespace)
 525         proc(xml_incl, trace_includes=self.trace_includes,
 526              xml_filename=xml_incl_filename)
 527
 528         # Select elements to include:
 529         included_elements = []
 530         if select is not None:
 531             included_elements = xml_incl.xpath(select)
 532
 533         # Include the elements:
 534         context_node = xml_element
 535         for inc_elem in included_elements:
 536             context_node.addnext(inc_elem)
 537             context_node = inc_elem
 538
 539         # Import from included namespace:
 540         imported_namespace = {}
 541         if import_ is not None:
 542             import_ = [x.strip() for x in import_.split(",")]
 543             if "*" in import_:  # import all
 544                 imported_namespace = proc.namespace
 545             else:
 546                 ns = proc.namespace
 547                 imported_namespace = dict((x, ns[x]) for x in import_)
 548         self.namespace.update(imported_namespace)
 549
 550     def _xm_loop(self, xml_element):
 551         """
 552         Loop over a range of integer values.
 553
 554         The first attribute is evaluated as the loop counter.  Example:
 555
 556             i="range(5, 9)"  =>  iterates with i being 5, 6, 7, 8
 557
 558         WARNING: The loop counter attribute, as well as all substitutions
 559         in subelement attributes (XPath ".//@*": "...{foo_bar}...") will
 560         (wholly or partially) be evaluated as Python expressions using
 561         eval().
 562         """
 563         # Get the loop counter name and list:
 564         loop_counter_name = xml_element.keys()[0]
 565         loop_counter_expr = xml_element.get(loop_counter_name)
 566         try:
 567             loop_counter_list = eval(loop_counter_expr, self.namespace)
 568         except:
 569             print_xml_error(xml_element, code=loop_counter_expr)
 570             print >>sys.stderr
 571             raise
 572
 573         # Loop:
 574         context_node = xml_element  # for new elements
 575         for loop_counter_value in loop_counter_list:
 576             self.namespace[loop_counter_name] = loop_counter_value
 577             tailtext = xml_element.tail
 578             xml_element.tail = None  # xml_element regarded as document
 579             # xml_element_copy = copy.copy(xml_element)  # CRASH
 580             # The following line is the workaround for the preceeding one:
 581             xml_element_copy = ET.XML(ET.tostring(xml_element))
 582             xml_element.addnext(xml_element_copy)  # temporarily
 583             xml_element.tail = xml_element_copy.tail = tailtext
 584             self._recurse_into(xml_element_copy)
 585             xml_element_copy.getparent().remove(xml_element_copy)
 586             if xml_element_copy.text is not None:
 587                 if context_node.tail is None:
 588                     context_node.tail = u""
 589                 context_node.tail += xml_element_copy.text
 590             for xml_sub_node in xml_element_copy[:]:
 591                 context_node.addnext(xml_sub_node)
 592                 context_node = xml_sub_node
 593
 594     def _xm_pythoncode(self, xml_element):
 595         """
 596         Execute Python code in the current namespace.
 597
 598         'self' and 'xml_element' are supplied temporarily. They are added
 599         to the current namespace before the 'exec' statement, and removed
 600         again afterwards.
 601         """
 602         code = textwrap.dedent(xml_element.text).strip()
 603         self.namespace["self"] = self
 604         self.namespace["xml_element"] = xml_element
 605         try:
 606             exec code in self.namespace
 607         except:
 608             print_xml_error(xml_element, code=code)
 609             print >>sys.stderr
 610             raise
 611         del self.namespace["self"], self.namespace["xml_element"]
 612
 613     def _xm_removeattributes(self, xml_element):
 614         """
 615         Remove the attributes (@name) from the (zero or more) elements
 616         selected by XPath (@from or @select).
 617
 618         It is not considered an error if an attribute cannot be found on a
 619         selected element.
 620         """
 621         attr_name = xml_element.get("name")
 622         select_xpath = xml_element.get("from") or xml_element.get("select")
 623         for xml_element_selected in xml_element.xpath(select_xpath):
 624             # Can't find another way to remove an attribute than by using
 625             # 'attrib':
 626             attrib = xml_element_selected.attrib
 627             if attr_name in attrib:
 628                 del xml_element_selected.attrib[attr_name]
 629
 630     def _xm_removeelements(self, xml_element):
 631         """
 632         Remove (zero or more) elements selected by XPath (@select).
 633         """
 634         select = xml_element.get("select")
 635         assert select is not None
 636         elements = xml_element.xpath(select)
 637         for el in elements:
 638             el.getparent().remove(el)
 639
 640     def _xm_setattribute(self, xml_element):
 641         """
 642         Assign the value (@value) to the attribute (@name) of the element
 643         selected by XPath (@of or @select).
 644
 645         Example:
 646             <Object index="0x1234"/>
 647             <xm:SetAttribute of="../Object" name="otherattr" value="hallo"/>
 648
 649         Leads to:
 650             <Object index="0x1234" otherattr="hello"/>
 651         """
 652         select  = xml_element.get("select", xml_element.get("of"))
 653         name    = xml_element.get("name")
 654         value   = xml_element.get("value")
 655         assert sum((select is None, name is None, value is None)) == 0
 656         elements = xml_element.xpath(select)
 657         for el in elements:
 658             el.set(name, value)
 659
 660     def _xm_text(self, xml_element):
 661         """
 662         Perform '{}' substitution on text.
 663         """
 664         text = xml_element.text
 665         if text is None: return
 666         tail = brace_substitution(text, xml_element, self.namespace)
 667         tail += xml_element.tail or ""
 668         xml_element.tail = tail
 669
 670     def _xm_var(self, xml_element):
 671         """
 672         Set (zero or more) variables in the active Python namespace.
 673         """
 674         ns = self.namespace
 675         for attr_name, attr_value in xml_element.items():  # attr map
 676             try:
 677                 ns[attr_name] = eval(attr_value, ns)
 678             except:
 679                 print_xml_error(xml_element, code=attr_value)
 680                 print >>sys.stderr
 681                 raise
 682
 683
 684 ## MAIN FUNCTION
 685
 686 def main(argv, **kargs):
 687     """
 688     main(argv, **kargs) -> int
 689
 690     Process the input file to produce an output file according to the command
 691     line options, given in argv.  These keyword arguments (**kargs) are
 692     recognized:
 693
 694     initial_namespace
 695       Gets passed on as the initial Python namespace to XMLPreprocess().
 696
 697     After the XML Merge Manual, the code of this function is the first part of
 698     XML Merge any new developer should read.  So keep this code as simple as
 699     possible if you change it in any way.
 700
 701     These are all possible exit status codes returned or raised (using
 702     SystemExit) by main or the functions it calls:
 703         - On success, and if all requested validations (-s, -r) match:
 704             return 0
 705         - On error, e.g. wrong options (see parse_command_line()):
 706             return 1
 707         - On mismatch (either XML Schema (-s) or reference (-r)):
 708             return mismatch_bitmap  # see end of main()
 709         - To aid understanding the bitmap: If N matching functions are
 710           provided, and all are requested and all fail to match the output
 711           file:
 712             return (2 ** N - 1) * 2  # mismatch_bitmap
 713     """
 714     # Parse command line to get options:
 715     options = parse_command_line(argv)
 716
 717     # Input file => preprocessing => output file:
 718     xml = read_input_file(options.input)
 719     proc = XMLPreprocess(**kargs)
 720     proc(xml, trace_includes=options.trace_includes,
 721          xml_filename=options.input)
 722     xml = postprocess_xml(xml)
 723     write_output_file(xml, options.output)
 724
 725     # If -s: Compare output to XML Schema file:
 726     matches_schema = True  # False means: match requested and negative
 727     if options.xml_schema is not None:
 728         matches_schema = match_against_schema(options, xml)
 729
 730     # If -r: Compare output to reference:
 731     matches_reference = True  # False means: match requested and negative
 732     if options.reference is not None:
 733         matches_reference = match_against_reference(options, xml)
 734
 735     # Calculate and return the mismatch bitmap:
 736     mismatch_bitmap = 0
 737     mismatch_bitmap |= int(not matches_schema)    << 1  # 2 on mismatch
 738     mismatch_bitmap |= int(not matches_reference) << 2  # 4 on mismatch
 739     return mismatch_bitmap
 740
 741
# When run as a program: process the command line and use the value
# returned by main() as the exit status of the process.
if __name__ == "__main__":
    sys.exit(main(sys.argv))