build/generator/ezt.py

   1 #!/usr/bin/env python
   2 """ezt.py -- easy templating
   3
   4 ezt templates are very similar to standard HTML files.  But additionally
   5 they contain directives sprinkled in between.  With these directives
   6 it is possible to generate the dynamic content from the ezt templates.
   7
   8 These directives are enclosed in square brackets.  If you are a
   9 C-programmer, you might be familiar with the #ifdef directives of the
  10 C preprocessor 'cpp'.  ezt provides a similar concept for HTML.  Additionally
  11 EZT has a 'for' directive, which allows you to iterate (repeat) certain
  12 subsections of the template according to a sequence of data items
  13 provided by the application.
  14
  15 The HTML rendering is performed by the method generate() of the Template
  16 class.  Building template instances can either be done using external
  17 EZT files (convention: use the suffix .ezt for such files):
  18
  19     >>> template = Template("../templates/log.ezt")
  20
  21 or by calling the parse() method of a template instance directly with
  22 an EZT template string:
  23
  24     >>> template = Template()
  25     >>> template.parse('''<html><head>
  26     ... <title>[title_string]</title></head>
  27     ... <body><h1>[title_string]</h1>
  28     ...    [for a_sequence] <p>[a_sequence]</p>
  29     ...    [end] <hr>
  30     ...    The [person] is [if-any state]in[else]out[end].
  31     ... </body>
  32     ... </html>
  33     ... ''')
  34
  35 The application should build a dictionary 'data' and pass it together
  36 with the output file object to the template's generate() method:
  37
  38     >>> data = {'title_string' : "A Dummy Page",
  39     ...         'a_sequence' : ['list item 1', 'list item 2', 'another element'],
  40     ...         'person': "doctor",
  41     ...         'state' : None }
  42     >>> import sys
  43     >>> template.generate(sys.stdout, data)
  44     <html><head>
  45     <title>A Dummy Page</title></head>
  46     <body><h1>A Dummy Page</h1>
  47      <p>list item 1</p>
  48      <p>list item 2</p>
  49      <p>another element</p>
  50      <hr>
  51     The doctor is out.
  52     </body>
  53     </html>
  54
  55 Template syntax error reporting should be improved.  Currently it is
  56 very sparse (template line numbers would be nice):
  57
  58     >>> Template().parse("[if-any where] foo [else] bar [end unexpected args]")
  59     Traceback (innermost last):
  60       File "<stdin>", line 1, in ?
  61       File "ezt.py", line 220, in parse
  62         self.program = self._parse(text)
  63       File "ezt.py", line 275, in _parse
  64         raise ArgCountSyntaxError(str(args[1:]))
  65     ArgCountSyntaxError: ['unexpected', 'args']
  66     >>> Template().parse("[if unmatched_end]foo[end]")
  67     Traceback (innermost last):
  68       File "<stdin>", line 1, in ?
  69       File "ezt.py", line 206, in parse
  70         self.program = self._parse(text)
  71       File "ezt.py", line 266, in _parse
  72         raise UnmatchedEndError()
  73     UnmatchedEndError
  74
  75
  76 Directives
  77 ==========
  78
  79  Several directives allow the use of dotted qualified names referring to objects
  80  or attributes of objects contained in the data dictionary given to the
  81  .generate() method.
  82
  83  Simple directives
  84  -----------------
  85
  86    [QUAL_NAME]
  87
  88    This directive is simply replaced by the value of identifier from the data
  89    dictionary.  QUAL_NAME might be a dotted qualified name referring to some
  90    instance attribute of objects contained in the data dictionary.
  91    Numbers are converted to string though.
  92
  93    [include "filename"]  or [include QUAL_NAME]
  94
  95    This directive is replaced by content of the named include file.
  96
  97  Block directives
  98  ----------------
  99
 100    [for QUAL_NAME] ... [end]
 101
 102    The text within the [for ...] directive and the corresponding [end]
 103    is repeated for each element in the sequence referred to by the qualified
 104    name in the for directive.  Within the for block this identifier now
 105    refers to the actual item indexed by this loop iteration.
 106
 107    [if-any QUAL_NAME [QUAL_NAME2 ...]] ... [else] ... [end]
 108
 109    Test if any QUAL_NAME value is not None or an empty string or list.
 110    The [else] clause is optional.  CAUTION: Numeric values are converted to
 111    string, so if QUAL_NAME refers to a numeric value 0, the then-clause is
 112    substituted!
 113
 114    [if-index INDEX_FROM_FOR odd] ... [else] ... [end]
 115    [if-index INDEX_FROM_FOR even] ... [else] ... [end]
 116    [if-index INDEX_FROM_FOR first] ... [else] ... [end]
 117    [if-index INDEX_FROM_FOR last] ... [else] ... [end]
 118    [if-index INDEX_FROM_FOR NUMBER] ... [else] ... [end]
 119
 120    These five directives work similarly to [if-any], but are only useful
 121    within a [for ...]-block (see above).  The odd/even directives are
 122    for example useful to choose different background colors for adjacent rows
 123    in a table.  Similarly, the first/last directives might be used to
 124    remove certain parts (for example "Diff to previous" doesn't make sense,
 125    if there is no previous).
 126
 127    [is QUAL_NAME STRING] ... [else] ... [end]
 128    [is QUAL_NAME QUAL_NAME] ... [else] ... [end]
 129
 130    The [is ...] directive is similar to the other conditional directives
 131    above.  But it allows comparing two value references or a value reference
 132    with some constant string.
 133
 134 """
 135 #
 136 # Copyright (C) 2001-2002 Greg Stein. All Rights Reserved.
 137 #
 138 # Redistribution and use in source and binary forms, with or without
 139 # modification, are permitted provided that the following conditions are
 140 # met:
 141 #
 142 # * Redistributions of source code must retain the above copyright
 143 #   notice, this list of conditions and the following disclaimer.
 144 #
 145 # * Redistributions in binary form must reproduce the above copyright
 146 #   notice, this list of conditions and the following disclaimer in the
 147 #   documentation and/or other materials provided with the distribution.
 148 #
 149 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 150 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 151 # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 152 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 153 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 154 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 155 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 156 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 157 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 158 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 159 # POSSIBILITY OF SUCH DAMAGE.
 160 #
 161 #
 162 # This software is maintained by Greg and is available at:
 163 #    http://viewcvs.sourceforge.net/
 164 # it is also used by the following projects:
 165 #    http://edna.sourceforge.net/
 166 #
 167
 168 import string
 169 import re
 170 from types import StringType, IntType, FloatType
 171 import os
 172
 173 #
 174 # This regular expression matches three alternatives:
 175 #   expr: DIRECTIVE | BRACKET | COMMENT
 176 #   DIRECTIVE: '[' ITEM (whitespace ITEM)* ']
 177 #   ITEM: STRING | NAME
 178 #   STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
 179 #   NAME: (alphanum | '_' | '-' | '.')+
 180 #   BRACKET: '[[]'
 181 #   COMMENT: '[#' not-rbracket* ']'
 182 #
 183 # When used with the split() method, the return value will be composed of
 184 # non-matching text and the two paren groups (DIRECTIVE and BRACKET). Since
 185 # the COMMENT matches are not placed into a group, they are considered a
 186 # "splitting" value and simply dropped.
 187 #
 188 _item = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
 189 _re_parse = re.compile(r'\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' % (_item, _item))
 190
 191 _re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')
 192
 193 # block commands and their argument counts
 194 _block_cmd_specs = { 'if-index':2, 'for':1, 'is':2 }
 195 _block_cmds = _block_cmd_specs.keys()
 196
 197 # two regular expressions for compressing whitespace. the first is used to
 198 # compress any whitespace including a newline into a single newline. the
 199 # second regex is used to compress runs of whitespace into a single space.
 200 _re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
 201 _re_whitespace = re.compile(r'\s\s+')
 202
 203 # this regex is used to substitute arguments into a value. we split the value,
 204 # replace the relevant pieces, and then put it all back together. splitting
 205 # will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
 206 # an integer.
 207 _re_subst = re.compile('%(%|[0-9]+)')
 208
 209 class Template:
 210
 211   def __init__(self, fname=None, compress_whitespace=1):
 212     self.compress_whitespace = compress_whitespace
 213     if fname:
 214       self.parse_file(fname)
 215
 216   def parse_file(self, fname):
 217     "fname -> a string object with pathname of file containg an EZT template."
 218
 219     self.program = self._parse(_FileReader(fname))
 220
 221   def parse(self, text_or_reader):
 222     """Parse the template specified by text_or_reader.
 223
 224     The argument should be a string containing the template, or it should
 225     specify a subclass of ezt.Reader which can read templates.
 226     """
 227     if not isinstance(text_or_reader, Reader):
 228       # assume the argument is a plain text string
 229       text_or_reader = _TextReader(text_or_reader)
 230     self.program = self._parse(text_or_reader)
 231
 232   def generate(self, fp, data):
 233     ctx = _context()
 234     ctx.data = data
 235     ctx.for_index = { }
 236     self._execute(self.program, fp, ctx)
 237
 238   def _parse(self, reader, for_names=None, file_args=()):
 239     """text -> string object containing the HTML template.
 240
 241     This is a private helper function doing the real work for method parse.
 242     It returns the parsed template as a 'program'.  This program is a sequence
 243     made out of strings or (function, argument) 2-tuples.
 244
 245     Note: comment directives [# ...] are automatically dropped by _re_parse.
 246     """
 247
 248     # parse the template program into: (TEXT DIRECTIVE BRACKET)* TEXT
 249     parts = _re_parse.split(reader.text)
 250
 251     program = [ ]
 252     stack = [ ]
 253     if not for_names:
 254        for_names = [ ]
 255
 256     for i in range(len(parts)):
 257       piece = parts[i]
 258       which = i % 3  # discriminate between: TEXT DIRECTIVE BRACKET
 259       if which == 0:
 260         # TEXT. append if non-empty.
 261         if piece:
 262           if self.compress_whitespace:
 263             piece = _re_whitespace.sub(' ', _re_newline.sub('\n', piece))
 264           program.append(piece)
 265       elif which == 2:
 266         # BRACKET directive. append '[' if present.
 267         if piece:
 268           program.append('[')
 269       elif piece:
 270         # DIRECTIVE is present.
 271         args = _re_args.findall(piece)
 272         cmd = args[0]
 273         if cmd == 'else':
 274           if len(args) > 1:
 275             raise ArgCountSyntaxError(str(args[1:]))
 276           ### check: don't allow for 'for' cmd
 277           idx = stack[-1][1]
 278           true_section = program[idx:]
 279           del program[idx:]
 280           stack[-1][3] = true_section
 281         elif cmd == 'end':
 282           if len(args) > 1:
 283             raise ArgCountSyntaxError(str(args[1:]))
 284           # note: true-section may be None
 285           try:
 286             cmd, idx, args, true_section = stack.pop()
 287           except IndexError:
 288             raise UnmatchedEndError()
 289           else_section = program[idx:]
 290           func = getattr(self, '_cmd_' + re.sub('-', '_', cmd))
 291           program[idx:] = [ (func, (args, true_section, else_section)) ]
 292           if cmd == 'for':
 293             for_names.pop()
 294         elif cmd in _block_cmds:
 295           if len(args) > _block_cmd_specs[cmd] + 1:
 296             raise ArgCountSyntaxError(str(args[1:]))
 297           ### this assumes arg1 is always a ref
 298           args[1] = _prepare_ref(args[1], for_names, file_args)
 299
 300           # handle arg2 for the 'is' command
 301           if cmd == 'is':
 302             args[2] = _prepare_ref(args[2], for_names, file_args)
 303           elif cmd == 'for':
 304             for_names.append(args[1][0])
 305
 306           # remember the cmd, current pos, args, and a section placeholder
 307           stack.append([cmd, len(program), args[1:], None])
 308         elif cmd == 'include':
 309           if args[1][0] == '"':
 310             include_filename = args[1][1:-1]
 311             f_args = [ ]
 312             for arg in args[2:]:
 313               f_args.append(_prepare_ref(arg, for_names, file_args))
 314             program.extend(self._parse(reader.read_other(include_filename),
 315                                        for_names,
 316                                        f_args))
 317           else:
 318             if len(args) != 2:
 319               raise ArgCountSyntaxError(str(args))
 320             program.append((self._cmd_include,
 321                             (_prepare_ref(args[1], for_names, file_args),
 322                              reader)))
 323         elif cmd == 'if-any':
 324           f_args = [ ]
 325           for arg in args[1:]:
 326             f_args.append(_prepare_ref(arg, for_names, file_args))
 327           stack.append(['if-any', len(program), f_args, None])
 328         else:
 329           # implied PRINT command
 330           if len(args) > 1:
 331             f_args = [ ]
 332             for arg in args:
 333               f_args.append(_prepare_ref(arg, for_names, file_args))
 334             program.append((self._cmd_format, (f_args[0], f_args[1:])))
 335           else:
 336             program.append((self._cmd_print,
 337                             _prepare_ref(args[0], for_names, file_args)))
 338
 339     if stack:
 340       ### would be nice to say which blocks...
 341       raise UnclosedBlocksError()
 342     return program
 343
 344   def _execute(self, program, fp, ctx):
 345     """This private helper function takes a 'program' sequence as created
 346     by the method '_parse' and executes it step by step.  strings are written
 347     to the file object 'fp' and functions are called.
 348     """
 349     for step in program:
 350       if isinstance(step, StringType):
 351         fp.write(step)
 352       else:
 353         step[0](step[1], fp, ctx)
 354
 355   def _cmd_print(self, valref, fp, ctx):
 356     value = _get_value(valref, ctx)
 357
 358     # if the value has a 'read' attribute, then it is a stream: copy it
 359     if hasattr(value, 'read'):
 360       while 1:
 361         chunk = value.read(16384)
 362         if not chunk:
 363           break
 364         fp.write(chunk)
 365     else:
 366       fp.write(value)
 367
 368   def _cmd_format(self, (valref, args), fp, ctx):
 369     fmt = _get_value(valref, ctx)
 370     parts = _re_subst.split(fmt)
 371     for i in range(len(parts)):
 372       piece = parts[i]
 373       if i%2 == 1 and piece != '%':
 374         idx = int(piece)
 375         if idx < len(args):
 376           piece = _get_value(args[idx], ctx)
 377         else:
 378           piece = '<undef>'
 379       fp.write(piece)
 380
 381   def _cmd_include(self, (valref, reader), fp, ctx):
 382     fname = _get_value(valref, ctx)
 383     ### note: we don't have the set of for_names to pass into this parse.
 384     ### I don't think there is anything to do but document it.
 385     self._execute(self._parse(reader.read_other(fname)), fp, ctx)
 386
 387   def _cmd_if_any(self, args, fp, ctx):
 388     "If any value is a non-empty string or non-empty list, then T else F."
 389     (valrefs, t_section, f_section) = args
 390     value = 0
 391     for valref in valrefs:
 392       if _get_value(valref, ctx):
 393         value = 1
 394         break
 395     self._do_if(value, t_section, f_section, fp, ctx)
 396
 397   def _cmd_if_index(self, args, fp, ctx):
 398     ((valref, value), t_section, f_section) = args
 399     list, idx = ctx.for_index[valref[0]]
 400     if value == 'even':
 401       value = idx % 2 == 0
 402     elif value == 'odd':
 403       value = idx % 2 == 1
 404     elif value == 'first':
 405       value = idx == 0
 406     elif value == 'last':
 407       value = idx == len(list)-1
 408     else:
 409       value = idx == int(value)
 410     self._do_if(value, t_section, f_section, fp, ctx)
 411
 412   def _cmd_is(self, args, fp, ctx):
 413     ((left_ref, right_ref), t_section, f_section) = args
 414     value = _get_value(right_ref, ctx)
 415     value = string.lower(_get_value(left_ref, ctx)) == string.lower(value)
 416     self._do_if(value, t_section, f_section, fp, ctx)
 417
 418   def _do_if(self, value, t_section, f_section, fp, ctx):
 419     if t_section is None:
 420       t_section = f_section
 421       f_section = None
 422     if value:
 423       section = t_section
 424     else:
 425       section = f_section
 426     if section is not None:
 427       self._execute(section, fp, ctx)
 428
 429   def _cmd_for(self, args, fp, ctx):
 430     ((valref,), unused, section) = args
 431     list = _get_value(valref, ctx)
 432     if isinstance(list, StringType):
 433       raise NeedSequenceError()
 434     refname = valref[0]
 435     ctx.for_index[refname] = idx = [ list, 0 ]
 436     for item in list:
 437       self._execute(section, fp, ctx)
 438       idx[1] = idx[1] + 1
 439     del ctx.for_index[refname]
 440
 441 def boolean(value):
 442   "Return a value suitable for [if-any bool_var] usage in a template."
 443   if value:
 444     return 'yes'
 445   return None
 446
 447
 448 def _prepare_ref(refname, for_names, file_args):
 449   """refname -> a string containing a dotted identifier. example:"foo.bar.bang"
 450   for_names -> a list of active for sequences.
 451
 452   Returns a `value reference', a 3-Tupel made out of (refname, start, rest),
 453   for fast access later.
 454   """
 455   # is the reference a string constant?
 456   if refname[0] == '"':
 457     return None, refname[1:-1], None
 458
 459   # if this is an include-argument, then just return the prepared ref
 460   if refname[:3] == 'arg':
 461     try:
 462       idx = int(refname[3:])
 463     except ValueError:
 464       pass
 465     else:
 466       if idx < len(file_args):
 467         return file_args[idx]
 468
 469   parts = string.split(refname, '.')
 470   start = parts[0]
 471   rest = parts[1:]
 472   while rest and (start in for_names):
 473     # check if the next part is also a "for name"
 474     name = start + '.' + rest[0]
 475     if name in for_names:
 476       start = name
 477       del rest[0]
 478     else:
 479       break
 480   return refname, start, rest
 481
 482 def _get_value((refname, start, rest), ctx):
 483   """(refname, start, rest) -> a prepared `value reference' (see above).
 484   ctx -> an execution context instance.
 485
 486   Does a name space lookup within the template name space.  Active
 487   for blocks take precedence over data dictionary members with the
 488   same name.
 489   """
 490   if rest is None:
 491     # it was a string constant
 492     return start
 493   if ctx.for_index.has_key(start):
 494     list, idx = ctx.for_index[start]
 495     ob = list[idx]
 496   elif ctx.data.has_key(start):
 497     ob = ctx.data[start]
 498   else:
 499     raise UnknownReference(refname)
 500
 501   # walk the rest of the dotted reference
 502   for attr in rest:
 503     try:
 504       ob = getattr(ob, attr)
 505     except AttributeError:
 506       raise UnknownReference(refname)
 507
 508   # make sure we return a string instead of some various Python types
 509   if isinstance(ob, IntType) or isinstance(ob, FloatType):
 510     return str(ob)
 511   if ob is None:
 512     return ''
 513
 514   # string or a sequence
 515   return ob
 516
 517
 518 class _context:
 519   """A container for the execution context"""
 520
 521
 522 class Reader:
 523   "Abstract class which allows EZT to detect Reader objects."
 524
 525 class _FileReader(Reader):
 526   """Reads templates from the filesystem."""
 527   def __init__(self, fname):
 528     self.text = open(fname, 'rb').read()
 529     self._dir = os.path.dirname(fname)
 530   def read_other(self, relative):
 531     return _FileReader(os.path.join(self._dir, relative))
 532
 533 class _TextReader(Reader):
 534   """'Reads' a template from provided text."""
 535   def __init__(self, text):
 536     self.text = text
 537   def read_other(self, relative):
 538     raise BaseUnavailableError()
 539
 540
 541 class EZTException(Exception):
 542   """Parent class of all EZT exceptions."""
 543
 544 class ArgCountSyntaxError(EZTException):
 545   """A bracket directive got the wrong number of arguments."""
 546
 547 class UnknownReference(EZTException):
 548   """The template references an object not contained in the data dictionary."""
 549
 550 class NeedSequenceError(EZTException):
 551   """The object dereferenced by the template is no sequence (tuple or list)."""
 552
 553 class UnclosedBlocksError(EZTException):
 554   """This error may be simply a missing [end]."""
 555
 556 class UnmatchedEndError(EZTException):
 557   """This error may be caused by a misspelled if directive."""
 558
 559 class BaseUnavailableError(EZTException):
 560   """Base location is unavailable, which disables includes."""
 561
 562
 563 # --- standard test environment ---
 564 def test_parse():
 565   assert _re_parse.split('[a]') == ['', '[a]', None, '']
 566   assert _re_parse.split('[a] [b]') == \
 567          ['', '[a]', None, ' ', '[b]', None, '']
 568   assert _re_parse.split('[a c] [b]') == \
 569          ['', '[a c]', None, ' ', '[b]', None, '']
 570   assert _re_parse.split('x [a] y [b] z') == \
 571          ['x ', '[a]', None, ' y ', '[b]', None, ' z']
 572   assert _re_parse.split('[a "b" c "d"]') == \
 573          ['', '[a "b" c "d"]', None, '']
 574   assert _re_parse.split(r'["a \"b[foo]" c.d f]') == \
 575          ['', '["a \\"b[foo]" c.d f]', None, '']
 576
 577 def _test(argv):
 578   import doctest, ezt
 579   verbose = "-v" in argv
 580   return doctest.testmod(ezt, verbose=verbose)
 581
 582 if __name__ == "__main__":
 583   # invoke unit test for this module:
 584   import sys
 585   sys.exit(_test(sys.argv)[0])