pylit.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf8 -*-
   3 from __future__ import print_function
   4
   5 # pylit.py
   6 # ********
   7 # Literate programming with reStructuredText
   8 # ++++++++++++++++++++++++++++++++++++++++++
   9 #
  10 # :Copyright: © 2005, 2007, 2015, 2021 Günter Milde.
  11 #             Released without warranty under the terms of the
  12 #             GNU General Public License (v. 2 or later)
  13 #
  14 # ::
  15
  16 """pylit: bidirectional text <-> code converter
  17
  18 Convert between a *text source* with embedded computer code and a *code source*
  19 with embedded documentation.
  20 """
  21
  22 # .. contents::
  23 #
  24 # Frontmatter
  25 # ===========
  26 #
  27 # Changelog
  28 # ---------
  29 #
  30 # .. class:: borderless
  31 #
  32 # ======  ==========  ==========================================================
  33 # 0.1     2005-06-29  Initial version.
  34 # 0.1.1   2005-06-30  First literate version.
  35 # 0.1.2   2005-07-01  Object orientated script using generators.
  36 # 0.1.3   2005-07-10  Two state machine (later added 'header' state).
  37 # 0.2b    2006-12-04  Start of work on version 0.2 (code restructuring).
  38 # 0.2     2007-01-23  Published at http://pylit.berlios.de.
  39 # 0.2.1   2007-01-25  Outsourced non-core documentation to the PyLit pages.
  40 # 0.2.2   2007-01-26  New behaviour of `diff` function.
  41 # 0.2.3   2007-01-29  New `header` methods after suggestion by Riccardo Murri.
  42 # 0.2.4   2007-01-31  Raise Error if code indent is too small.
  43 # 0.2.5   2007-02-05  New command line option --comment-string.
  44 # 0.2.6   2007-02-09  Add section with open questions,
  45 #                     Code2Text: let only blank lines (no comment str)
  46 #                     separate text and code,
  47 #                     fix `Code2Text.header`.
  48 # 0.2.7   2007-02-19  Simplify `Code2Text.header`,
  49 #                     new `iter_strip` method replacing a lot of ``if``-s.
  50 # 0.2.8   2007-02-22  Set `mtime` of outfile to the one of infile.
  51 # 0.3     2007-02-27  New `Code2Text` converter after an idea by Riccardo Murri,
  52 #                     explicit `option_defaults` dict for easier customisation.
  53 # 0.3.1   2007-03-02  Expand hard-tabs to prevent errors in indentation,
  54 #                     `Text2Code` now also works on blocks,
  55 #                     removed dependency on SimpleStates module.
  56 # 0.3.2   2007-03-06  Bug fix: do not set `language` in `option_defaults`
  57 #                     renamed `code_languages` to `languages`.
  58 # 0.3.3   2007-03-16  New language css,
  59 #                     option_defaults -> defaults = optparse.Values(),
  60 #                     simpler PylitOptions: don't store parsed values,
  61 #                     don't parse at initialisation,
  62 #                     OptionValues: return `None` for non-existing attributes,
  63 #                     removed -infile and -outfile, use positional arguments.
  64 # 0.3.4   2007-03-19  Documentation update,
  65 #                     separate `execute` function.
  66 #         2007-03-21  Code cleanup in `Text2Code.__iter__`.
  67 # 0.3.5   2007-03-23  Removed "css" from known languages after learning that
  68 #                     there is no C++ style "// " comment string in CSS2.
  69 # 0.3.6   2007-04-24  Documentation update.
  70 # 0.4     2007-05-18  Implement Converter.__iter__ as stack of iterator
  71 #                     generators. Iterating over a converter instance now
  72 #                     yields lines instead of blocks.
  73 #                     Provide "hooks" for pre- and postprocessing filters.
  74 #                     Rename states to reduce confusion with formats:
  75 #                     "text" -> "documentation", "code" -> "code_block".
  76 # 0.4.1   2007-05-22  Converter.__iter__: cleanup and reorganisation,
  77 #                     rename parent class Converter -> TextCodeConverter.
  78 # 0.4.2   2007-05-23  Merged Text2Code.converter and Code2Text.converter into
  79 #                     TextCodeConverter.converter.
  80 # 0.4.3   2007-05-30  Replaced use of defaults.code_extensions with
  81 #                     values.languages.keys().
  82 #                     Removed spurious `print` statement in code_block_handler.
  83 #                     Added basic support for 'c' and 'css' languages
  84 #                     with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
  85 # 0.5     2007-06-06  Moved `collect_blocks`_ out of `TextCodeConverter`_,
  86 #                     bug fix: collect all trailing blank lines into a block.
  87 #                     Expand tabs with `expandtabs_filter`_.
  88 # 0.6     2007-06-20  Configurable code-block marker (default ``::``)
  89 # 0.6.1   2007-06-28  Bug fix: reset self.code_block_marker_missing.
  90 # 0.7     2007-12-12  prepending an empty string to sys.path in run_doctest()
  91 #                     to allow imports from the current working dir.
  92 # 0.7.1   2008-01-07  If outfile does not exist, do a round-trip conversion
  93 #                     and report differences (as with outfile=='-').
  94 # 0.7.2   2008-01-28  Do not add missing code-block separators with
  95 #                     `doctest_run` on the code source. Keeps lines consistent.
  96 # 0.7.3   2008-04-07  Use value of code_block_marker for insertion of missing
  97 #                     transition marker in Code2Text.code_block_handler
  98 #                     Add "shell" to defaults.languages
  99 # 0.7.4   2008-06-23  Add "latex" to defaults.languages
 100 # 0.7.5   2009-05-14  Bugfix: ignore blank lines in test for end of code block
 101 # 0.7.6   2009-12-15  language-dependent code-block markers (after a
 102 #                     `feature request and patch by jrioux`_),
 103 #                     use DefaultDict for language-dependent defaults,
 104 #                     new defaults setting `add_missing_marker`_.
 105 # 0.7.7   2010-06-23  New command line option --codeindent.
 106 # 0.7.8   2011-03-30  Do not overwrite custom `add_missing_marker` value,
 107 #                     allow directive options following the 'code' directive.
 108 # 0.7.9   2011-04-05  Decode doctest string if 'magic comment' gives encoding.
 109 # 0.7.10  2013-06-07  Add "lua" to defaults.languages
 110 # 0.7.11  2020-10-10  Return 0, if input and output file are of same age.
 111 # 0.8.0   unpublishd  Fix ``--execute`` behaviour and tests.
 112 #                     Use collections.defaultdict.
 113 #                     Change default `codeindent` to 2.
 114 # ======  ==========  ==========================================================
 115 #
 116 # ::
 117
 118 _version = "0.8.0dev"
 119
 120 __docformat__ = 'restructuredtext'
 121
 122
 123 # Introduction
 124 # ------------
 125 #
 126 # PyLit is a bidirectional converter between two formats of a computer
 127 # program source:
 128 #
 129 # * a (reStructured) text document with program code embedded in
 130 #   *code blocks*, and
 131 # * a compilable (or executable) code source with *documentation*
 132 #   embedded in comment blocks
 133 #
 134 #
 135 # Requirements
 136 # ------------
 137 #
 138 # ::
 139
 140 from collections import defaultdict
 141 import os
 142 import re
 143 import optparse
 144 import sys
 145
 146
 147 # Defaults
 148 # ========
 149 #
 150 # The `defaults` object provides a central repository for default
 151 # values and their customisation. ::
 152
 153 defaults = optparse.Values()
 154
 155 # It is used for
 156 #
 157 # * the initialisation of data arguments in TextCodeConverter_ and
 158 #   PylitOptions_
 159 #
 160 # * completion of command line options in `PylitOptions.complete_values`_.
 161 #
 162 # This allows the easy creation of back-ends that customise the
 163 # defaults and then call `main`_ e.g.:
 164 #
 165 # >>> import pylit
 166 # >>> pylit.defaults.comment_string = "## "
 167 # >>> pylit.defaults.codeindent = 4
 168 # >>> pylit.main()
 169 #
 170 # The following default values are defined in pylit.py:
 171 #
 172 # languages
 173 # ---------
 174 #
 175 # Mapping of code file extensions to code language::
 176
 177 defaults.languages  = defaultdict(lambda: "python", # fallback language
 178                                   {".c":   "c",
 179                                    ".cc":  "c++",
 180                                    ".css": "css",
 181                                    ".lua": "lua",
 182                                    ".py":  "python",
 183                                    ".sh":  "shell",
 184                                    ".sl":  "slang",
 185                                    ".sty": "latex",
 186                                    ".tex": "latex"
 187                                   })
 188
 189 # Will be overridden by the ``--language`` command line option.
 190 #
 191 # The first argument is a factory returning the fallback language, used if
 192 # there is no matching extension (e.g. if pylit is used as filter) and no
 193 # ``--language`` is specified. It can be changed programmatically by
 194 # assignment to the ``.default_factory`` attribute, e.g.
 195 #
 196 # >>> defaults.languages.default_factory = lambda: 'c++'
 197 #
 198 #
 199 # .. _text_extension:
 200 #
 201 # text_extensions
 202 # ---------------
 203 #
 204 # List of known extensions of (reStructured) text files. The first
 205 # extension in this list is used by the `_get_outfile_name`_ method to
 206 # generate a text output filename::
 207
 208 defaults.text_extensions = [".txt", ".rst"]
 209
 210
 211 # comment_strings
 212 # ---------------
 213 #
 214 # Comment strings for known languages. Used in Code2Text_ to recognise
 215 # text blocks and in Text2Code_ to format text blocks as comments.
 216 # Defaults to ``'# '``.
 217 #
 218 # **Comment strings include trailing whitespace.** ::
 219
 220 defaults.comment_strings = defaultdict(lambda: '# ',
 221                                        {"css":    '// ',
 222                                         "c":      '// ',
 223                                         "c++":    '// ',
 224                                         "lua":    '-- ',
 225                                         "latex":  '% ',
 226                                         "python": '# ',
 227                                         "shell":  '# ',
 228                                         "slang":  '% '
 229                                        })
 230
 231
 232 # header_string
 233 # -------------
 234 #
 235 # Marker string for a header code block in the text source. No trailing
 236 # whitespace needed as indented code follows.
 237 # Must be a valid rst directive that accepts code on the same line, e.g.
 238 # ``'..admonition::'``.
 239 #
 240 # Default is a comment marker::
 241
 242 defaults.header_string = '..'
 243
 244
 245 # .. _code_block_marker:
 246 #
 247 # code_block_markers
 248 # ------------------
 249 #
 250 # Markup at the end of a documentation block.
 251 # Default is Docutils' marker for a `literal block`_::
 252
 253 defaults.code_block_markers = defaultdict(lambda: '::')
 254
 255 # The `code_block_marker` string is `inserted into a regular expression`_.
 256 # Language-specific markers can be defined programmatically, e.g. in a
 257 # wrapper script.
 258 #
 259 # In a document where code examples are only one of several uses of
 260 # literal blocks, it is more appropriate to single out the source code,
 261 # e.g. with the double colon on a separate line ("expanded form")
 262 #
 263 #   ``defaults.code_block_markers.default_factory = lambda: ':: *'``
 264 #
 265 # or a dedicated ``.. code-block::`` directive [#]_
 266 #
 267 #   ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
 268 #
 269 # The latter form also allows code in different languages kept together
 270 # in one literate source file.
 271 #
 272 # .. [#] The ``.. code-block::`` directive is not (yet) supported by
 273 #    standard Docutils.  It is provided by several add-ons, including
 274 #    the `code-block directive`_ project in the Docutils Sandbox and
 275 #    Sphinx_.
 276 #
 277 #
 278 # strip
 279 # -----
 280 #
 281 # Export to the output format stripping documentation or code blocks::
 282
 283 defaults.strip = False
 284
 285 # strip_marker
 286 # ------------
 287 #
 288 # Strip literal marker from the end of documentation blocks when
 289 # converting to code format. Makes the code more concise but loses the
 290 # synchronisation of line numbers in text and code formats. Can also be used
 291 # (together with the auto-completion of the code-text conversion) to change
 292 # the `code_block_marker`::
 293
 294 defaults.strip_marker = False
 295
 296 # add_missing_marker
 297 # ------------------
 298 #
 299 # When converting from code format to text format, add a `code_block_marker`
 300 # at the end of documentation blocks if it is missing::
 301
 302 defaults.add_missing_marker = True
 303
 304 # Keep this at ``True``, if you want to re-convert to code format later!
 305 #
 306 #
 307 # .. _defaults.preprocessors:
 308 #
 309 # preprocessors
 310 # -------------
 311 #
 312 # Preprocess the data with language-specific filters_.
 313 # Set below in Filters_::
 314
 315 defaults.preprocessors = {}
 316
 317 # .. _defaults.postprocessors:
 318 #
 319 # postprocessors
 320 # --------------
 321 #
 322 # Postprocess the data with language-specific filters_::
 323
 324 defaults.postprocessors = {}
 325
 326 # .. _defaults.codeindent:
 327 #
 328 # codeindent
 329 # ----------
 330 #
 331 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
 332
 333 defaults.codeindent = 2
 334
 335 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
 336 # first recognised code line (header or first indented literal block
 337 # of the text source).
 338 #
 339 # overwrite
 340 # ---------
 341 #
 342 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
 343
 344 defaults.overwrite = 'update'
 345
 346 # Recognised values:
 347 #
 348 #  :'yes':    overwrite `outfile` if it exists,
 349 #  :'update': fail if the `outfile` is newer than `infile`,
 350 #  :'no':     fail if `outfile` exists.
 351 #
 352 #
 353 # Extensions
 354 # ==========
 355 #
 356 # Try to import optional extensions::
 357
 358 try:
 359     import pylit_elisp
 360 except ImportError:
 361     pass
 362
 363
 364 # Converter Classes
 365 # =================
 366 #
 367 # The converter classes implement a simple state machine to separate and
 368 # transform documentation and code blocks. For this task, only a very limited
 369 # parsing is needed. PyLit's parser assumes:
 370 #
 371 # * `indented literal blocks`_ in a text source are code blocks.
 372 #
 373 # * comment blocks in a code source where every line starts with a matching
 374 #   comment string are documentation blocks.
 375 #
 376 # TextCodeConverter
 377 # -----------------
 378 # ::
 379
 380 class TextCodeConverter(object):
 381     """Parent class for the converters `Text2Code` and `Code2Text`.
 382     """
 383
 384 # The parent class defines data attributes and functions used in both
 385 # `Text2Code`_ converting a text source to executable code source, and
 386 # `Code2Text`_ converting commented code to a text source.
 387 #
 388 # Data attributes
 389 # ~~~~~~~~~~~~~~~
 390 #
 391 # Class default values are fetched from the `defaults`_ object and can be
 392 # overridden by matching keyword arguments during class instantiation. This
 393 # also works with keyword arguments to `get_converter`_ and `main`_, as these
 394 # functions pass on unused keyword args to the instantiation of a converter
 395 # class. ::
 396
 397     language = defaults.languages[None]
 398     comment_strings = defaults.comment_strings
 399     comment_string = "" # set in __init__ (if empty)
 400     codeindent =  defaults.codeindent
 401     header_string = defaults.header_string
 402     code_block_markers = defaults.code_block_markers
 403     code_block_marker = "" # set in __init__ (if empty)
 404     strip = defaults.strip
 405     strip_marker = defaults.strip_marker
 406     add_missing_marker = defaults.add_missing_marker
 407     directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
 408     state = "" # type of current block, see `TextCodeConverter.convert`_
 409
 410 # Interface methods
 411 # ~~~~~~~~~~~~~~~~~
 412 #
 413 # .. _TextCodeConverter.__init__:
 414 #
 415 # __init__
 416 # """"""""
 417 #
 418 # Initialising sets the `data` attribute, an iterable object yielding lines of
 419 # the source to convert. [#]_
 420 #
 421 # .. [#] The most common choice of data is a `file` object with the text
 422 #        or code source.
 423 #
 424 #        To convert a string into a suitable object, use its splitlines()
 425 #        method like ``"2 lines\nof source".splitlines(True)``.
 426 #
 427 #
 428 # Additional keyword arguments are stored as instance variables,
 429 # overwriting the class defaults::
 430
 431     def __init__(self, data, **keyw):
 432         """data   --  iterable data object
 433                       (list, file, generator, string, ...)
 434            **keyw --  remaining keyword arguments are
 435                       stored as data-attributes
 436         """
 437         self.data = data
 438         self.__dict__.update(keyw)
 439
 440 # If empty, `code_block_marker` and `comment_string` are set according
 441 # to the `language`::
 442
 443         if not self.code_block_marker:
 444             self.code_block_marker = self.code_block_markers[self.language]
 445         if not self.comment_string:
 446             self.comment_string = self.comment_strings[self.language]
 447         self.stripped_comment_string = self.comment_string.rstrip()
 448
 449 # Pre- and postprocessing filters are set (with
 450 # `TextCodeConverter.get_filter`_)::
 451
 452         self.preprocessor = self.get_filter("preprocessors", self.language)
 453         self.postprocessor = self.get_filter("postprocessors", self.language)
 454
 455 # .. _inserted into a regular expression:
 456 #
 457 # Finally, a regular_expression for the `code_block_marker` is compiled
 458 # to find valid cases of `code_block_marker` in a given line and return
 459 # the groups: ``\1 prefix, \2 code_block_marker, \3 remainder`` ::
 460
 461         marker = self.code_block_marker
 462         if marker == '::':
 463             # the default marker may occur at the end of a text line
 464             self.marker_regexp = re.compile('^( *(?!\.\.).*)(::)([ \n]*)$')
 465         else:
 466             # marker must be on a separate line
 467             self.marker_regexp = re.compile('^( *)(%s)(.*\n?)$' % marker)
 468
 469 # .. _TextCodeConverter.__iter__:
 470 #
 471 # __iter__
 472 # """"""""
 473 #
 474 # Return an iterator for the instance. Iteration yields lines of converted
 475 # data.
 476 #
 477 # The iterator is a chain of iterators acting on `self.data` that does
 478 #
 479 # * preprocessing
 480 # * text<->code format conversion
 481 # * postprocessing
 482 #
 483 # Pre- and postprocessing are only performed, if filters for the current
 484 # language are registered in `defaults.preprocessors`_ and|or
 485 # `defaults.postprocessors`_. The filters must accept an iterable as first
 486 # argument and yield the processed input data line-wise.
 487 # ::
 488
 489     def __iter__(self):
 490         """Iterate over input data source and yield converted lines
 491         """
 492         return self.postprocessor(self.convert(self.preprocessor(self.data)))
 493
 494
 495 # .. _TextCodeConverter.__call__:
 496 #
 497 # __call__
 498 # """"""""
 499 # The special `__call__` method allows the use of class instances as callable
 500 # objects. It returns the converted data as list of lines::
 501
 502     def __call__(self):
 503         """Iterate over state-machine and return results as list of lines"""
 504         return [line for line in self]
 505
 506
 507 # .. _TextCodeConverter.__str__:
 508 #
 509 # __str__
 510 # """""""
 511 # Return converted data as string::
 512
 513     def __str__(self):
 514         return "".join(self())
 515
 516
 517 # Helpers and convenience methods
 518 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 519 #
 520 # .. _TextCodeConverter.convert:
 521 #
 522 # convert
 523 # """""""
 524 #
 525 # The `convert` method generates an iterator that does the actual code <-->
 526 # text format conversion. The converted data is yielded line-wise and the
 527 # instance's `state` attribute indicates whether the current line is "header",
 528 # "documentation", or "code_block"::
 529
 530     def convert(self, lines):
 531         """Iterate over lines of a program document and convert
 532         between "text" and "code" format
 533         """
 534
 535 # Initialise internal data arguments. (Done here, so that every new iteration
 536 # re-initialises them.)
 537 #
 538 # `state`
 539 #   the "type" of the currently processed block of lines. One of
 540 #
 541 #   :"":              initial state: check for header,
 542 #   :"header":        leading code block: strip `header_string`,
 543 #   :"documentation": documentation part: comment out,
 544 #   :"code_block":    literal blocks containing source code: unindent.
 545 #
 546 # ::
 547
 548         self.state = ""
 549
 550 # `_codeindent`
 551 #   * Do not confuse the internal attribute `_codeindent` with the configurable
 552 #     `codeindent` (without the leading underscore).
 553 #   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent of
 554 #     first non-blank "code_block" line and stripped from all "code_block" lines
 555 #     in the text-to-code conversion,
 556 #   * `codeindent` is set in `__init__` to `defaults.codeindent`_ and added to
 557 #     "code_block" lines in the code-to-text conversion.
 558 #
 559 # ::
 560
 561         self._codeindent = 0
 562
 563 # `_textindent`
 564 #   * set by `Text2Code.documentation_handler`_ to the minimal indent of a
 565 #     documentation block,
 566 #   * used in `Text2Code.set_state`_ to find the end of a code block.
 567 #
 568 # ::
 569
 570         self._textindent = 0
 571
 572 # `_add_code_block_marker`
 573 #   If the last paragraph of a documentation block does not end with a
 574 #   code_block_marker_, it should be added (otherwise, the back-conversion
 575 #   fails.).
 576 #
 577 #   `_add_code_block_marker` is set by `Code2Text.documentation_handler`_
 578 #   and evaluated by `Code2Text.code_block_handler`_, because the
 579 #   documentation_handler does not know whether the next block will be
 580 #   documentation (with no need for a code_block_marker) or a code block.
 581 #
 582 # ::
 583
 584         self._add_code_block_marker = False
 585
 586
 587
 588 # Determine the state of the block and convert with the matching "handler"::
 589
 590         for block in collect_blocks(expandtabs_filter(lines)):
 591             try:
 592                 self.set_state(block)
 593             except StopIteration:
 594                 return
 595             for line in getattr(self, self.state+"_handler")(block):
 596                 yield line
 597
 598
 599 # .. _TextCodeConverter.get_filter:
 600 #
 601 # get_filter
 602 # """"""""""
 603 # ::
 604
 605     def get_filter(self, filter_set, language):
 606         """Return language specific filter"""
 607         if self.__class__ == Text2Code:
 608             key = "text2"+language
 609         elif self.__class__ == Code2Text:
 610             key = language+"2text"
 611         else:
 612             key = ""
 613         try:
 614             return getattr(defaults, filter_set)[key]
 615         except (AttributeError, KeyError):
 616             # print("there is no %r filter in %r"%(key, filter_set))
 617             pass
 618         return identity_filter
 619
 620
 621 # get_indent
 622 # """"""""""
 623 # Return the number of leading spaces in `line`::
 624
 625     def get_indent(self, line):
 626         """Return the indentation of `string`.
 627         """
 628         return len(line) - len(line.lstrip())
 629
 630
 631 # Text2Code
 632 # ---------
 633 #
 634 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
 635 # Code blocks are unindented, documentation is commented (or filtered, if the
 636 # ``strip`` option is True).
 637 #
 638 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
 639 #        literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
 640 #        treated as part of the documentation. This allows the inclusion of
 641 #        examples:
 642 #
 643 #           >>> 23 + 3
 644 #           26
 645 #
 646 #        Note that there is no double colon before the doctest block in the
 647 #        text source.
 648 #
 649 # The class inherits the interface and helper functions from
 650 # TextCodeConverter_ and adds functions specific to the text-to-code format
 651 # conversion::
 652
 653 class Text2Code(TextCodeConverter):
 654     """Convert a (reStructured) text source to code source
 655     """
 656
 657 # .. _Text2Code.set_state:
 658 #
 659 # set_state
 660 # ~~~~~~~~~
 661 # ::
 662
 663     def set_state(self, block):
 664         """Determine state of `block`. Set `self.state`
 665         """
 666
 667 # `set_state` is used inside an iteration. Hence, if we are out of data, a
 668 # StopItertion exception should be raised::
 669
 670         if not block:
 671             raise StopIteration
 672
 673 # The new state depends on the active state (from the last block) and
 674 # features of the current block. It is either "header", "documentation", or
 675 # "code_block".
 676 #
 677 # If the current state is "" (first block), check for
 678 # the  `header_string` indicating a leading code block::
 679
 680         if self.state == "":
 681             # print("set state for %r"%block)
 682             if block[0].startswith(self.header_string):
 683                 self.state = "header"
 684             else:
 685                 self.state = "documentation"
 686
 687 # If the current state is "documentation", the next block is also
 688 # documentation. The end of a documentation part is detected in the
 689 # `Text2Code.documentation_handler`_::
 690
 691         # elif self.state == "documentation":
 692         #    self.state = "documentation"
 693
 694 # A "code_block" ends with the first less indented, non-blank line.
 695 # `_textindent` is set by the documentation handler to the indent of the
 696 # preceding documentation block::
 697
 698         elif self.state in ["code_block", "header"]:
 699             indents = [self.get_indent(line) for line in block
 700                        if line.rstrip()]
 701             # print("set_state:", indents, self._textindent)
 702             if indents and min(indents) <= self._textindent:
 703                 self.state = 'documentation'
 704             else:
 705                 self.state = 'code_block'
 706
 707 # TODO: (or not to do?) insert blank line before the first line with too-small
 708 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
 709 # split and push-back of the documentation part)?
 710 #
 711 # .. _Text2Code.header_handler:
 712 #
 713 # header_handler
 714 # ~~~~~~~~~~~~~~
 715 #
 716 # Sometimes code needs to remain on the first line(s) of the document to be
 717 # valid. The most common example is the "shebang" line that tells a POSIX
 718 # shell how to process an executable file::
 719
 720 #!/usr/bin/env python
 721
 722 # In Python, the special comment to indicate the encoding, e.g.
 723 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
 724 # or code too.
 725 #
 726 # If we want to keep the line numbers in sync for text and code source, the
 727 # reStructured Text markup for these header lines must start at the same line
 728 # as the first header line. Therefore, header lines could not be marked as
 729 # literal block (this would require the ``::`` and an empty line above the
 730 # code_block).
 731 #
 732 # OTOH, a comment may start at the same line as the comment marker and it
 733 # includes subsequent indented lines. Comments are visible in the reStructured
 734 # Text source but hidden in the pretty-printed output.
 735 #
 736 # With a header converted to comment in the text source, everything before
 737 # the first documentation block (i.e. before the first paragraph using the
 738 # matching comment string) will be hidden away (in HTML or PDF output).
 739 #
 740 # This seems a good compromise, the advantages
 741 #
 742 # * line numbers are kept
 743 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
 744 # * greater flexibility: you can hide a repeating header in a project
 745 #   consisting of many source files.
 746 #
 747 # set off the disadvantages
 748 #
 749 # - it may come as a surprise if a part of the file is not "printed",
 750 # - one more syntax element to learn for rst newbies to start with pylit,
 751 #   (however, starting from the code source, this will be auto-generated)
 752 #
 753 # In the case that there is no matching comment at all, the complete code
 754 # source will become a comment -- however, in this case it is not very likely
 755 # the source is a literate document anyway.
 756 #
 757 # If needed for the documentation, it is possible to quote the header in (or
 758 # after) the first documentation block, e.g. as `parsed literal`.
 759 # ::
 760
 761     def header_handler(self, lines):
 762         """Format leading code block"""
 763         # strip header string from first line
 764         lines[0] = lines[0].replace(self.header_string, "", 1)
 765         # yield remaining lines formatted as code-block
 766         for line in self.code_block_handler(lines):
 767             yield line
 768
 769
 770 # .. _Text2Code.documentation_handler:
 771 #
 772 # documentation_handler
 773 # ~~~~~~~~~~~~~~~~~~~~~
 774 #
 775 # The 'documentation' handler processes everything that is not recognised as
 776 # "code_block". Documentation is quoted with `self.comment_string`
 777 # (or filtered with `--strip=True`).
 778 #
 779 # If end-of-documentation marker is detected,
 780 #
 781 # * set state to 'code_block'
 782 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
 783 #   next "documentation" block)
 784 #
 785 # ::
 786
 787     def documentation_handler(self, lines):
 788         """Convert documentation blocks from text to code format
 789         """
 790         for line in lines:
 791             # test lines following the code-block marker for false positives
 792             if (self.state == "code_block" and line.rstrip()
 793                 and not self.directive_option_regexp.search(line)):
 794                 self.state = "documentation"
 795             # test for end of documentation block
 796             if self.marker_regexp.search(line):
 797                 self.state = "code_block"
 798                 self._textindent = self.get_indent(line)
 799             # yield lines
 800             if self.strip:
 801                 continue
 802             # do not comment blank lines preceding a code block
 803             if line.rstrip():
 804                 yield self.comment_string + line
 805             else:
 806                 if self.state == "code_block":
 807                     yield line
 808                 else:
 809                     yield self.comment_string.rstrip() + line
 810
 811
 812
 813 # .. _Text2Code.code_block_handler:
 814 #
 815 # code_block_handler
 816 # ~~~~~~~~~~~~~~~~~~
 817 #
 818 # The "code_block" handler is called with an indented literal block. It
 819 # removes leading whitespace up to the indentation of the first code line in
 820 # the file (this deviation from Docutils behaviour allows indented blocks of
 821 # Python code). ::
 822
 823     def code_block_handler(self, block):
 824         """Convert indented literal blocks to source code format
 825         """
 826
 827 # If still unset, determine the indentation of code blocks from first non-blank
 828 # code line::
 829
 830         if self._codeindent == 0:
 831             self._codeindent = self.get_indent(block[0])
 832
 833 # Yield unindented lines after check whether we can safely unindent. If the
 834 # line is less indented then `_codeindent`, something got wrong. ::
 835
 836         for line in block:
 837             if line.lstrip() and self.get_indent(line) < self._codeindent:
 838                 raise ValueError("code block contains line less indented "
 839                             "than %d spaces \n%r"%(self._codeindent, block))
 840             yield line.replace(" "*self._codeindent, "", 1)
 841
 842
 843 # Code2Text
 844 # ---------
 845 #
 846 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
 847 # a source in "code format" (i.e. in a programming language), extracts
 848 # documentation from comment blocks, and puts program code in literal blocks.
 849 #
# The class inherits the interface and helper functions from
# TextCodeConverter_ and adds functions specific to the code-to-text format
# conversion::
 853
 854 class Code2Text(TextCodeConverter):
 855     """Convert code source to text source
 856     """
 857
 858 # set_state
 859 # ~~~~~~~~~
 860 #
 861 # Check if block is "header", "documentation", or "code_block":
 862 #
 863 # A paragraph is "documentation", if every non-blank line starts with a
 864 # matching comment string (including whitespace except for commented blank
 865 # lines) ::
 866
 867     def set_state(self, block):
 868         """Determine state of `block`."""
 869         for line in block:
 870             # skip documentation lines (commented, blank or blank comment)
 871             if (line.startswith(self.comment_string)
 872                 or not line.rstrip()
 873                 or line.rstrip() == self.comment_string.rstrip()
 874                ):
 875                 continue
 876             # non-commented line found:
 877             if self.state == "":
 878                 self.state = "header"
 879             else:
 880                 self.state = "code_block"
 881             break
 882         else:
 883             # no code line found
 884             # keep state if the block is just a blank line
 885             # if len(block) == 1 and self._is_blank_codeline(line):
 886             #     return
 887             self.state = "documentation"
 888
 889
 890 # header_handler
 891 # ~~~~~~~~~~~~~~
 892 #
 893 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
 894 # discussion of the "header" state.) ::
 895
 896     def header_handler(self, lines):
 897         """Format leading code block"""
 898         if self.strip == True:
 899             return
 900         # get iterator over the lines that formats them as code-block
 901         lines = iter(self.code_block_handler(lines))
 902         # prepend header string to first line
 903         yield self.header_string + next(lines)
 904         # yield remaining lines
 905         for line in lines:
 906             yield line
 907
 908 # .. _Code2Text.documentation_handler:
 909 #
 910 # documentation_handler
 911 # ~~~~~~~~~~~~~~~~~~~~~
 912 #
 913 # The *documentation state* handler converts a comment to a documentation
 914 # block by stripping the leading `comment string` from every line::
 915
 916     def documentation_handler(self, block):
 917         """Uncomment documentation blocks in source code
 918         """
 919
 920 # Strip comment strings::
 921
 922         lines = [self.uncomment_line(line) for line in block]
 923
 924 # If the code block is stripped, the literal marker would lead to an
 925 # error when the text is converted with Docutils. Strip it as well. ::
 926
 927         if self.strip or self.strip_marker:
 928             self.strip_code_block_marker(lines)
 929
 930 # Otherwise, check for the `code_block_marker`_ at the end of the
 931 # documentation block (skipping directive options that might follow it)::
 932
 933         elif self.add_missing_marker:
 934             for line in lines[::-1]:
 935                 if self.marker_regexp.search(line):
 936                     self._add_code_block_marker = False
 937                     break
 938                 if (line.rstrip() and
 939                     not self.directive_option_regexp.search(line)):
 940                     self._add_code_block_marker = True
 941                     break
 942             else:
 943                 self._add_code_block_marker = True
 944
 945 # Yield lines::
 946
 947         for line in lines:
 948             yield line
 949
 950 # uncomment_line
 951 # ~~~~~~~~~~~~~~
 952 #
 953 # Return documentation line after stripping comment string. Consider the
 954 # case that a blank line has a comment string without trailing whitespace::
 955
 956     def uncomment_line(self, line):
 957         """Return uncommented documentation line"""
 958         line = line.replace(self.comment_string, "", 1)
 959         if line.rstrip() == self.stripped_comment_string:
 960             line = line.replace(self.stripped_comment_string, "", 1)
 961         return line
 962
 963 # .. _Code2Text.code_block_handler:
 964 #
 965 # code_block_handler
 966 # ~~~~~~~~~~~~~~~~~~
 967 #
 968 # The `code_block` handler returns the code block as indented literal
 969 # block (or filters it, if ``self.strip == True``). The amount of the code
 970 # indentation is controlled by `self.codeindent` (default 2).  ::
 971
 972     def code_block_handler(self, lines):
 973         """Covert code blocks to text format (indent or strip)
 974         """
 975         if self.strip == True:
 976             return
 977         # eventually insert transition marker
 978         if self._add_code_block_marker:
 979             self.state = "documentation"
 980             yield self.code_block_marker + "\n"
 981             yield "\n"
 982             self._add_code_block_marker = False
 983             self.state = "code_block"
 984         for line in lines:
 985             yield " "*self.codeindent + line
 986
 987
 988
 989 # strip_code_block_marker
 990 # ~~~~~~~~~~~~~~~~~~~~~~~
 991 #
 992 # Replace the literal marker with the equivalent of Docutils replace rules
 993 #
 994 # * strip ``::``-line (and preceding blank line) if on a line on its own
 995 # * strip ``::`` if it is preceded by whitespace.
 996 # * convert ``::`` to a single colon if preceded by text
 997 #
 998 # `lines` is a list of documentation lines (with a trailing blank line).
 999 # It is modified in-place::
1000
1001     def strip_code_block_marker(self, lines):
1002         try:
1003             line = lines[-2]
1004         except IndexError:
1005             return # just one line (no trailing blank line)
1006
1007         # match with regexp: `match` is None or has groups
1008         # \1 leading text, \2 code_block_marker, \3 remainder
1009         match = self.marker_regexp.search(line)
1010
1011         if not match:                 # no code_block_marker present
1012             return
1013         if not match.group(1):        # `code_block_marker` on an extra line
1014             del(lines[-2])
1015             # delete preceding line if it is blank
1016             if len(lines) >= 2 and not lines[-2].lstrip():
1017                 del(lines[-2])
1018         elif match.group(1).rstrip() < match.group(1):
1019             # '::' follows whitespace
1020             lines[-2] = match.group(1).rstrip() + match.group(3)
1021         else:                         # '::' follows text
1022             lines[-2] = match.group(1).rstrip() + ':' + match.group(3)
1023
1024 # Filters
1025 # =======
1026 #
# Filters allow pre- and post-processing of the data to bring it into a format
# suitable for the "normal" text<->code conversion. An example is the
# conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and back).
1032 #
1033 # Filters are generator functions that return an iterator acting on a
1034 # `data` iterable and yielding processed `data` lines.
1035 #
1036 # identity_filter
1037 # ---------------
1038 #
1039 # The most basic filter is the identity filter, that returns its argument as
1040 # iterator::
1041
def identity_filter(data):
    """Return an iterator over the unmodified items of `data`"""
    unprocessed = iter(data)
    return unprocessed
1045
1046 # expandtabs_filter
1047 # -----------------
1048 #
1049 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
1050 #
1051 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
1052 # hard tabs can lead to errors when the indentation is changed. ::
1053
def expandtabs_filter(data):
    """Yield the lines of `data` with hard tabs replaced by spaces"""
    for raw_line in data:
        expanded = raw_line.expandtabs()
        yield expanded
1058
1059
1060 # collect_blocks
1061 # --------------
1062 #
1063 # A filter to aggregate "paragraphs" (blocks separated by blank
1064 # lines). Yields lists of lines::
1065
def collect_blocks(lines):
    """Group `lines` into paragraphs.

    Yield one list of lines per paragraph. A paragraph ends with the blank
    (whitespace only) lines that follow it; the next non-blank line starts
    a new paragraph.

    The last paragraph is always yielded, even if it is empty.
    """
    paragraph = []
    seen_blank = False
    for current in lines:
        has_text = bool(current.rstrip())
        if has_text and seen_blank:
            # first text line after blank line(s): hand over the paragraph
            yield paragraph
            paragraph = [current]
            seen_blank = False
        else:
            if not has_text:
                seen_blank = True
            paragraph.append(current)
    yield paragraph
1086
1087
1088
1089 # dumb_c_preprocessor
1090 # -------------------
1091 #
1092 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
1093 # only converts lines that
1094 #
1095 # * start with "/\* " and end with " \*/" (followed by whitespace only)
1096 #
1097 # A more sophisticated version would also
1098 #
1099 # * convert multi-line comments
1100 #
1101 #   + Keep indentation or strip 3 leading spaces?
1102 #
1103 # * account for nested comments
1104 #
1105 # * only convert comments that are separated from code by a blank line
1106 #
1107 # ::
1108
def dumb_c_preprocessor(data):
    """change `C` ``/* `` `` */`` comments into C++ ``// `` comments

    Only lines that start with ``/* `` and end with `` */`` (ignoring
    trailing whitespace) are converted; all other lines are passed on
    unchanged.
    """
    cxx_comment = defaults.comment_strings["c++"]
    opener, closer = "/* ", " */"
    for text in data:
        is_one_line_comment = (text.startswith(opener)
                               and text.rstrip().endswith(closer))
        if is_one_line_comment:
            text = text.replace(opener, cxx_comment, 1)
            # drop the last occurrence of the closing delimiter
            head, _, tail = text.rpartition(closer)
            text = head + tail
        yield text
1121
1122 # Unfortunately, the `replace` method of strings does not support negative
1123 # numbers for the `count` argument:
1124 #
1125 #   >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1126 #   False
1127 #
1128 # However, there is the `rsplit` method, that can be used together with `join`:
1129 #
1130 #   >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1131 #   True
1132 #
1133 # dumb_c_postprocessor
1134 # --------------------
1135 #
1136 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1137 # delimiters ::
1138
def dumb_c_postprocessor(data):
    """change C++ ``// `` comments into `C` ``/* `` `` */`` comments

    Blank comment lines (the comment string with or without its trailing
    whitespace) become blank lines. Lines starting with the comment string
    are wrapped in ``/* `` and `` */``. All other lines are passed on
    unchanged.
    """
    comment_string = defaults.comment_strings["c++"]
    stripped_comment_string = comment_string.rstrip()
    boc_string = "/* "
    eoc_string = " */"
    for line in data:
        if line.rstrip() == stripped_comment_string:
            line = line.replace(comment_string, "", 1)
            # the blank comment may lack the trailing whitespace of the
            # comment string (this is the form written by the converter)
            if line.rstrip() == stripped_comment_string:
                line = line.replace(stripped_comment_string, "", 1)
        elif line.startswith(comment_string):
            line = line.replace(comment_string, boc_string, 1)
            line = line.rstrip() + eoc_string + "\n"
        yield line
1151
1152
1153 # register filters
1154 # ----------------
1155 #
1156 # ::
1157
# The "dumb" C filters are used for both C and CSS sources.
defaults.preprocessors.update({
    'c2text': dumb_c_preprocessor,
    'css2text': dumb_c_preprocessor,
})
defaults.postprocessors.update({
    'text2c': dumb_c_postprocessor,
    'text2css': dumb_c_postprocessor,
})
1162
1163
1164 # Command line use
1165 # ================
1166 #
1167 # Using this script from the command line will convert a file according to its
1168 # extension. This default can be overridden by a couple of options.
1169 #
1170 # Dual source handling
1171 # --------------------
1172 #
1173 # How to determine which source is up-to-date?
1174 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1175 #
1176 # - set modification date of `outfile` to the one of `infile`
1177 #
1178 #   Points out that the source files are 'synchronised'.
1179 #
1180 #   * Are there problems to expect from "backdating" a file? Which?
1181 #
1182 #     Looking at http://www.unix.com/showthread.php?t=20526, it seems
1183 #     perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1184 #     description of the "actuality" of the data in the file.
1185 #
1186 #   * Should this become a default or an option?
1187 #
1188 # - alternatively move input file to a backup copy (with option: `--replace`)
1189 #
1190 # - check modification date before overwriting
1191 #   (with option: `--overwrite=update`)
1192 #
1193 # - check modification date before editing (implemented as `Jed editor`_
1194 #   function `pylit_check()` in `pylit.sl`_)
1195 #
1196 # .. _Jed editor: http://www.jedsoft.org/jed/
1197 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1198 #
1199 # Recognised Filename Extensions
1200 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1201 #
1202 # Instead of defining a new extension for "pylit" literate programs,
1203 # by default ``.txt`` will be appended for the text source and stripped by
1204 # the conversion to the code source. I.e. for a Python program foo:
1205 #
1206 # * the code source is called ``foo.py``
1207 # * the text source is called ``foo.py.txt``
1208 # * the html rendering is called ``foo.py.html``
1209 #
1210 #
1211 # OptionValues
1212 # ------------
1213 #
1214 # The following class adds `as_dict`_, `complete`_ and `__getattr__`_
1215 # methods to `optparse.Values`::
1216
class OptionValues(optparse.Values):
    """`optparse.Values` that returns `None` for options that are not set"""

# .. _OptionValues.as_dict:
#
# as_dict
# ~~~~~~~
#
# For use as keyword arguments, it is handy to have the options in a
# dictionary. `as_dict` returns a copy of the instance's object dictionary::

    def as_dict(self):
        """Return a copy of the option values as dictionary"""
        return dict(self.__dict__)

# .. _OptionValues.complete:
#
# complete
# ~~~~~~~~
#
# ::

    def complete(self, **keyw):
        """
        Fill in missing option values from keyword arguments.

        Existing values are kept: a keyword argument is only used if there
        is no instance attribute of that name in `self`.
        """
        stored = self.__dict__
        for name, value in keyw.items():
            if name not in stored:
                setattr(self, name, value)

# .. _OptionValues.__getattr__:
#
# __getattr__
# ~~~~~~~~~~~
#
# To replace calls using ``options.ensure_value("OPTION", None)`` with the
# more concise ``options.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return `None` as value of every option that is not set"""
        return None
1262
1263
1264 # .. [#] The special method `__getattr__` is only called when an attribute
1265 #        look-up has not found the attribute in the usual places (i.e. it is
1266 #        not an instance attribute nor is it found in the class tree for
1267 #        self).
1268 #
1269 #
1270 # PylitOptions
1271 # ------------
1272 #
1273 # The `PylitOptions` class comprises an option parser and methods for parsing
1274 # and completion of command line options::
1275
class PylitOptions(object):
    """Storage and handling of command line options for pylit"""

# Instantiation
# ~~~~~~~~~~~~~
#
# ::

    def __init__(self):
        """Set up an `OptionParser` instance for pylit command line options

        The parser is stored in `self.parser`.
        """
        p = optparse.OptionParser(usage=main.__doc__, version=_version)

        # Conversion settings

        p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                     help="convert code source to text source")
        p.add_option("-t", "--txt2code", action="store_true",
                     help="convert text source to code source")
        p.add_option("--language",
                     choices = list(defaults.comment_strings.keys()),
                     help="use LANGUAGE native comment style")
        p.add_option("--comment-string", dest="comment_string",
                     help="documentation block marker in code source "
                     "(including trailing whitespace, "
                     "default: language dependent)")
        p.add_option("-m", "--code-block-marker", dest="code_block_marker",
                     help="syntax token starting a code block. (default '::')")
        p.add_option("--codeindent", type="int",
                     help="Number of spaces to indent code blocks with "
                     "code2text (default %d)" % defaults.codeindent)

        # Output file handling

        p.add_option("--overwrite", action="store",
                     choices = ["yes", "update", "no"],
                     help="overwrite output file (default 'update')")
        p.add_option("--replace", action="store_true",
                     help="move infile to a backup copy (appending '~')")
        # TODO: do we need this? If yes, make mtime update depend on it!
        # p.add_option("--keep-mtime", action="store_true",
        #              help="do not set the modification time of the outfile "
        #              "to the corresponding value of the infile")
        p.add_option("-s", "--strip", action="store_true",
                     help='"export" by stripping documentation or code')

        # Special actions

        p.add_option("-d", "--diff", action="store_true",
                     help="test for differences to existing file")
        p.add_option("--doctest", action="store_true",
                     help="run doctest.testfile() on the text version")
        p.add_option("-e", "--execute", action="store_true",
                     help="execute code (Python only)")

        self.parser = p

# .. _PylitOptions.parse_args:
#
# parse_args
# ~~~~~~~~~~
#
# The `parse_args` method calls the `optparse.OptionParser` on command
# line or provided args and returns the result as `OptionValues`_
# instance. Defaults can be provided as keyword arguments::

    def parse_args(self, args=None, **keyw):
        """parse command line arguments using `optparse.OptionParser`

           parse_args(args, **keyw) -> OptionValues instance

            args --  list of command line arguments
                     (default: `sys.argv[1:]` at the time of the call).
            keyw --  keyword arguments or dictionary of option defaults
        """
        # look up the command line at call time (a default of
        # ``sys.argv[1:]`` would be frozen when the module is imported)
        if args is None:
            args = sys.argv[1:]
        # parse arguments
        (values, args) = self.parser.parse_args(args, OptionValues(keyw))
        # Convert FILE and OUTFILE positional args to option values
        # (other positional arguments are ignored)
        try:
            values.infile = args[0]
            values.outfile = args[1]
        except IndexError:
            pass

        return values

# .. _PylitOptions.complete_values:
#
# complete_values
# ~~~~~~~~~~~~~~~
#
# Complete an OptionValues instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- OptionValues instance
        """

# Complete with module-level defaults_::

        values.complete(**defaults.__dict__)

# Ensure infile is a string::

        values.ensure_value("infile", "")

# Guess conversion direction from `infile` filename::

        if values.txt2code is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in values.text_extensions:
                values.txt2code = True
            elif in_extension in values.languages:
                values.txt2code = False

# Auto-determine the output file name::

        values.ensure_value("outfile", self._get_outfile_name(values))

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = not (out_extension in values.text_extensions)

# Set the language of the code (`txt2code` is no longer None here; test its
# truth value so that values like 0 and 1 are handled, too)::

        if values.txt2code:
            code_extension = os.path.splitext(values.outfile)[1]
        else:
            code_extension = os.path.splitext(values.infile)[1]
        values.ensure_value("language", values.languages[code_extension])

        return values

# _get_outfile_name
# ~~~~~~~~~~~~~~~~~
#
# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:
#
# * '-' (stdin) results in '-' (stdout)
# * strip the `text_extension`_ (txt2code) or
# * add the `text_extension`_ (code2txt)
# * fallback: if no guess can be made, add ".out"
#
#   .. TODO: use values.outfile_extension if it exists?
#
# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        values -- object with the attributes `infile`, `text_extensions`,
                  `languages`, and `txt2code`
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in values.text_extensions:
            return base # strip
        if ext and ext in values.languages or values.txt2code == False:
            return values.infile + values.text_extensions[0] # add
        # give up
        return values.infile + ".out"

# .. _PylitOptions.__call__:
#
# __call__
# ~~~~~~~~
#
# The special `__call__` method allows to use PylitOptions instances as
# *callables*: Calling an instance parses the argument list to extract option
# values and completes them based on "context-sensitive defaults".  Keyword
# arguments are passed to `PylitOptions.parse_args`_ as default values. ::

    def __call__(self, args=None, **keyw):
        """parse and complete command line args, return option values

        args --  list of command line arguments
                 (default: `sys.argv[1:]` at the time of the call).
        keyw --  option defaults, passed on to `parse_args`
        """
        values = self.parse_args(args, **keyw)
        return self.complete_values(values)
1462
1463
1464
1465 # Helper functions
1466 # ----------------
1467 #
1468 # open_streams
1469 # ~~~~~~~~~~~~
1470 #
1471 # Return file objects for in- and output. If the input path is missing,
1472 # write usage and abort. (An alternative would be to use stdin as default.
1473 # However,  this leaves the uninitiated user with a non-responding application
1474 # if (s)he just tries the script without any arguments) ::
1475
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  file(infile) or sys.stdin
    out_stream  --  file(outfile) or sys.stdout
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Additional keyword arguments are ignored.

    Raise ValueError for an invalid `overwrite` value and IOError if
    `infile` is empty or `outfile` must not be overwritten. An already
    opened input file is closed again if the output stream cannot be
    opened.
    """
    if overwrite not in ('yes', 'no', 'update'):
        raise ValueError('Argument "overwrite" must be yes, no, or update".')
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(17, "Output file exists!", outfile)
        else:
            if overwrite == 'update':
                # None: (about) same age, True: outfile is newer
                outfile_is_newer = is_newer(outfile, infile)
                if outfile_is_newer is None:
                    raise IOError(0, "Output file is as old as input file!",
                                  outfile)
                if outfile_is_newer:
                    raise IOError(1, "Output file is newer than input file!",
                                  outfile)
            out_stream = open(outfile, 'w')
    except Exception:
        # do not leak the input file if there is no output stream
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1509
1510 # is_newer
1511 # ~~~~~~~~
1512 #
1513 # ::
1514
def is_newer(path1, path2):
    """Tell whether the file at `path1` was modified later than `path2`.

    The comparison is based on the modification times (mtime). A file
    that cannot be queried (e.g. because it does not exist) counts as
    oldest: the result is False if this holds for `path1` only and True
    if it holds for `path2` only.

    Modification times that differ by less than 1/10 second are treated
    as equal; in this case the result is None (false in a Boolean context
    but distinguishable from False).
    """
    stamps = []
    for path in (path1, path2):
        try:
            stamps.append(os.path.getmtime(path))
        except OSError:
            # sentinel older than any real modification time
            stamps.append(-1)
    first, second = stamps
    if abs(first - second) < 0.1:
        return None
    return first > second
1538
1539
1540 # get_converter
1541 # ~~~~~~~~~~~~~
1542 #
1543 # Get an instance of the converter state machine::
1544
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`.

    The result is a `Text2Code` instance if `txt2code` is true, otherwise
    a `Code2Text` instance. Keyword arguments are passed on to the
    converter class.
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1550
1551
1552 # Use cases
1553 # ---------
1554 #
1555 # run_doctest
1556 # ~~~~~~~~~~~
1557 # ::
1558
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      --  path of the source ('-' for stdin)
    txt2code    --  if False, `infile` is a code source that is converted
                    to text format before testing
    globs       --  dictionary of globals for the tests
                    (default: a new, empty dictionary)
    verbose, optionflags  --  passed on to `doctest.DocTestRunner`
    keyw        --  passed on to `Code2Text` (if `txt2code` is False)

    Return the tuple (failures, tries).
    """
    if globs is None:
        globs = {}

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see doc of sys.path())::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest. Close the input file when done::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            keyw.update({'add_missing_marker': False})
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        if data is not sys.stdin:
            data.close()

# decode doc string if there is a "magic comment" in the first or second line
# (http://docs.python.org/reference/lexical_analysis.html#encoding-declarations)
# ::

    if sys.version_info < (3,0):
        firstlines = ' '.join(docstring.splitlines()[:2])
        match = re.search(r'coding[=:]\s*([-\w.]+)', firstlines)
        if match:
            docencoding = match.group(1)
            docstring = docstring.decode(docencoding)

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    runner = DocTestRunner(verbose, optionflags)
    runner.run(test)
    # print the summary (this has to be a call, a bare attribute does nothing)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests"%(runner.failures, runner.tries))
    return runner.failures, runner.tries
1606
1607
1608 # diff
1609 # ~~~~
1610 #
1611 # ::
1612
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile does not exist or is '-', do a round-trip conversion and
    report differences.

    infile    --  path of the input file ('-' for stdin)
    outfile   --  path of the file to compare the conversion with
    txt2code  --  conversion direction, see `get_converter`
    keyw      --  passed on to the converter

    The differences are printed in unified diff format.
    Return True if differences were found, else False.
    """

    import difflib

    # for diffing, we need a copy of the data as list::
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        with open(infile) as instream:
            data = instream.readlines()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-' and os.path.exists(outfile):
        with open(outfile) as outstream:
            old = outstream.readlines()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # the lines already end with a newline; do not append anything
        # (a space would end up in front of the following line)
        print(line, end='')
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1657
1658
1659 # execute
1660 # ~~~~~~~
1661 #
1662 # Works only for python code.
1663 #
1664 # Does not work with `eval`, as code is not just one expression. ::
1665
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    infile    --  path of the source file
    txt2code  --  if true, `infile` is a text source that is converted to
                  code with `Text2Code` before execution
    keyw      --  passed on to `Text2Code`

    The code is run in a fresh namespace with ``__name__ == "__main__"``
    so that its top-level names are real globals (visible from functions
    defined in the code).
    """

    with open(infile) as f:
        data = f.readlines()
    if txt2code:
        source = str(Text2Code(data, **keyw))
    else:
        source = ''.join(data)
    # NOTE(review): this runs arbitrary code -- use with trusted sources only.
    # Without an explicit namespace, `exec` would store top-level names in
    # the locals of this function, where functions defined by the executed
    # code cannot find them.
    namespace = {"__name__": "__main__"}
    exec(compile(source, infile, "exec"), namespace)
1675
1676
1677 # main
1678 # ----
1679 #
1680 # If this script is called from the command line, the `main` function will
1681 # convert the input (file or stdin) between text and code formats.
1682 #
1683 # Option default values for the conversion can be given as keyword arguments
1684 # to `main`_.  The option defaults will be updated by command line options and
1685 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1686 # helper functions and the converter instantiation.
1687 #
1688 # This allows easy customisation for programmatic use -- just call `main`
1689 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1690 #
1691 # ::
1692
def main(args=None, **defaults):
    """%prog [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in and output.
    """
    # NOTE: the docstring above is kept verbatim: its ``%prog`` placeholder
    # suggests that it doubles as usage message of the option parser
    # (to be confirmed in `PylitOptions`).
    #
    # Arguments:
    #   args:     list of command line arguments, defaults to ``sys.argv[1:]``
    #   defaults: option default values, passed on to `PylitOptions`
    #
    # Returns the result of `run_doctest`, `diff`, or `execute` for the
    # corresponding special action and None after a conversion.
    # Exits with a non-zero status if the streams cannot be opened.

    # Read the command line when `main` is called: a default value of
    # ``sys.argv[1:]`` in the signature is evaluated only once (when the
    # module is imported) and misses later changes to `sys.argv`.
    if args is None:
        args = sys.argv[1:]

# Parse and complete the options::

    options = PylitOptions()(args, **defaults)

# Special actions with early return::

    if options.doctest:
        return run_doctest(**options.as_dict())

    if options.diff:
        return diff(**options.as_dict())

    if options.execute:
        return execute(**options.as_dict())

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**options.as_dict())
    except IOError as ex:
        # Report on stderr: stdout may carry the converted output.
        print("IOError: %s %s" % (ex.filename, ex.strerror), file=sys.stderr)
        # `ex.errno` is None if the error was raised without an error
        # number; ``sys.exit(None)`` would signal success to the caller.
        sys.exit(ex.errno or 1)

# Get a converter instance, convert, and write to out_stream.  A file is
# closed even if the conversion or the writing fails::

    to_file = out_stream is not sys.stdout
    try:
        converter = get_converter(data, **options.as_dict())
        out_stream.write(str(converter))
    finally:
        if to_file:
            out_stream.close()

    if to_file:
        print("output written to", out_stream.name)

# If input and output are from files, set the modification time (`mtime`) of
# the output file to the one of the input file to indicate that the contained
# information is equal. [#]_ ::

        try:
            os.utime(options.outfile, (os.path.getatime(options.outfile),
                                       os.path.getmtime(options.infile))
                    )
        except OSError:
            # Best effort only: there is no mtime to copy if, e.g., the
            # input is not a file.
            pass

# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).
#
#
# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        os.rename(options.infile, options.infile + "~")
1764
1765
1766 # Run main, if called from the command line::
1767
if __name__ == "__main__":
    # Script entry point: let `main` read the options from the command line.
    main()
1770
1771
1772 # Open questions
1773 # ==============
1774 #
1775 # Open questions and ideas for further development
1776 #
1777 # Clean code
1778 # ----------
1779 #
1780 # * can we gain from using "shutil" over "os.path" and "os"?
1781 # * use pylint or pyChecker to enforce a consistent style?
1782 #
1783 # Options
1784 # -------
1785 #
1786 # * Use templates for the "intelligent guesses" (with Python syntax for string
1787 #   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1788 #
1789 # * Is it sensible to offer the `header_string` option also as command line
1790 #   option?
1791 #
1792 # treatment of blank lines
1793 # ------------------------
1794 #
1795 # Alternatives: Keep blank lines blank
1796 #
1797 # - "never" (current setting) -> "visually merges" all documentation
1798 #    if there is no interjacent code
1799 #
1800 # - "always" -> disrupts documentation blocks,
1801 #
1802 # - "if empty" (no whitespace). Comment if there is whitespace.
1803 #
1804 #   This would allow non-obstructing markup but unfortunately this is (in
1805 #   most editors) also non-visible markup.
1806 #
1807 # + "if double" (if there is more than one consecutive blank line)
1808 #
1809 #   With this handling, the "visual gap" remains in both, text and code
1810 #   source.
1811 #
1812 #
1813 # Parsing Problems
1814 # ----------------
1815 #
1816 # * Ignore "matching comments" in literal strings?
1817 #
1818 #   Too complicated: Would need a specific detection algorithm for every
1819 #   language that supports multi-line literal strings (C++, PHP, Python)
1820 #
1821 # * Warn if a comment in code will become documentation after round-trip?
1822 #
1823 #
1824 # docstrings in code blocks
1825 # -------------------------
1826 #
1827 # * How to handle docstrings in code blocks? (it would be nice to convert them
1828 #   to rst-text if ``__docformat__ == restructuredtext``)
1829 #
1830 # TODO: Ask at Docutils users|developers
1831 #
1832 # Plug-ins
1833 # --------
1834 #
1835 # Specify a path for user additions and plug-ins. This would require
1836 # converting PyLit from a pure module to a package...
1837 #
1838 #   6.4.3 Packages in Multiple Directories
1839 #
1840 #   Packages support one more special attribute, __path__. This is initialized
1841 #   to be a list containing the name of the directory holding the package's
1842 #   __init__.py before the code in that file is executed. This
1843 #   variable can be modified; doing so affects future searches for modules and
1844 #   subpackages contained in the package.
1845 #
1846 #   While this feature is not often needed, it can be used to extend the set
1847 #   of modules found in a package.
1848 #
1849 #
1850 # .. References
1851 #
1852 # .. _Docutils: http://docutils.sourceforge.net/
1853 # .. _Sphinx: http://sphinx.pocoo.org
1854 # .. _Pygments: http://pygments.org/
1855 # .. _code-block directive:
1856 #     http://docutils.sourceforge.net/sandbox/code-block-directive/
1857 # .. _literal block:
1858 # .. _literal blocks:
1859 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#literal-blocks
1860 # .. _indented literal block:
1861 # .. _indented literal blocks:
1862 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1863 # .. _quoted literal block:
1864 # .. _quoted literal blocks:
1865 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1866 # .. _parsed-literal blocks:
1867 #     http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block
1868 # .. _doctest block:
1869 # .. _doctest blocks:
1870 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks
1871 #
1872 # .. _feature request and patch by jrioux:
1873 #     http://developer.berlios.de/feature/?func=detailfeature&feature_id=4890&group_id=7974