pylit.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf8 -*-
   3
   4 """pylit: bidirectional text <-> code converter
   5
Convert between a *text source* with embedded computer code
   7 and a *code source* with embedded documentation.
   8 """
   9
  10 from __future__ import print_function
  11
  12 # pylit.py
  13 # ********
  14 # Literate programming with reStructuredText
  15 # ++++++++++++++++++++++++++++++++++++++++++
  16 #
  17 # :Copyright: © 2005, 2007, 2015, 2021 Günter Milde.
  18 #             Released without warranty under the terms of the
  19 #             GNU General Public License (v. 3 or later)
  20 #
  21 # .. contents::
  22 #
  23 # Frontmatter
  24 # ===========
  25 #
  26 # Changelog
  27 # ---------
  28 #
  29 # .. class:: borderless
  30 #
  31 # ======  ==========  ==========================================================
  32 # 0.1     2005-06-29  Initial version.
  33 # 0.1.1   2005-06-30  First literate version.
  34 # 0.1.2   2005-07-01  Object oriented script using generators.
  35 # 0.1.3   2005-07-10  Two state machine (later added 'header' state).
  36 # 0.2b    2006-12-04  Start of work on version 0.2 (code restructuring).
  37 # 0.2     2007-01-23  Published at ``pylit.berlios.de``.
  38 # 0.2.1   2007-01-25  Outsourced non-core documentation to the PyLit pages.
  39 # 0.2.2   2007-01-26  New behaviour of `diff` function.
  40 # 0.2.3   2007-01-29  New `header` methods after suggestion by Riccardo Murri.
  41 # 0.2.4   2007-01-31  Raise Error if code indent is too small.
  42 # 0.2.5   2007-02-05  New command line option --comment-string.
  43 # 0.2.6   2007-02-09  Add section with open questions,
  44 #                     Code2Text: let only blank lines (no comment str)
  45 #                     separate text and code,
  46 #                     fix `Code2Text.header`.
  47 # 0.2.7   2007-02-19  Simplify `Code2Text.header`,
  48 #                     new `iter_strip` method replacing a lot of ``if``-s.
  49 # 0.2.8   2007-02-22  Set `mtime` of outfile to the one of infile.
  50 # 0.3     2007-02-27  New `Code2Text` converter after an idea by Riccardo Murri,
  51 #                     explicit `option_defaults` dict for easier customisation.
  52 # 0.3.1   2007-03-02  Expand hard-tabs to prevent errors in indentation,
  53 #                     `Text2Code` now also works on blocks,
  54 #                     removed dependency on SimpleStates module.
  55 # 0.3.2   2007-03-06  Bug fix: do not set `language` in `option_defaults`
  56 #                     renamed `code_languages` to `languages`.
  57 # 0.3.3   2007-03-16  New language css,
  58 #                     option_defaults -> defaults = optparse.Values(),
  59 #                     simpler PylitOptions: don't store parsed values,
  60 #                     don't parse at initialisation,
  61 #                     OptionValues: return `None` for non-existing attributes,
  62 #                     removed -infile and -outfile, use positional arguments.
  63 # 0.3.4   2007-03-19  Documentation update,
  64 #                     separate `execute` function.
  65 #         2007-03-21  Code cleanup in `Text2Code.__iter__`.
  66 # 0.3.5   2007-03-23  Removed "css" from known languages after learning that
  67 #                     there is no C++ style "// " comment string in CSS2.
  68 # 0.3.6   2007-04-24  Documentation update.
  69 # 0.4     2007-05-18  Implement Converter.__iter__ as stack of iterator
  70 #                     generators. Iterating over a converter instance now
  71 #                     yields lines instead of blocks.
  72 #                     Provide "hooks" for pre- and postprocessing filters.
  73 #                     Rename states to reduce confusion with formats:
  74 #                     "text" -> "documentation", "code" -> "code_block".
  75 # 0.4.1   2007-05-22  Converter.__iter__: cleanup and reorganisation,
  76 #                     rename parent class Converter -> TextCodeConverter.
  77 # 0.4.2   2007-05-23  Merged Text2Code.converter and Code2Text.converter into
  78 #                     TextCodeConverter.converter.
  79 # 0.4.3   2007-05-30  Replaced use of defaults.code_extensions with
  80 #                     values.languages.keys().
  81 #                     Removed spurious `print` statement in code_block_handler.
  82 #                     Added basic support for 'c' and 'css' languages
  83 #                     with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
  84 # 0.5     2007-06-06  Moved `collect_blocks`_ out of `TextCodeConverter`_,
  85 #                     bug fix: collect all trailing blank lines into a block.
  86 #                     Expand tabs with `expandtabs_filter`_.
  87 # 0.6     2007-06-20  Configurable code-block marker (default ``::``)
  88 # 0.6.1   2007-06-28  Bug fix: reset self.code_block_marker_missing.
  89 # 0.7     2007-12-12  prepending an empty string to sys.path in run_doctest()
  90 #                     to allow imports from the current working dir.
  91 # 0.7.1   2008-01-07  If outfile does not exist, do a round-trip conversion
  92 #                     and report differences (as with outfile=='-').
  93 # 0.7.2   2008-01-28  Do not add missing code-block separators with
  94 #                     `doctest_run` on the code source. Keeps lines consistent.
  95 # 0.7.3   2008-04-07  Use value of code_block_marker for insertion of missing
  96 #                     transition marker in Code2Text.code_block_handler
  97 #                     Add "shell" to defaults.languages
  98 # 0.7.4   2008-06-23  Add "latex" to defaults.languages
  99 # 0.7.5   2009-05-14  Bugfix: ignore blank lines in test for end of code block
 100 # 0.7.6   2009-12-15  language-dependent code-block markers (after a
 101 #                     feature request and patch by `jrioux`),
 102 #                     use DefaultDict for language-dependent defaults,
 103 #                     new defaults setting `add_missing_marker`_.
 104 # 0.7.7   2010-06-23  New command line option --codeindent.
 105 # 0.7.8   2011-03-30  Do not overwrite custom `add_missing_marker` value,
 106 #                     allow directive options following the 'code' directive.
 107 # 0.7.9   2011-04-05  Decode doctest string if 'magic comment' gives encoding.
 108 # 0.7.10  2013-06-07  Add "lua" to defaults.languages
 109 # 0.7.11  2020-10-10  Return 0, if input and output file are of same age.
 110 # 0.8.0   unpublishd  Fix ``--execute`` behaviour and tests.
 111 #                     Change default `codeindent` to 2.
 112 # ======  ==========  ==========================================================
 113 #
 114 # ::
 115
 116 __version__ = "0.8.0dev"
 117
 118 __docformat__ = 'restructuredtext'
 119
 120
 121 # Introduction
 122 # ------------
 123 #
 124 # PyLit is a bidirectional converter between two formats of a computer
 125 # program source:
 126 #
 127 # * a (reStructured) text document with program code embedded in
 128 #   *code blocks*, and
 129 # * a compilable (or executable) code source with *documentation*
 130 #   embedded in comment blocks
 131 #
 132 #
 133 # Requirements
 134 # ------------
 135 #
 136 # ::
 137
 138 #import argparse
 139 import optparse
 140 import os
 141 import re
 142 import sys
 143
 144
 145 # DefaultDict
 146 # ~~~~~~~~~~~
 147 #
 148 # As `collections.defaultdict` adds key/value pairs when the default
 149 # constructor is called,  we  define an alternative that does not mutate the
 150 # dict as side-effect. ::
 151
 152 class DefaultDict(dict):
 153     """Dictionary with default value."""
 154
 155     default = 'python'
 156
 157     def __missing__(self, key):
 158         # cf. file:///usr/share/doc/python3/html/library/stdtypes.html#dict
 159         return self.default
 160
 161
 162 # defaults
 163 # ========
 164 #
 165 # The `defaults` object provides a central repository for default
 166 # values and their customisation. ::
 167
 168 defaults = optparse.Values()
 169
 170 # It is used for
 171 #
 172 # * the initialisation of data arguments in TextCodeConverter_ and
 173 #   PylitOptions_
 174 #
 175 # * completion of command line options in `PylitOptions.complete_values`_.
 176 #
 177 # This allows the easy creation of back-ends that customise the
 178 # defaults and then call `main`_ e.g.:
 179 #
 180 # >>> import pylit
 181 # >>> pylit.defaults.comment_string = "## "
 182 # >>> pylit.defaults.codeindent = 4
 183 # >>> pylit.main()
 184 # 0 failures in 0 tests
 185 # (0, 0)
 186 #
 187 # The following default values are defined in pylit.py:
 188 #
 189 # languages
 190 # ---------
 191 #
 192 # Mapping of code file extensions to code language::
 193
 194 defaults.languages  = DefaultDict({".c":   "c",
 195                                    ".cc":  "c++",
 196                                    ".css": "css",
 197                                    ".lua": "lua",
 198                                    ".py":  "python",
 199                                    ".sh":  "shell",
 200                                    ".sl":  "slang",
 201                                    ".sty": "latex",
 202                                    ".tex": "latex"
 203                                   })
 204 defaults.languages.default = 'python'
 205
 206 # The result can be overridden by the ``--language`` command line option.
 207 #
 208 # The fallback language, used if there is no matching extension (e.g. if pylit
 209 # is used as filter) and no ``--language`` is specified is ``"python"``.
 210 # It can be changed programmatically by changing the ``.default``
 211 # attribute, e.g.
 212 #
 213 # >>> pylit.defaults.languages['.parrot']
 214 # 'python'
 215 # >>> pylit.defaults.languages.default = 'c++'
 216 # >>> pylit.defaults.languages['.camel']
 217 # 'c++'
 218 #
 219 # .. _text_extension:
 220 #
 221 # text_extensions
 222 # ---------------
 223 #
 224 # List of known extensions of (reStructured) text files. The first
 225 # extension in this list is used by the `_get_outfile_name`_ method to
 226 # generate a text output filename::
 227
 228 defaults.text_extensions = [".txt", ".rst"]
 229
 230
 231 # comment_strings
 232 # ---------------
 233 #
 234 # Comment strings for known languages. Used in Code2Text_ to recognise
 235 # text blocks and in Text2Code_ to format text blocks as comments.
 236 # Defaults to ``'# '``.
 237 #
 238 # **Comment strings include trailing whitespace.** ::
 239
 240 defaults.comment_strings = DefaultDict({"css":    '// ',
 241                                         "c":      '// ',
 242                                         "c++":    '// ',
 243                                         "lua":    '-- ',
 244                                         "latex":  '% ',
 245                                         "python": '# ',
 246                                         "shell":  '# ',
 247                                         "slang":  '% '
 248                                        })
 249 defaults.comment_strings.default = '# '
 250
 251 # header_string
 252 # -------------
 253 #
 254 # Marker string for a header code block in the text source. No trailing
 255 # whitespace needed as indented code follows.
 256 # Must be a valid rst directive that accepts code on the same line, e.g.
# ``'.. admonition::'``.
 258 #
 259 # Default is a comment marker::
 260
 261 defaults.header_string = '..'
 262
 263
 264 # .. _code_block_marker:
 265 #
 266 # code_block_markers
 267 # ------------------
 268 #
 269 # Markup at the end of a documentation block.
 270 # Default is Docutils' marker for a `literal block`_::
 271
# No language-specific markers are predefined: the mapping starts empty, so
# every look-up returns the fallback marker set on the next line.
defaults.code_block_markers = DefaultDict()
defaults.code_block_markers.default = '::'
 274
 275 # The `code_block_marker` string is `inserted into a regular expression`_.
 276 # Language-specific markers can be defined programmatically, e.g. in a
 277 # wrapper script.
 278 #
# In a document where code examples are only one of several uses of
# literal blocks, it is more appropriate to single out the source code,
# e.g. with the double colon on a separate line ("expanded form")
 282 #
#   ``defaults.code_block_markers.default = ':: *'``
 284 #
 285 # or a dedicated ``.. code-block::`` directive [#]_
 286 #
#   ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
 288 #
 289 # The latter form also allows code in different languages kept together
 290 # in one literate source file.
 291 #
 292 # .. [#] The ``.. code-block::`` directive is not (yet) supported by
 293 #    standard Docutils.  It is provided by several add-ons, including
 294 #    the `code-block directive`_ project in the Docutils Sandbox and
 295 #    Sphinx_.
 296 #
 297 #
 298 # strip
 299 # -----
 300 #
 301 # Export to the output format stripping documentation or code blocks::
 302
 303 defaults.strip = False
 304
 305 # strip_marker
 306 # ------------
 307 #
# Strip literal marker from the end of documentation blocks when
# converting to code format. Makes the code more concise but loses the
 310 # synchronisation of line numbers in text and code formats. Can also be used
 311 # (together with the auto-completion of the code-text conversion) to change
 312 # the `code_block_marker`::
 313
 314 defaults.strip_marker = False
 315
 316 # add_missing_marker
 317 # ------------------
 318 #
 319 # When converting from code format to text format, add a `code_block_marker`
 320 # at the end of documentation blocks if it is missing::
 321
 322 defaults.add_missing_marker = True
 323
 324 # Keep this at ``True``, if you want to re-convert to code format later!
 325 #
 326 #
 327 # .. _defaults.preprocessors:
 328 #
 329 # preprocessors
 330 # -------------
 331 #
 332 # Preprocess the data with language-specific filters_
 333 # Set below in Filters_::
 334
 335 defaults.preprocessors = {}
 336
 337 # .. _defaults.postprocessors:
 338 #
 339 # postprocessors
 340 # --------------
 341 #
 342 # Postprocess the data with language-specific filters_::
 343
 344 defaults.postprocessors = {}
 345
 346 # .. _defaults.codeindent:
 347 #
 348 # codeindent
 349 # ----------
 350 #
 351 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
 352
 353 defaults.codeindent = 2
 354
 355 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
 356 # first recognised code line (header or first indented literal block
 357 # of the text source).
 358 #
 359 # overwrite
 360 # ---------
 361 #
 362 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
 363
 364 defaults.overwrite = 'update'
 365
 366 # Recognised values:
 367 #
#  :'yes':    overwrite a possibly existing `outfile`,
 369 #  :'update': fail if the `outfile` is newer than `infile`,
 370 #  :'no':     fail if `outfile` exists.
 371 #
 372 #
 373 # Extensions
 374 # ==========
 375 #
 376 # Try to import optional extensions::
 377
try:
    import pylit_elisp
except ImportError:
    # the extension is optional: carry on without it if it cannot be imported
    pass
 382
 383
 384 # Converter Classes
 385 # =================
 386 #
 387 # The converter classes implement a simple state machine to separate and
 388 # transform documentation and code blocks. For this task, only a very limited
 389 # parsing is needed. PyLit's parser assumes:
 390 #
 391 # * `indented literal blocks`_ in a text source are code blocks.
 392 #
 393 # * comment blocks in a code source where every line starts with a matching
 394 #   comment string are documentation blocks.
 395 #
 396 # TextCodeConverter
 397 # -----------------
 398 # ::
 399
 400 class TextCodeConverter(object):
 401     """Parent class for the converters `Text2Code` and `Code2Text`.
 402     """
 403
 404 # The parent class defines data attributes and functions used in both
 405 # `Text2Code`_ converting a text source to executable code source, and
 406 # `Code2Text`_ converting commented code to a text source.
 407 #
 408 # Data attributes
 409 # ~~~~~~~~~~~~~~~
 410 #
 411 # Class default values are fetched from the `defaults`_ object and can be
 412 # overridden by matching keyword arguments during class instantiation. This
 413 # also works with keyword arguments to `get_converter`_ and `main`_, as these
 414 # functions pass on unused keyword args to the instantiation of a converter
 415 # class. ::
 416
    # fallback language (``defaults.languages`` has no ``None`` key, so this
    # look-up returns its ``default``)
    language = defaults.languages[None]
    comment_strings = defaults.comment_strings
    comment_string = "" # set in __init__ (if empty)
    codeindent =  defaults.codeindent
    header_string = defaults.header_string
    code_block_markers = defaults.code_block_markers
    code_block_marker = "" # set in __init__ (if empty)
    strip = defaults.strip
    strip_marker = defaults.strip_marker
    add_missing_marker = defaults.add_missing_marker
    # matches spaces followed by a ``:name:`` field marker that is followed
    # by a space or the end of the string
    directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
    state = "" # type of current block, see `TextCodeConverter.convert`_
 429
 430 # Interface methods
 431 # ~~~~~~~~~~~~~~~~~
 432 #
 433 # .. _TextCodeConverter.__init__:
 434 #
 435 # __init__
 436 # """"""""
 437 #
 438 # Initialising sets the `data` attribute, an iterable object yielding lines of
 439 # the source to convert. [#]_
 440 #
 441 # .. [#] The most common choice of data is a `file` object with the text
 442 #        or code source.
 443 #
 444 #        To convert a string into a suitable object, use its splitlines()
 445 #        method like ``"2 lines\nof source".splitlines(True)``.
 446 #
 447 #
 448 # Additional keyword arguments are stored as instance variables,
 449 # overwriting the class defaults::
 450
 451     def __init__(self, data, **keyw):
 452         """data   --  iterable data object
 453                       (list, file, generator, string, ...)
 454            **keyw --  remaining keyword arguments are
 455                       stored as data-attributes
 456         """
 457         self.data = data
 458         self.__dict__.update(keyw)
 459
 460 # If empty, `code_block_marker` and `comment_string` are set according
 461 # to the `language`::
 462
 463         if not self.code_block_marker:
 464             self.code_block_marker = self.code_block_markers[self.language]
 465         if not self.comment_string:
 466             self.comment_string = self.comment_strings[self.language]
 467         self.stripped_comment_string = self.comment_string.rstrip()
 468
 469 # Pre- and postprocessing filters are set (with
 470 # `TextCodeConverter.get_filter`_)::
 471
 472         self.preprocessor = self.get_filter("preprocessors", self.language)
 473         self.postprocessor = self.get_filter("postprocessors", self.language)
 474
 475 # .. _inserted into a regular expression:
 476 #
 477 # Finally, a regular_expression for the `code_block_marker` is compiled
 478 # to find valid cases of `code_block_marker` in a given line and return
 479 # the groups: ``\1 prefix, \2 code_block_marker, \3 remainder`` ::
 480
 481         marker = self.code_block_marker
 482         if marker == '::':
 483             # the default marker may occur at the end of a text line
 484             self.marker_regexp = re.compile('^( *(?!\.\.).*)(::)([ \n]*)$')
 485         else:
 486             # marker must be on a separate line
 487             self.marker_regexp = re.compile('^( *)(%s)(.*\n?)$' % marker)
 488
 489 # .. _TextCodeConverter.__iter__:
 490 #
 491 # __iter__
 492 # """"""""
 493 #
 494 # Return an iterator for the instance. Iteration yields lines of converted
 495 # data.
 496 #
 497 # The iterator is a chain of iterators acting on `self.data` that does
 498 #
 499 # * preprocessing
 500 # * text<->code format conversion
 501 # * postprocessing
 502 #
 503 # Pre- and postprocessing are only performed, if filters for the current
 504 # language are registered in `defaults.preprocessors`_ and|or
 505 # `defaults.postprocessors`_. The filters must accept an iterable as first
 506 # argument and yield the processed input data line-wise.
 507 # ::
 508
 509     def __iter__(self):
 510         """Iterate over input data source and yield converted lines
 511         """
 512         return self.postprocessor(self.convert(self.preprocessor(self.data)))
 513
 514
 515 # .. _TextCodeConverter.__call__:
 516 #
 517 # __call__
 518 # """"""""
 519 # The special `__call__` method allows the use of class instances as callable
 520 # objects. It returns the converted data as list of lines::
 521
 522     def __call__(self):
 523         """Iterate over state-machine and return results as list of lines"""
 524         return [line for line in self]
 525
 526
 527 # .. _TextCodeConverter.__str__:
 528 #
 529 # __str__
 530 # """""""
 531 # Return converted data as string::
 532
 533     def __str__(self):
 534         return "".join(self())
 535
 536
 537 # Helpers and convenience methods
 538 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 539 #
 540 # .. _TextCodeConverter.convert:
 541 #
 542 # convert
 543 # """""""
 544 #
# The `convert` method generates an iterator that does the actual code <-->
# text format conversion. The converted data is yielded line-wise and the
# instance's `state` attribute indicates whether the current line is "header",
# "documentation", or "code_block"::
 549
 550     def convert(self, lines):
 551         """Iterate over lines of a program document and convert
 552         between "text" and "code" format
 553         """
 554
 555 # Initialise internal data arguments. (Done here, so that every new iteration
 556 # re-initialises them.)
 557 #
 558 # `state`
 559 #   the "type" of the currently processed block of lines. One of
 560 #
 561 #   :"":              initial state: check for header,
 562 #   :"header":        leading code block: strip `header_string`,
 563 #   :"documentation": documentation part: comment out,
 564 #   :"code_block":    literal blocks containing source code: unindent.
 565 #
 566 # ::
 567
 568         self.state = ""
 569
 570 # `_codeindent`
 571 #   * Do not confuse the internal attribute `_codeindent` with the configurable
 572 #     `codeindent` (without the leading underscore).
 573 #   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent of
 574 #     first non-blank "code_block" line and stripped from all "code_block" lines
 575 #     in the text-to-code conversion,
 576 #   * `codeindent` is set in `__init__` to `defaults.codeindent`_ and added to
 577 #     "code_block" lines in the code-to-text conversion.
 578 #
 579 # ::
 580
 581         self._codeindent = 0
 582
 583 # `_textindent`
 584 #   * set by `Text2Code.documentation_handler`_ to the minimal indent of a
 585 #     documentation block,
 586 #   * used in `Text2Code.set_state`_ to find the end of a code block.
 587 #
 588 # ::
 589
 590         self._textindent = 0
 591
 592 # `_add_code_block_marker`
 593 #   If the last paragraph of a documentation block does not end with a
 594 #   code_block_marker_, it should be added (otherwise, the back-conversion
 595 #   fails.).
 596 #
 597 #   `_add_code_block_marker` is set by `Code2Text.documentation_handler`_
 598 #   and evaluated by `Code2Text.code_block_handler`_, because the
 599 #   documentation_handler does not know whether the next block will be
 600 #   documentation (with no need for a code_block_marker) or a code block.
 601 #
 602 # ::
 603
 604         self._add_code_block_marker = False
 605
 606
 607
 608 # Determine the state of the block and convert with the matching "handler"::
 609
 610         for block in collect_blocks(expandtabs_filter(lines)):
 611             try:
 612                 self.set_state(block)
 613             except StopIteration:
 614                 return
 615             for line in getattr(self, self.state+"_handler")(block):
 616                 yield line
 617
 618
 619 # .. _TextCodeConverter.get_filter:
 620 #
 621 # get_filter
 622 # """"""""""
 623 # ::
 624
 625     def get_filter(self, filter_set, language):
 626         """Return language specific filter"""
 627         if self.__class__ == Text2Code:
 628             key = "text2"+language
 629         elif self.__class__ == Code2Text:
 630             key = language+"2text"
 631         else:
 632             key = ""
 633         try:
 634             return getattr(defaults, filter_set)[key]
 635         except (AttributeError, KeyError, TypeError):
 636             # print("there is no %r filter in %r"%(key, filter_set))
 637             pass
 638         return identity_filter
 639
 640
 641 # get_indent
 642 # """"""""""
 643 # Return the number of leading spaces in `line`::
 644
 645     def get_indent(self, line):
 646         """Return the indentation of `string`.
 647         """
 648         return len(line) - len(line.lstrip())
 649
 650
 651 # Text2Code
 652 # ---------
 653 #
 654 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
 655 # Code blocks are unindented, documentation is commented (or filtered, if the
 656 # ``strip`` option is True).
 657 #
 658 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
 659 #        literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
 660 #        treated as part of the documentation. This allows the inclusion of
 661 #        examples:
 662 #
 663 #           >>> 23 + 3
 664 #           26
 665 #
#        Note that there is no double colon before the doctest block in the
#        text source.
 668 #
 669 # The class inherits the interface and helper functions from
 670 # TextCodeConverter_ and adds functions specific to the text-to-code format
 671 # conversion::
 672
 673 class Text2Code(TextCodeConverter):
 674     """Convert a (reStructured) text source to code source
 675     """
 676
 677 # .. _Text2Code.set_state:
 678 #
 679 # set_state
 680 # ~~~~~~~~~
 681 # ::
 682
 683     def set_state(self, block):
 684         """Determine state of `block`. Set `self.state`
 685         """
 686
 687 # `set_state` is used inside an iteration. Hence, if we are out of data, a
 688 # StopItertion exception should be raised::
 689
 690         if not block:
 691             raise StopIteration
 692
 693 # The new state depends on the active state (from the last block) and
 694 # features of the current block. It is either "header", "documentation", or
 695 # "code_block".
 696 #
 697 # If the current state is "" (first block), check for
 698 # the  `header_string` indicating a leading code block::
 699
 700         if self.state == "":
 701             # print("set state for %r"%block)
 702             if block[0].startswith(self.header_string):
 703                 self.state = "header"
 704             else:
 705                 self.state = "documentation"
 706
 707 # If the current state is "documentation", the next block is also
 708 # documentation. The end of a documentation part is detected in the
 709 # `Text2Code.documentation_handler`_::
 710
 711         # elif self.state == "documentation":
 712         #    self.state = "documentation"
 713
 714 # A "code_block" ends with the first less indented, non-blank line.
 715 # `_textindent` is set by the documentation handler to the indent of the
 716 # preceding documentation block::
 717
 718         elif self.state in ["code_block", "header"]:
 719             indents = [self.get_indent(line) for line in block
 720                        if line.rstrip()]
 721             # print("set_state:", indents, self._textindent)
 722             if indents and min(indents) <= self._textindent:
 723                 self.state = 'documentation'
 724             else:
 725                 self.state = 'code_block'
 726
 727 # TODO: (or not to do?) insert blank line before the first line with too-small
 728 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
 729 # split and push-back of the documentation part)?
 730 #
 731 # .. _Text2Code.header_handler:
 732 #
 733 # header_handler
 734 # ~~~~~~~~~~~~~~
 735 #
 736 # Sometimes code needs to remain on the first line(s) of the document to be
 737 # valid. The most common example is the "shebang" line that tells a POSIX
 738 # shell how to process an executable file::
 739
 740 #!/usr/bin/env python
 741
 742 # In Python, the special comment to indicate the encoding, e.g.
 743 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
 744 # or code too.
 745 #
 746 # If we want to keep the line numbers in sync for text and code source, the
 747 # reStructured Text markup for these header lines must start at the same line
 748 # as the first header line. Therefore, header lines could not be marked as
 749 # literal block (this would require the ``::`` and an empty line above the
 750 # code_block).
 751 #
 752 # OTOH, a comment may start at the same line as the comment marker and it
 753 # includes subsequent indented lines. Comments are visible in the reStructured
 754 # Text source but hidden in the pretty-printed output.
 755 #
 756 # With a header converted to comment in the text source, everything before
 757 # the first documentation block (i.e. before the first paragraph using the
 758 # matching comment string) will be hidden away (in HTML or PDF output).
 759 #
# This seems a good compromise, the advantages
#
# * line numbers are kept
# * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
# * greater flexibility: you can hide a repeating header in a project
#   consisting of many source files.
#
# outweigh the disadvantages
 768 #
 769 # - it may come as surprise if a part of the file is not "printed",
 770 # - one more syntax element to learn for rst newbies to start with pylit,
 771 #   (however, starting from the code source, this will be auto-generated)
 772 #
 773 # In the case that there is no matching comment at all, the complete code
 774 # source will become a comment -- however, in this case it is not very likely
 775 # the source is a literate document anyway.
 776 #
 777 # If needed for the documentation, it is possible to quote the header in (or
 778 # after) the first documentation block, e.g. as `parsed literal`.
 779 # ::
 780
 781     def header_handler(self, lines):
 782         """Format leading code block"""
 783         # strip header string from first line
 784         lines[0] = lines[0].replace(self.header_string, "", 1)
 785         # yield remaining lines formatted as code-block
 786         for line in self.code_block_handler(lines):
 787             yield line
 788
 789
 790 # .. _Text2Code.documentation_handler:
 791 #
 792 # documentation_handler
 793 # ~~~~~~~~~~~~~~~~~~~~~
 794 #
 795 # The 'documentation' handler processes everything that is not recognised as
 796 # "code_block". Documentation is quoted with `self.comment_string`
 797 # (or filtered with `--strip=True`).
 798 #
 799 # If end-of-documentation marker is detected,
 800 #
 801 # * set state to 'code_block'
 802 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
 803 #   next "documentation" block)
 804 #
 805 # ::
 806
 807     def documentation_handler(self, lines):
 808         """Convert documentation blocks from text to code format
 809         """
 810         for line in lines:
 811             # test lines following the code-block marker for false positives
 812             if (self.state == "code_block" and line.rstrip()
 813                 and not self.directive_option_regexp.search(line)):
 814                 self.state = "documentation"
 815             # test for end of documentation block
 816             if self.marker_regexp.search(line):
 817                 self.state = "code_block"
 818                 self._textindent = self.get_indent(line)
 819             # yield lines
 820             if self.strip:
 821                 continue
 822             # do not comment blank lines preceding a code block
 823             if line.rstrip():
 824                 yield self.comment_string + line
 825             else:
 826                 if self.state == "code_block":
 827                     yield line
 828                 else:
 829                     yield self.comment_string.rstrip() + line
 830
 831
 832
 833 # .. _Text2Code.code_block_handler:
 834 #
 835 # code_block_handler
 836 # ~~~~~~~~~~~~~~~~~~
 837 #
 838 # The "code_block" handler is called with an indented literal block. It
 839 # removes leading whitespace up to the indentation of the first code line in
 840 # the file (this deviation from Docutils behaviour allows indented blocks of
 841 # Python code). ::
 842
 843     def code_block_handler(self, block):
 844         """Convert indented literal blocks to source code format
 845         """
 846
 847 # If still unset, determine the indentation of code blocks from first non-blank
 848 # code line::
 849
 850         if self._codeindent == 0:
 851             self._codeindent = self.get_indent(block[0])
 852
 853 # Yield unindented lines after check whether we can safely unindent. If the
 854 # line is less indented then `_codeindent`, something got wrong. ::
 855
 856         for line in block:
 857             if line.lstrip() and self.get_indent(line) < self._codeindent:
 858                 raise ValueError("code block contains line less indented "
 859                             "than %d spaces \n%r"%(self._codeindent, block))
 860             yield line.replace(" "*self._codeindent, "", 1)
 861
 862
 863 # Code2Text
 864 # ---------
 865 #
 866 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
 867 # a source in "code format" (i.e. in a programming language), extracts
 868 # documentation from comment blocks, and puts program code in literal blocks.
 869 #
 870 # The class inherits the interface and helper functions from
 871 # TextCodeConverter_ and adds functions specific to the code-to-text format
 872 # conversion::
 873
 874 class Code2Text(TextCodeConverter):
 875     """Convert code source to text source
 876     """
 877
 878 # set_state
 879 # ~~~~~~~~~
 880 #
 881 # Check if block is "header", "documentation", or "code_block":
 882 #
 883 # A paragraph is "documentation", if every non-blank line starts with a
 884 # matching comment string (including whitespace except for commented blank
 885 # lines) ::
 886
 887     def set_state(self, block):
 888         """Determine state of `block`."""
 889         for line in block:
 890             # skip documentation lines (commented, blank or blank comment)
 891             if (line.startswith(self.comment_string)
 892                 or not line.rstrip()
 893                 or line.rstrip() == self.comment_string.rstrip()
 894                ):
 895                 continue
 896             # non-commented line found:
 897             if self.state == "":
 898                 self.state = "header"
 899             else:
 900                 self.state = "code_block"
 901             break
 902         else:
 903             # no code line found
 904             # keep state if the block is just a blank line
 905             # if len(block) == 1 and self._is_blank_codeline(line):
 906             #     return
 907             self.state = "documentation"
 908
 909
 910 # header_handler
 911 # ~~~~~~~~~~~~~~
 912 #
 913 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
 914 # discussion of the "header" state.) ::
 915
 916     def header_handler(self, lines):
 917         """Format leading code block"""
 918         if self.strip == True:
 919             return
 920         # get iterator over the lines that formats them as code-block
 921         lines = iter(self.code_block_handler(lines))
 922         # prepend header string to first line
 923         yield self.header_string + next(lines)
 924         # yield remaining lines
 925         for line in lines:
 926             yield line
 927
 928 # .. _Code2Text.documentation_handler:
 929 #
 930 # documentation_handler
 931 # ~~~~~~~~~~~~~~~~~~~~~
 932 #
 933 # The *documentation state* handler converts a comment to a documentation
 934 # block by stripping the leading `comment string` from every line::
 935
 936     def documentation_handler(self, block):
 937         """Uncomment documentation blocks in source code
 938         """
 939
 940 # Strip comment strings::
 941
 942         lines = [self.uncomment_line(line) for line in block]
 943
 944 # If the code block is stripped, the literal marker would lead to an
 945 # error when the text is converted with Docutils. Strip it as well. ::
 946
 947         if self.strip or self.strip_marker:
 948             self.strip_code_block_marker(lines)
 949
 950 # Otherwise, check for the `code_block_marker`_ at the end of the
 951 # documentation block (skipping directive options that might follow it)::
 952
 953         elif self.add_missing_marker:
 954             for line in lines[::-1]:
 955                 if self.marker_regexp.search(line):
 956                     self._add_code_block_marker = False
 957                     break
 958                 if (line.rstrip() and
 959                     not self.directive_option_regexp.search(line)):
 960                     self._add_code_block_marker = True
 961                     break
 962             else:
 963                 self._add_code_block_marker = True
 964
 965 # Yield lines::
 966
 967         for line in lines:
 968             yield line
 969
 970 # uncomment_line
 971 # ~~~~~~~~~~~~~~
 972 #
 973 # Return documentation line after stripping comment string. Consider the
 974 # case that a blank line has a comment string without trailing whitespace::
 975
 976     def uncomment_line(self, line):
 977         """Return uncommented documentation line"""
 978         line = line.replace(self.comment_string, "", 1)
 979         if line.rstrip() == self.stripped_comment_string:
 980             line = line.replace(self.stripped_comment_string, "", 1)
 981         return line
 982
 983 # .. _Code2Text.code_block_handler:
 984 #
 985 # code_block_handler
 986 # ~~~~~~~~~~~~~~~~~~
 987 #
 988 # The `code_block` handler returns the code block as indented literal
 989 # block (or filters it, if ``self.strip == True``). The amount of the code
 990 # indentation is controlled by `self.codeindent` (default 2).  ::
 991
 992     def code_block_handler(self, lines):
 993         """Covert code blocks to text format (indent or strip)
 994         """
 995         if self.strip == True:
 996             return
 997         # eventually insert transition marker
 998         if self._add_code_block_marker:
 999             self.state = "documentation"
1000             yield self.code_block_marker + "\n"
1001             yield "\n"
1002             self._add_code_block_marker = False
1003             self.state = "code_block"
1004         for line in lines:
1005             yield " "*self.codeindent + line
1006
1007
1008
1009 # strip_code_block_marker
1010 # ~~~~~~~~~~~~~~~~~~~~~~~
1011 #
1012 # Replace the literal marker with the equivalent of Docutils replace rules
1013 #
1014 # * strip ``::``-line (and preceding blank line) if on a line on its own
1015 # * strip ``::`` if it is preceded by whitespace.
1016 # * convert ``::`` to a single colon if preceded by text
1017 #
1018 # `lines` is a list of documentation lines (with a trailing blank line).
1019 # It is modified in-place::
1020
1021     def strip_code_block_marker(self, lines):
1022         try:
1023             line = lines[-2]
1024         except IndexError:
1025             return # just one line (no trailing blank line)
1026
1027         # match with regexp: `match` is None or has groups
1028         # \1 leading text, \2 code_block_marker, \3 remainder
1029         match = self.marker_regexp.search(line)
1030
1031         if not match:                 # no code_block_marker present
1032             return
1033         if not match.group(1):        # `code_block_marker` on an extra line
1034             del(lines[-2])
1035             # delete preceding line if it is blank
1036             if len(lines) >= 2 and not lines[-2].lstrip():
1037                 del(lines[-2])
1038         elif match.group(1).rstrip() < match.group(1):
1039             # '::' follows whitespace
1040             lines[-2] = match.group(1).rstrip() + match.group(3)
1041         else:                         # '::' follows text
1042             lines[-2] = match.group(1).rstrip() + ':' + match.group(3)
1043
1044 # Filters
1045 # =======
1046 #
1047 # Filters allow pre- and post-processing of the data to bring it in a format
1048 # suitable for the "normal" text<->code conversion. An example is the
1049 # conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and
1050 # back), implemented by the `dumb_c_preprocessor`_ and
1051 # `dumb_c_postprocessor`_ filters below.
1052 #
1053 # Filters are generator functions that return an iterator acting on a
1054 # `data` iterable and yielding processed `data` lines.
1055 #
1056 # identity_filter
1057 # ---------------
1058 #
1059 # The most basic filter is the identity filter, that returns its argument as
1060 # iterator::
1061
def identity_filter(data):
    """Return an iterator over `data`; the items are passed on unchanged."""
    unprocessed = iter(data)
    return unprocessed
1065
1066 # expandtabs_filter
1067 # -----------------
1068 #
1069 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
1070 #
1071 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
1072 # hard tabs can lead to errors when the indentation is changed. ::
1073
def expandtabs_filter(data):
    """Yield every line of `data` with hard tabs replaced by spaces.

    The expansion is done by `str.expandtabs` with its default tab size.
    """
    for tabbed_line in data:
        expanded_line = tabbed_line.expandtabs()
        yield expanded_line
1078
1079
1080 # collect_blocks
1081 # --------------
1082 #
1083 # A filter to aggregate "paragraphs" (blocks separated by blank
1084 # lines). Yields lists of lines::
1085
def collect_blocks(lines):
    """Group `lines` into paragraphs.

    Yield one list of lines per paragraph. A paragraph ends with the blank
    (whitespace only) lines following it, i.e. a new paragraph starts with
    the first non-blank line after a blank line.

    The last list is yielded in any case (it is empty for empty input).
    """
    paragraph = []
    seen_blank = False
    for line in lines:
        is_blank = not line.rstrip()
        if seen_blank and not is_blank:
            # first line of the next paragraph: hand over the current one
            yield paragraph
            paragraph = []
            seen_blank = False
        if is_blank:
            seen_blank = True
        paragraph.append(line)
    yield paragraph
1106
1107
1108
1109 # dumb_c_preprocessor
1110 # -------------------
1111 #
1112 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
1113 # only converts lines that
1114 #
1115 # * start with "/\* " and end with " \*/" (followed by whitespace only)
1116 #
1117 # A more sophisticated version would also
1118 #
1119 # * convert multi-line comments
1120 #
1121 #   + Keep indentation or strip 3 leading spaces?
1122 #
1123 # * account for nested comments
1124 #
1125 # * only convert comments that are separated from code by a blank line
1126 #
1127 # ::
1128
def dumb_c_preprocessor(data):
    """Yield `data` lines with one-line `C` comments changed to C++ comments.

    Only lines that start with ``/* `` and end (ignoring trailing
    whitespace) with `` */`` are changed: the opening delimiter is replaced
    by the C++ comment string and the last closing delimiter is removed.
    All other lines are passed on unaltered.
    """
    cpp_comment = defaults.comment_strings["c++"]
    c_open, c_close = "/* ", " */"
    for line in data:
        is_one_line_comment = (line.startswith(c_open)
                               and line.rstrip().endswith(c_close))
        if is_one_line_comment:
            reopened = line.replace(c_open, cpp_comment, 1)
            # `rsplit` + `join` removes the *last* closing delimiter only
            line = "".join(reopened.rsplit(c_close, 1))
        yield line
1141
1142 # Unfortunately, the `replace` method of strings does not support negative
1143 # numbers for the `count` argument:
1144 #
1145 #   >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1146 #   False
1147 #
1148 # However, there is the `rsplit` method, that can be used together with `join`:
1149 #
1150 #   >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1151 #   True
1152 #
1153 # dumb_c_postprocessor
1154 # --------------------
1155 #
1156 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1157 # delimiters ::
1158
def dumb_c_postprocessor(data):
    """Yield `data` lines with C++ comments changed to `C` comments.

    Lines that start with the C++ comment string are wrapped in ``/* ``
    and `` */`` (trailing whitespace is replaced by a single newline).
    In a line that equals the right-stripped comment string (ignoring
    trailing whitespace), the first occurrence of the full comment string,
    if any, is removed. All other lines are passed on unaltered.
    """
    cpp_comment = defaults.comment_strings["c++"]
    c_open, c_close = "/* ", " */"
    blank_comment = cpp_comment.rstrip()
    for line in data:
        if line.rstrip() == blank_comment:
            yield line.replace(cpp_comment, "", 1)
        elif line.startswith(cpp_comment):
            reopened = line.replace(cpp_comment, c_open, 1)
            yield reopened.rstrip() + c_close + "\n"
        else:
            yield line
1171
1172
1173 # register filters
1174 # ----------------
1175 #
1176 # ::
1177
# The keys name the conversion ("<source>2<target>"); the "dumb" C comment
# filters are registered for both the C and the CSS conversions.
defaults.preprocessors['c2text'] = dumb_c_preprocessor
defaults.preprocessors['css2text'] = dumb_c_preprocessor
defaults.postprocessors['text2c'] = dumb_c_postprocessor
defaults.postprocessors['text2css'] = dumb_c_postprocessor
1182
1183
1184 # Command line use
1185 # ================
1186 #
1187 # Using this script from the command line will convert a file according to its
1188 # extension. This default can be overridden by a couple of options.
1189 #
1190 # Dual source handling
1191 # --------------------
1192 #
1193 # How to determine which source is up-to-date?
1194 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1195 #
1196 # - set modification date of `outfile` to the one of `infile`
1197 #
1198 #   Points out that the source files are 'synchronised'.
1199 #
1200 #   * Are there problems to expect from "backdating" a file? Which?
1201 #
1202 #     Looking at http://www.unix.com/showthread.php?t=20526, it seems
1203 #     perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1204 #     description of the "actuality" of the data in the file.
1205 #
1206 #   * Should this become a default or an option?
1207 #
1208 # - alternatively move input file to a backup copy (with option: `--replace`)
1209 #
1210 # - check modification date before overwriting
1211 #   (with option: `--overwrite=update`)
1212 #
1213 # - check modification date before editing (implemented as `Jed editor`_
1214 #   function `pylit_check()` in `pylit.sl`_)
1215 #
1216 # .. _Jed editor: http://www.jedsoft.org/jed/
1217 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1218 #
1219 # Recognised Filename Extensions
1220 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1221 #
1222 # Instead of defining a new extension for "pylit" literate programs,
1223 # by default ``.txt`` will be appended for the text source and stripped by
1224 # the conversion to the code source. I.e. for a Python program foo:
1225 #
1226 # * the code source is called ``foo.py``
1227 # * the text source is called ``foo.py.txt``
1228 # * the html rendering is called ``foo.py.html``
1229 #
1230 #
1231 # OptionValues
1232 # ------------
1233 #
1234 # The following class adds `complete`_ and `__getattr__`_
1235 # methods to `optparse.Values`::
1236
class OptionValues(optparse.Values):
    """`optparse.Values` that can be completed and defaults to None.

    Options that are not set evaluate to None instead of raising an
    AttributeError.
    """

# .. _OptionValues.complete:
#
# complete
# ~~~~~~~~
#
# ::

    def complete(self, **keyw):
        """
        Complete the data attributes from keyword arguments.

        Do not overwrite existing attributes.
        Drop keyword arguments that correspond to data attributes in `self`.
        """
        for key, value in keyw.items():
            if key not in self.__dict__:
                setattr(self, key, value)

# .. _OptionValues.__getattr__:
#
# __getattr__
# ~~~~~~~~~~~
#
# To replace calls using ``<instance>.ensure_value("OPTION", None)`` with the
# more concise ``<instance>.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return default value (None) for non existing options

        Raise AttributeError for special names (``__name__``): they are
        probed by protocols like copying and pickling, which would try to
        call the None returned for, e.g., ``__setstate__``.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None
1270
1271
1272 # .. [#] The special method `__getattr__` is only called when an attribute
1273 #        look-up has not found the attribute in the usual places (i.e. it is
1274 #        not an instance attribute nor is it found in the class tree for
1275 #        self).
1276 #
1277 #
1278 # PylitOptions
1279 # ------------
1280 #
1281 # The `PylitOptions` class comprises an option parser and methods for parsing
1282 # and completion of command line options::
1283
class PylitOptions(object):
    """Storage and handling of command line options for pylit"""

# Instantiation
# ~~~~~~~~~~~~~
#
# ::

    def __init__(self):
        """Set up an `OptionParser` instance for pylit command line options

        The parser is stored in `self.parser`.
        """
        p = optparse.OptionParser(usage=main.__doc__, version=__version__)

        # Conversion settings

        p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                     help="convert code source to text source")
        p.add_option("-t", "--txt2code", action="store_true",
                     help="convert text source to code source")
        p.add_option("--language",
                     choices = list(defaults.comment_strings.keys()),
                     help="use LANGUAGE native comment style")
        p.add_option("--comment-string", dest="comment_string",
                     help="documentation block marker in code source "
                     "(including trailing whitespace, "
                     "default: language dependent)")
        p.add_option("-m", "--code-block-marker", dest="code_block_marker",
                     help="syntax token starting a code block. (default '::')")
        p.add_option("--codeindent", type="int",
                     help="Number of spaces to indent code blocks with "
                     "code2text (default %d)" % defaults.codeindent)

        # Output file handling

        p.add_option("--overwrite", action="store",
                     choices = ["yes", "update", "no"],
                     help="overwrite output file (default 'update')")
        p.add_option("--replace", action="store_true",
                     help="move infile to a backup copy (appending '~')")
        # TODO: do we need this? If yes, make mtime update depend on it!
        # p.add_option("--keep-mtime", action="store_true",
        #              help="do not set the modification time of the outfile "
        #              "to the corresponding value of the infile")
        p.add_option("-s", "--strip", action="store_true",
                     help='"export" by stripping documentation or code')

        # Special actions

        p.add_option("-d", "--diff", action="store_true",
                     help="test for differences to existing file")
        p.add_option("--doctest", action="store_true",
                     help="run doctest.testfile() on the text version")
        p.add_option("-e", "--execute", action="store_true",
                     help="execute code (Python only)")

        self.parser = p

# .. _PylitOptions.parse_args:
#
# parse_args
# ~~~~~~~~~~
#
# The `parse_args` method calls the `optparse.OptionParser` on command
# line or provided args and returns the resulting values object. Defaults
# can be provided as attributes of the `values` argument::

    def parse_args(self, args=sys.argv[1:], values=None):
        """Parse command line arguments using `optparse.OptionParser`.

           parse_args(args, values) -> values object

           args --  list of command line arguments.
           values -- object to store the option's values

           The first two positional arguments are stored as `infile` and
           `outfile` attributes of the returned object.
        """
        # parse arguments
        (values, args) = self.parser.parse_args(args, values)
        # Convert FILE and OUTFILE positional args to option values
        # (other positional arguments are ignored)
        try:
            values.infile = args[0]
            values.outfile = args[1]
        except IndexError:
            pass

        return values

# .. _PylitOptions.complete_values:
#
# complete_values
# ~~~~~~~~~~~~~~~
#
# Complete an OptionValues instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- OptionValues instance
        """

# Complete with module-level defaults_::

        values.complete(**vars(defaults).copy())

# Ensure infile is a string::

        values.ensure_value("infile", "")

# Guess conversion direction from `infile` filename::

        if getattr(values, 'txt2code', None) is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in values.text_extensions:
                # NOTE(review): this message is printed to stdout, which
                # is also the conversion target if the outfile is '-'.
                print('text extension %r found' % in_extension)
                values.txt2code = True
            elif in_extension in values.languages:
                values.txt2code = False
            else:
                values.txt2code = None

# Auto-determine the output file name::

        values.ensure_value("outfile", self._get_outfile_name(values))

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = not (out_extension in values.text_extensions)

# Set the language of the code (unless it is already set). The language is
# looked up by the extension of the code file only if it is still needed, so
# that an unknown extension does no harm when the language is given::

        if getattr(values, "language", None) is None:
            if values.txt2code:
                code_file = values.outfile
            else:
                code_file = values.infile
            code_extension = os.path.splitext(code_file)[1]
            values.language = values.languages[code_extension]

        return values

# _get_outfile_name
# ~~~~~~~~~~~~~~~~~
#
# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:
#
# * '-' (stdin) results in '-' (stdout)
# * strip the `text_extension`_ (txt2code) or
# * add the `text_extension`_ (code2txt)
# * fallback: if no guess can be made, add ".out"
#
#   .. TODO: use values.outfile_extension if it exists?
#
# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        values -- object with the attributes `infile`, `text_extensions`,
                  `languages`, and `txt2code`
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in values.text_extensions:
            return base # strip
        if (ext and ext in values.languages) or values.txt2code == False:
            return values.infile + values.text_extensions[0] # add
        # give up
        return values.infile + ".out"

# .. _PylitOptions.__call__:
#
# __call__
# ~~~~~~~~
#
# The special `__call__` method allows to use PylitOptions instances as
# *callables*: Calling an instance parses the argument list to extract option
# values and completes them based on "context-sensitive defaults".  Keyword
# arguments are used as default values. ::

    def __call__(self, args=sys.argv[1:], **keyw):
        """parse and complete command line args, return option values
        """
        values = OptionValues(keyw)
        values = self.parse_args(args, values)
        return self.complete_values(values)
1474
1475
1476 # Helper functions
1477 # ----------------
1478 #
1479 # open_streams
1480 # ~~~~~~~~~~~~
1481 #
1482 # Return file objects for in- and output. If the input path is missing,
1483 # write usage and abort. (An alternative would be to use stdin as default.
1484 # However,  this leaves the uninitiated user with a non-responding application
1485 # if (s)he just tries the script without any arguments) ::
1486
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open(infile) or sys.stdin (if `infile` is '-')
    out_stream  --  open(outfile, 'w') or sys.stdout (if `outfile` is '-')
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than (or as
                    old as) `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Additional keyword arguments are ignored.

    Raise ValueError for an invalid `overwrite` value and IOError if the
    input file name is missing or the output file must not be overwritten.
    """
    if overwrite not in ('yes', 'no', 'update'):
        raise ValueError('Argument "overwrite" must be "yes", "no",'
                         ' or update, not "%s".' % overwrite)
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        else:
            if overwrite == 'no' and os.path.exists(outfile):
                raise IOError(17, "Output file exists!", outfile)
            if overwrite == 'update':
                # None: (almost) same age, True: outfile is newer
                outfile_is_newer = is_newer(outfile, infile)
                if outfile_is_newer is None:
                    raise IOError(0, "Output file is as old as input file!",
                                  outfile)
                if outfile_is_newer:
                    raise IOError(1, "Output file is newer than input file!",
                                  outfile)
            out_stream = open(outfile, 'w')
    except Exception:
        # do not leave the input file open if there is no output stream
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1521
1522 # is_newer
1523 # ~~~~~~~~
1524 #
1525 # ::
1526
def is_newer(path1, path2):
    """Tell whether the file at `path1` was modified after the one at `path2`.

    The modification times (mtime) of both files are compared. A file that
    cannot be accessed counts as older than any existing file.

    Return True if `path1` is newer, False if it is older, and None if the
    modification times differ by less than 1/10 second. (None evaluates to
    False in a Boolean context but can be told apart with ``is None``.)
    """
    def _mtime_or_oldest(path):
        # modification time of `path`, -1 if it cannot be determined
        try:
            return os.path.getmtime(path)
        except OSError:
            return -1

    first = _mtime_or_oldest(path1)
    second = _mtime_or_oldest(path2)
    if abs(first - second) < 0.1:
        return None
    return first > second
1550
1551
1552 # get_converter
1553 # ~~~~~~~~~~~~~
1554 #
1555 # Get an instance of the converter state machine::
1556
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`.

    The converter is a `Text2Code` instance if `txt2code` is true and a
    `Code2Text` instance otherwise. `data` and the additional keyword
    arguments are passed on to the converter class.
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1562
1563
1564 # Use cases
1565 # ---------
1566 #
1567 # run_doctest
1568 # ~~~~~~~~~~~
1569 # ::
1570
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- input file name ('-' for stdin)
    txt2code    -- if False, `infile` is a code source: it is converted
                   with `Code2Text` (passing on `keyw`) before testing
    globs       -- dictionary of globals for the tests (default: empty)
    verbose, optionflags -- passed on to `doctest.DocTestRunner`

    Return the tuple (number of failures, number of tests).
    """
    if globs is None:
        globs = {}

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see doc of sys.path())::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest. Close the input file when done::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            keyw.update({'add_missing_marker': False})
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        if data is not sys.stdin:
            data.close()

# decode doc string if there is a "magic comment" in the first or second line
# (http://docs.python.org/reference/lexical_analysis.html#encoding-declarations)
# ::

    if sys.version_info < (3,0):
        firstlines = ' '.join(docstring.splitlines()[:2])
        # raw string: ``\s`` and ``\w`` are no valid string escapes
        match = re.search(r'coding[=:]\s*([-\w.]+)', firstlines)
        if match:
            docencoding = match.group(1)
            docstring = docstring.decode(docencoding)

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    runner = DocTestRunner(verbose, optionflags)
    runner.run(test)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests"%(runner.failures, runner.tries))
    return runner.failures, runner.tries
1618
1619
1620 # diff
1621 # ~~~~
1622 #
1623 # ::
1624
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile does not exist or is '-', do a round-trip conversion and
    report differences.

    infile   -- input file name ('-' for stdin)
    outfile  -- name of the file to compare the conversion with
    txt2code -- conversion direction, passed on to `get_converter`
                together with the additional keyword arguments

    The differences are printed as unified diff.
    Return True if differences were found, else False.
    """

    import difflib

    # for diffing, we need a copy of the data as list
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        with open(infile) as instream:
            data = instream.readlines()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-' and os.path.exists(outfile):
        with open(outfile) as outstream:
            old = outstream.readlines()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # The diff lines bring their own line ends; an additional
        # separator would show up at the start of the following line.
        print(line, end='')
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1669
1670
1671 # execute
1672 # ~~~~~~~
1673 #
1674 # Works only for python code.
1675 #
1676 # Does not work with `eval`, as code is not just one expression. ::
1677
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    infile   -- name of the file to execute (passed to `open`)
    txt2code -- if true, the file content is converted with `Text2Code`
                (passing on the additional keyword arguments) first
    """

    with open(infile) as f:
        data = f.readlines()
    if txt2code:
        data = str(Text2Code(data, **keyw))
    # NOTE(review): `exec` runs the file content with the privileges of this
    # process and can access the names visible here -- only use it on
    # trusted sources.
    # `''.join` accepts both the list of lines and the converted string.
    exec(''.join(data))
1687
1688
1689 # main
1690 # ----
1691 #
1692 # If this script is called from the command line, the `main` function will
1693 # convert the input (file or stdin) between text and code formats.
1694 #
1695 # Option default values for the conversion can be given as keyword arguments
1696 # to `main`_.  The option defaults will be updated by command line options and
1697 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1698 # helper functions and the converter instantiation.
1699 #
1700 # This allows easy customisation for programmatic use -- just call `main`
1701 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1702 #
1703 # ::
1704
def main(args=sys.argv[1:], **defaults):
    """%prog [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in and output.
    """

    # NOTE(review): the docstring starts with a ``%prog`` placeholder, so it
    # is presumably reused as the command line usage text -- confirm before
    # rewording it.
    #
    # The `args` default is evaluated once, when `main` is defined; pass
    # `args` explicitly if `sys.argv` is changed afterwards.
    #
    # Returns the result of `run_doctest`, `diff` or `execute` for the
    # corresponding special actions, None after a normal conversion.
    # Calls `sys.exit` if the streams cannot be opened.

# Parse and complete the options::

    options = PylitOptions()(args, **defaults)

# Special actions with early return. The helpers get all option values as
# keyword arguments (``**`` unpacking hands a new dictionary to the callee,
# so the options object itself cannot be modified through it)::

    if options.doctest:
        return run_doctest(**vars(options))

    if options.diff:
        return diff(**vars(options))

    if options.execute:
        return execute(**vars(options))

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**vars(options))
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        # `errno` can be None; `sys.exit(None)` would report success.
        sys.exit(ex.errno or 1)

# Get a converter instance::

    converter = get_converter(data, **vars(options))

# Convert and write to out_stream. An output file is closed even if the
# conversion or the writing fails::

    try:
        out_stream.write(str(converter))
    finally:
        if out_stream is not sys.stdout:
            out_stream.close()

# If input and output are from files, set the modification time (`mtime`) of
# the output file to the one of the input file to indicate that the contained
# information is equal. [#]_ ::

    if out_stream is not sys.stdout:
        print("output written to", out_stream.name)
        try:
            os.utime(options.outfile, (os.path.getatime(options.outfile),
                                       os.path.getmtime(options.infile))
                    )
        except OSError:
            # best effort only, e.g. there is no file to take the time
            # from if the input is standard input ('-')
            pass

# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).
#
#
# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        os.rename(options.infile, options.infile + "~")
1777
1778
1779 # Run main, if called from the command line::
1780
if __name__ == '__main__':
    # No arguments are given, so `main` uses its default `args`
    # (``sys.argv[1:]``) and no option defaults are overridden.
    main()
1783
1784
1785 # Open questions
1786 # ==============
1787 #
1788 # Open questions and ideas for further development
1789 #
1790 # Clean code
1791 # ----------
1792 #
1793 # * can we gain from using "shutil" over "os.path" and "os"?
1794 # * use pylint or pyChecker to enforce a consistent style?
1795 #
1796 # Options
1797 # -------
1798 #
1799 # * Use templates for the "intelligent guesses" (with Python syntax for string
1800 #   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1801 #
1802 # * Is it sensible to offer the `header_string` option also as command line
1803 #   option?
1804 #
1805 # treatment of blank lines
1806 # ------------------------
1807 #
1808 # Alternatives: Keep blank lines blank
1809 #
1810 # - "never" (current setting) -> "visually merges" all documentation
1811 #    if there is no interjacent code
1812 #
1813 # - "always" -> disrupts documentation blocks,
1814 #
1815 # - "if empty" (no whitespace). Comment if there is whitespace.
1816 #
1817 #   This would allow non-obstructing markup but unfortunately this is (in
1818 #   most editors) also non-visible markup.
1819 #
1820 # + "if double" (if there is more than one consecutive blank line)
1821 #
1822 #   With this handling, the "visual gap" remains in both, text and code
1823 #   source.
1824 #
1825 #
1826 # Parsing Problems
1827 # ----------------
1828 #
1829 # * Ignore "matching comments" in literal strings?
1830 #
1831 #   Too complicated: Would need a specific detection algorithm for every
1832 #   language that supports multi-line literal strings (C++, PHP, Python)
1833 #
1834 # * Warn if a comment in code will become documentation after round-trip?
1835 #
1836 #
1837 # docstrings in code blocks
1838 # -------------------------
1839 #
1840 # * How to handle docstrings in code blocks? (it would be nice to convert them
1841 #   to rst-text if ``__docformat__ == restructuredtext``)
1842 #
1843 # TODO: Ask at Docutils users|developers
1844 #
1845 # Plug-ins
1846 # --------
1847 #
1848 # Specify a path for user additions and plug-ins. This would require
1849 # converting PyLit from a pure module to a package...
1850 #
1851 #   6.4.3 Packages in Multiple Directories
1852 #
1853 #   Packages support one more special attribute, __path__. This is initialized
1854 #   to be a list containing the name of the directory holding the package's
1855 #   __init__.py before the code in that file is executed. This
1856 #   variable can be modified; doing so affects future searches for modules and
1857 #   subpackages contained in the package.
1858 #
1859 #   While this feature is not often needed, it can be used to extend the set
1860 #   of modules found in a package.
1861 #
1862 #
1863 # .. References
1864 #
1865 # .. _Docutils: http://docutils.sourceforge.net/
1866 # .. _Sphinx: http://sphinx.pocoo.org
1867 # .. _Pygments: http://pygments.org/
1868 # .. _code-block directive:
1869 #     http://docutils.sourceforge.net/sandbox/code-block-directive/
1870 # .. _literal block:
1871 # .. _literal blocks:
1872 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#literal-blocks
1873 # .. _indented literal block:
1874 # .. _indented literal blocks:
1875 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1876 # .. _quoted literal block:
1877 # .. _quoted literal blocks:
1878 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1879 # .. _parsed-literal blocks:
1880 #     http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block
1881 # .. _doctest block:
1882 # .. _doctest blocks:
1883 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks