pylit.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf8 -*-
   3
   4 """pylit: bidirectional text <-> code converter
   5
   6 Convert between a *text source* with embedded computer code
   7 and a *code source* with embedded documentation.
   8 """
   9
  10 from __future__ import print_function
  11
  12 # pylit.py
  13 # ********
  14 # Literate programming with reStructuredText
  15 # ++++++++++++++++++++++++++++++++++++++++++
  16 #
  17 # :Copyright: © 2005, 2007, 2015, 2021 Günter Milde.
  18 #             Released without warranty under the terms of the
  19 #             GNU General Public License (v. 3 or later)
  20 #
  21 # .. contents::
  22 #
  23 # Frontmatter
  24 # ===========
  25 #
  26 # Changelog
  27 # ---------
  28 #
  29 # .. class:: borderless
  30 #
  31 # ====== ==========  =========================================================
  32 # 0.1    2005-06-29  Initial version.
  33 # 0.1.1  2005-06-30  First literate version.
  34 # 0.1.2  2005-07-01  Object oriented script using generators.
  35 # 0.1.3  2005-07-10  Two state machine (later added 'header' state).
  36 # 0.2b   2006-12-04  Start of work on version 0.2 (code restructuring).
  37 # 0.2    2007-01-23  Published at ``pylit.berlios.de``.
  38 # 0.2.1  2007-01-25  Outsourced non-core documentation to the PyLit pages.
  39 # 0.2.2  2007-01-26  New behaviour of `diff` function.
  40 # 0.2.3  2007-01-29  New `header` methods after suggestion by Riccardo Murri.
  41 # 0.2.4  2007-01-31  Raise Error if code indent is too small.
  42 # 0.2.5  2007-02-05  New command line option --comment-string.
  43 # 0.2.6  2007-02-09  Add section with open questions,
  44 # ..                 Code2Text: let only blank lines (no comment str)
  45 #                    separate text and code,
  46 # ..                 fix `Code2Text.header`.
  47 # 0.2.7  2007-02-19  Simplify `Code2Text.header`,
  48 #                    new `iter_strip` method replacing a lot of ``if``-s.
  49 # 0.2.8  2007-02-22  Set `mtime` of outfile to the one of infile.
  50 # 0.3    2007-02-27  New `Code2Text` converter after an idea by Riccardo Murri,
  51 # ..                 explicit `option_defaults` dict for easier customisation.
  52 # 0.3.1  2007-03-02  Expand hard-tabs to prevent errors in indentation,
  53 # ..                 `Text2Code` now also works on blocks,
  54 # ..                 removed dependency on SimpleStates module.
  55 # 0.3.2  2007-03-06  Bug fix: do not set `language` in `option_defaults`
  56 # ..                 renamed `code_languages` to `languages`.
  57 # 0.3.3  2007-03-16  New language css,
  58 # ..                 option_defaults -> defaults = optparse.Values(),
  59 # ..                 simpler PylitOptions: don't store parsed values,
  60 #                    don't parse at initialisation,
  61 # ..                 OptionValues: return `None` for non-existing attributes,
  62 # ..                 removed -infile and -outfile, use positional arguments.
  63 # 0.3.4  2007-03-19  Documentation update,
  64 #                    separate `execute` function.
  65 # ..     2007-03-21  Code cleanup in `Text2Code.__iter__`.
  66 # 0.3.5  2007-03-23  Removed "css" from known languages after learning that
  67 #                    there is no C++ style "// " comment string in CSS2.
  68 # 0.3.6  2007-04-24  Documentation update.
  69 # 0.4    2007-05-18  Implement Converter.__iter__ as stack of iterator
  70 #                    generators. Iterating over a converter instance now
  71 #                    yields lines instead of blocks.
  72 # ..                 Provide "hooks" for pre- and postprocessing filters.
  73 # ..                 Rename states to reduce confusion with formats:
  74 #                    "text" -> "documentation", "code" -> "code_block".
  75 # 0.4.1  2007-05-22  Converter.__iter__: cleanup and reorganisation,
  76 #                    rename parent class Converter -> TextCodeConverter.
  77 # 0.4.2  2007-05-23  Merged Text2Code.converter and Code2Text.converter into
  78 #                    TextCodeConverter.converter.
  79 # 0.4.3  2007-05-30  Replaced use of defaults.code_extensions with
  80 #                    values.languages.keys().
  81 # ..                 Removed spurious `print` statement in code_block_handler.
  82 # ..                 Added basic support for 'c' and 'css' languages with
  83 #                    `dumb_c_preprocessor()`_ and `dumb_c_postprocessor()`_.
  84 # 0.5    2007-06-06  Moved `collect_blocks()`_ out of `TextCodeConverter`_,
  85 # ..                 bug fix: collect all trailing blank lines into a block.
  86 # ..                 Expand tabs with `expandtabs_filter()`_.
  87 # 0.6    2007-06-20  Configurable code-block marker (default ``::``)
  88 # 0.6.1  2007-06-28  Bug fix: reset self.code_block_marker_missing.
  89 # 0.7    2007-12-12  prepending an empty string to sys.path in run_doctest()
  90 #                    to allow imports from the current working dir.
  91 # 0.7.1  2008-01-07  If outfile does not exist, do a round-trip conversion
  92 #                    and report differences (as with outfile=='-').
  93 # 0.7.2  2008-01-28  Do not add missing code-block separators with
  94 #                    `doctest_run` on the code source. Keeps lines consistent.
  95 # 0.7.3  2008-04-07  Use value of code_block_marker for insertion of missing
  96 #                    transition marker in Code2Text.code_block_handler
  97 # ..                 Add "shell" to defaults.languages
  98 # 0.7.4  2008-06-23  Add "latex" to defaults.languages
  99 # 0.7.5  2009-05-14  Bugfix: ignore blank lines in test for end of code block
 100 # 0.7.6  2009-12-15  language-dependent code-block markers (after a
 101 #                    feature request and patch by `jrioux`),
 102 # ..                 use DefaultDict for language-dependent defaults,
 103 # ..                 new setting `add_missing_marker`_.
 104 # 0.7.7  2010-06-23  New command line option --codeindent.
 105 # 0.7.8  2011-03-30  Do not overwrite custom `add_missing_marker` value,
 106 #                    allow directive options following the 'code' directive.
 107 # 0.7.9  2011-04-05  Decode doctest string if 'magic comment' gives encoding.
 108 # 0.7.10 2013-06-07  Add "lua" to defaults.languages
 109 # 0.7.11 2020-10-10  Return 0, if input and output file are of same age.
 110 # 0.8.0  2022-06-29  Fix ``--execute`` behaviour and tests.
 111 # ..                 Change default `codeindent`_ to 2.
 112 # ..                 Switch to `argparse`_. Remove class `OptionValues`.
 113 # ====== ==========  =========================================================
 114 #
 115 # ::
 116
 117 __version__ = "0.8.0dev"
 118
 119 __docformat__ = 'restructuredtext'
 120
 121
 122 # Introduction
 123 # ------------
 124 #
 125 # PyLit is a bidirectional converter between two formats of a computer
 126 # program source:
 127 #
 128 # * a (reStructured) text document with program code embedded in
 129 #   *code blocks*, and
 130 # * a compilable (or executable) code source with *documentation*
 131 #   embedded in comment blocks
 132 #
 133 #
 134 # Requirements
 135 # ------------
 136 #
 137 # ::
 138
 139 import argparse
 140 import os
 141 import re
 142 import sys
 143
 144
 145 # DefaultDict
 146 # ~~~~~~~~~~~
 147 #
 148 # As `collections.defaultdict` adds key/value pairs when the default
 149 # constructor is called,  we  define an alternative that does not mutate the
 150 # dict as side-effect. ::
 151
 152 class DefaultDict(dict):
 153     """Dictionary with default value."""
 154
 155     default = 'python'
 156
 157     def __missing__(self, key):
 158         # cf. file:///usr/share/doc/python3/html/library/stdtypes.html#dict
 159         return self.default
 160
 161
 162 # defaults
 163 # ========
 164 #
 165 # The `defaults` object provides a central repository for default
 166 # values and their customisation. ::
 167
 168 defaults = argparse.Namespace()
 169
 170 # It is used for
 171 #
 172 # * the initialisation of data arguments in TextCodeConverter_ and
 173 #   PylitOptions_
 174 #
 175 # * completion of `command line arguments`_ in
 176 #   `PylitOptions.complete_values()`_.
 177 #
 178 # This allows the easy creation of _`back-ends` that customise the
 179 # defaults and then call `main()`_ e.g.
 180 #
 181 # .. code:: python
 182 #
 183 #    #!/usr/bin/env python
 184 #    import pylit
 185 #
 186 #    pylit.defaults.code_block_markers['c++'] = '.. code-block:: c++'
 187 #    pylit.defaults.languages['.def'] = 'latex'
 188 #    pylit.defaults.languages['.dfu'] = 'latex'
 189 #
 190 #    pylit.main()
 191 #
 192 # .. note:: Defaults for the `command line arguments`_ can also be specified
 193 #           as keyword arguments to ``main()``
 194 #
 195 #           .. code:: python
 196 #
 197 #              #!/usr/bin/env python
 198 #              import pylit
 199 #              pylit.main(language='c++')
 200 #
 201 # The following default values are defined in pylit.py:
 202 #
 203 # language
 204 # --------
 205 #
 206 # Code language. Determined from languages_ if ``None``::
 207
 208 defaults.language = None
 209
 210 # languages
 211 # ---------
 212 #
 213 # Mapping of code file extensions to code language::
 214
 215 defaults.languages = DefaultDict({".c":   "c",
 216                                   ".cc":  "c++",
 217                                   ".css": "css",
 218                                   ".lua": "lua",
 219                                   ".py":  "python",
 220                                   ".sh":  "shell",
 221                                   ".sl":  "slang",
 222                                   ".sty": "latex",
 223                                   ".tex": "latex"
 224                                   })
 225
 226 # The result can be overridden by the ``--language`` command line option.
 227 #
 228 # The fallback language, used if there is no matching extension (e.g. if pylit
 229 # is used as filter) and no ``--language`` is specified is ``"python"``::
 230
 231 defaults.languages.default = 'python'
 232
 233 # It can be changed programmatically by changing the ``.default``
 234 # attribute, e.g.
 235 #
 236 # >>> import pylit
 237 # >>> pylit.defaults.languages.default = 'c++'
 238 # >>> pylit.defaults.languages['.camel']
 239 # 'c++'
 240 #
 241 # .. _text_extension:
 242 #
 243 # text_extensions
 244 # ---------------
 245 #
 246 # List of known extensions of (reStructured) text files. The first
 247 # extension in this list is used by the `_get_outfile_name()`_ method to
 248 # generate a text output filename::
 249
 250 defaults.text_extensions = [".txt", ".rst"]
 251
 252 # comment_string
 253 # --------------
 254 #
 255 # Used in Code2Text_ to recognise text blocks and in Text2Code_ to format
 256 # text blocks as comments.
 257 # Determined from comment_strings_ if ``None``::
 258
 259 defaults.comment_string = None
 260
 261
 262 # comment_strings
 263 # ---------------
 264 #
 265 # Comment strings for known languages.
 266 # The fallback value is ``'# '``.
 267 #
 268 # **Comment strings include trailing whitespace.** ::
 269
 270 defaults.comment_strings = DefaultDict({"css":    '// ',
 271                                         "c":      '// ',
 272                                         "c++":    '// ',
 273                                         "lua":    '-- ',
 274                                         "latex":  '% ',
 275                                         "python": '# ',
 276                                         "shell":  '# ',
 277                                         "slang":  '% '
 278                                         })
 279 defaults.comment_strings.default = '# '
 280
 281 # header_string
 282 # -------------
 283 #
 284 # Marker string for a header code block in the text source. No trailing
 285 # whitespace needed as indented code follows.
 286 # Must be a valid rst directive that accepts code on the same line, e.g.
 287 # ``'..admonition::'``.
 288 #
 289 # Default is a comment marker::
 290
 291 defaults.header_string = '..'
 292
 293
 294 # code_block_marker
 295 # -----------------
 296 #
 297 # Markup at the end of a documentation block.
 298 #
 299 # The `code_block_marker` string is determined based on the code language_
 300 # and `inserted into a regular expression`_.
 301 #
 302 #   defaults.code_block_marker = None # get from `code_block_markers`
 303 #
 304 # code_block_markers
 305 # ------------------
 306 #
 307 # Language-specific code-block markers can be defined programmatically in
 308 # back-ends_.
 309 #
 310 # The fallback value is Docutils' marker for a `literal block`_::
 311
 312 defaults.code_block_markers = DefaultDict()
 313 defaults.code_block_markers.default = '::'
 314
 315 # In a document where code examples are only one of several uses of
 316 # literal blocks, it is more appropriate to single out the source code,
 317 # e.g., with the double colon at a separate line ("expanded form")
 318 #
 319 #   ``defaults.code_block_markers.default = ':: *'``
 320 #
 321 # or a dedicated ``.. code-block::`` directive
 322 #
 323 #   ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
 324 #
 325 # The latter form also allows mixing code in different languages in one
 326 # literate source file.
 327 #
 328 #
 329 # strip
 330 # -----
 331 #
 332 # Strip documentation (or code) blocks from the output::
 333
 334 defaults.strip = False
 335
 336
 337 # strip_marker
 338 # ------------
 339 #
 340 # Strip `code_block_marker`_ from the end of documentation blocks when
 341 # converting to code format. Makes the code more concise but loses the
 342 # synchronisation of line numbers in text and code formats.
 343 #
 344 # Can be used together with `add_missing_marker`_ to change
 345 # the `code_block_marker`_ in a round trip::
 346
 347 defaults.strip_marker = False
 348
 349
 350 # add_missing_marker
 351 # ------------------
 352 #
 353 # When converting from code format to text format, add a `code_block_marker`_
 354 # at the end of documentation blocks if it is missing::
 355
 356 defaults.add_missing_marker = True
 357
 358 # Keep this at ``True``, if you want to re-convert to code format later!
 359 #
 360 #
 361 # .. _defaults.preprocessors:
 362 #
 363 # preprocessors
 364 # -------------
 365 #
 366 # Preprocess the data with language-specific Filters_
 367 # (cf. `register filters`_)::
 368
 369 defaults.preprocessors = {}
 370
 371 # .. _defaults.postprocessors:
 372 #
 373 # postprocessors
 374 # --------------
 375 #
 376 # Postprocess the data with language-specific Filters_
 377 # (cf. `register filters`_)::
 378
 379 defaults.postprocessors = {}
 380
 381 # .. _defaults.codeindent:
 382 #
 383 # codeindent
 384 # ----------
 385 #
 386 # Number of spaces to indent code blocks in `Code2Text.code_block_handler()`_::
 387
 388 defaults.codeindent = 2
 389
 390 # In `Text2Code.code_block_handler()`_, the codeindent is determined by the
 391 # first recognised code line (header or first indented literal block
 392 # of the text source).
 393 #
 394 # overwrite
 395 # ---------
 396 #
 397 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
 398
 399 defaults.overwrite = 'update'
 400
 401 # Recognised values:
 402 #
 403 #  :'yes':    overwrite a possibly existing `outfile`,
 404 #  :'update': fail if the `outfile` is newer than `infile`,
 405 #             TODO: fix behaviour if both are of same age
 406 #  :'no':     fail if `outfile` exists.
 407 #
 408 #
 409 # Actions: execute, doctest, diff
 410 # -------------------------------
 411 # If true, these actions replace or follow the txt<->code conversion.
 412 # See `command line arguments`_. ::
 413
 414 defaults.execute = False
 415 defaults.doctest = False
 416 defaults.diff = False
 417
 418 # Initial values
 419 # --------------
 420 #
 421 # The following settings are auto-determined if None
 422 # (see `PylitOptions.complete_values()`_).
 423 # Initialize them here as they will not be set by
 424 # `ArgumentParser.parse_args()`_::
 425
 426 # defaults.infile = ''   # required
 427 defaults.outfile = None
 428 defaults.replace = None
 429 defaults.txt2code = None
 430
 431
 432 # Extensions
 433 # ==========
 434 #
 435 # Try to import optional extensions::
 436
 437 try:
 438     import pylit_elisp  # noqa
 439 except ImportError:
 440     pass
 441
 442
 443 # Converter Classes
 444 # =================
 445 #
 446 # The converter classes implement a simple state machine to separate and
 447 # transform documentation and code blocks. For this task, only a very limited
 448 # parsing is needed. PyLit's parser assumes:
 449 #
 450 # * `indented literal blocks`_ in a text source are code blocks.
 451 #
 452 # * comment blocks in a code source where every line starts with a matching
 453 #   `comment_string`_ are documentation blocks.
 454 #
 455 # TextCodeConverter
 456 # -----------------
 457 # ::
 458
 459 class TextCodeConverter(object):
 460     """Parent class for the converters `Text2Code` and `Code2Text`.
 461     """
 462
 463 # The parent class defines data attributes and functions used in both
 464 # `Text2Code`_ converting a text source to executable code source, and
 465 # `Code2Text`_ converting commented code to a text source.
 466 #
 467 # Data attributes
 468 # ~~~~~~~~~~~~~~~
 469 #
 470 # Class default values are fetched from the `defaults`_ object and can be
 471 # overridden by matching keyword arguments during class instantiation.
 472 # This also works with keyword arguments to `get_converter()`_ and `main()`_,
 473 # as these functions pass on unused keyword args to the instantiation of a
 474 # converter class. ::
 475
 476     language = defaults.languages[None]
 477     comment_strings = defaults.comment_strings
 478     comment_string = ""  # set in __init__ (if empty)
 479     codeindent = defaults.codeindent
 480     header_string = defaults.header_string
 481     code_block_markers = defaults.code_block_markers
 482     code_block_marker = ""  # set in __init__ (if empty)
 483     strip = defaults.strip
 484     strip_marker = defaults.strip_marker
 485     add_missing_marker = defaults.add_missing_marker
 486     directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
 487     state = ""  # type of current block, see `TextCodeConverter.convert`_
 488
 489 # Interface methods
 490 # ~~~~~~~~~~~~~~~~~
 491 #
 492 # .. _TextCodeConverter.__init__:
 493 #
 494 # __init__()
 495 # """"""""""
 496 #
 497 # Initialising sets the `data` attribute, an iterable object yielding lines of
 498 # the source to convert. [#]_
 499 #
 500 # .. [#] The most common choice of data is a `file` object with the text
 501 #        or code source.
 502 #
 503 #        To convert a string into a suitable object, use its splitlines()
 504 #        method like ``"2 lines\nof source".splitlines(True)``.
 505 #
 506 #
 507 # Additional keyword arguments are stored as instance variables,
 508 # overwriting the class defaults::
 509
 510     def __init__(self, data, **keyw):
 511         """data   --  iterable data object
 512                       (list, file, generator, string, ...)
 513            **keyw --  remaining keyword arguments are
 514                       stored as data-attributes
 515         """
 516         self.data = data
 517         self.__dict__.update(keyw)
 518
 519 # If empty, `code_block_marker` and `comment_string` are set according
 520 # to the `language`::
 521
 522         if not self.code_block_marker:
 523             self.code_block_marker = self.code_block_markers[self.language]
 524         if not self.comment_string:
 525             self.comment_string = self.comment_strings[self.language]
 526         self.stripped_comment_string = self.comment_string.rstrip()
 527
 528 # Pre- and postprocessing filters are set (with
 529 # `TextCodeConverter.get_filter`_)::
 530
 531         self.preprocessor = self.get_filter("preprocessors", self.language)
 532         self.postprocessor = self.get_filter("postprocessors", self.language)
 533
 534 # .. _inserted into a regular expression:
 535 #
 536 # Finally, a regular_expression for the `code_block_marker` is compiled
 537 # to find valid cases of `code_block_marker` in a given line and return
 538 # the groups: ``\1 prefix, \2 code_block_marker, \3 remainder`` ::
 539
 540         marker = self.code_block_marker
 541         if marker == '::':
 542             # the default marker may occur at the end of a text line
 543             self.marker_regexp = re.compile('^( *(?!\.\.).*)(::)([ \n]*)$')
 544         else:
 545             # marker must be on a separate line
 546             self.marker_regexp = re.compile('^( *)(%s)(.*\n?)$' % marker)
 547
 548 # .. _TextCodeConverter.__iter__:
 549 #
 550 # __iter__()
 551 # """"""""""
 552 #
 553 # Return an iterator for the instance. Iteration yields lines of converted
 554 # data.
 555 #
 556 # The iterator is a chain of iterators acting on `self.data` that does
 557 #
 558 # * preprocessing
 559 # * text<->code format conversion
 560 # * postprocessing
 561 #
 562 # Pre- and postprocessing are only performed, if filters for the current
 563 # language are registered in `defaults.preprocessors`_ and|or
 564 # `defaults.postprocessors`_. The filters must accept an iterable as first
 565 # argument and yield the processed input data line-wise.
 566 # ::
 567
 568     def __iter__(self):
 569         """Iterate over input data source and yield converted lines
 570         """
 571         return self.postprocessor(self.convert(self.preprocessor(self.data)))
 572
 573
 574 # .. _TextCodeConverter.__call__:
 575 #
 576 # __call__()
 577 # """"""""""
 578 # The special `__call__` method allows the use of class instances as callable
 579 # objects. It returns the converted data as list of lines::
 580
 581     def __call__(self):
 582         """Iterate over state-machine and return results as list of lines."""
 583         return [line for line in self]
 584
 585
 586 # .. _TextCodeConverter.__str__:
 587 #
 588 # __str__()
 589 # """""""""
 590 # Return converted data as string::
 591
 592     def __str__(self):
 593         return "".join(self())
 594
 595
 596 # Helpers and convenience methods
 597 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 598 #
 599 # .. _TextCodeConverter.convert:
 600 #
 601 # convert()
 602 # """""""""
 603 #
 604 # The `convert` method generates an iterator that does the actual  code <-->
 605 # text format conversion. The converted data is yielded line-wise and the
 606 # instance's `status` argument indicates whether the current line is "header",
 607 # "documentation", or "code_block"::
 608
 609     def convert(self, lines):
 610         """Iterate over lines of a program document and convert
 611         between "text" and "code" format
 612         """
 613
 614 # Initialise internal data arguments. (Done here, so that every new iteration
 615 # re-initialises them.)
 616 #
 617 # `state`
 618 #   the "type" of the currently processed block of lines. One of
 619 #
 620 #   :"":              initial state: check for header,
 621 #   :"header":        leading code block: strip `header_string`,
 622 #   :"documentation": documentation part: comment out,
 623 #   :"code_block":    literal blocks containing source code: unindent.
 624 #
 625 # ::
 626
 627         self.state = ""
 628
 629 # `_codeindent`
 630 #   * Do not confuse the internal attribute `_codeindent` with the
 631 #     configurable `codeindent` (without the leading underscore).
 632 #   * `_codeindent` is set in `Text2Code.code_block_handler()`_ to the indent
 633 #     of the first non-blank "code_block" line and stripped from all
 634 #     "code_block" lines in the text-to-code conversion,
 635 #   * `codeindent` is set in `__init__` to `defaults.codeindent`_ and added to
 636 #     "code_block" lines in the code-to-text conversion.
 637 #
 638 # ::
 639
 640         self._codeindent = 0
 641
 642 # `_textindent`
 643 #   * set by `Text2Code.documentation_handler()`_ to the minimal indent of a
 644 #     documentation block,
 645 #   * used in `Text2Code.set_state`_ to find the end of a code block.
 646 #
 647 # ::
 648
 649         self._textindent = 0
 650
 651 # `_add_code_block_marker`
 652 #   If the last paragraph of a documentation block does not end with a
 653 #   code_block_marker_, it should be added (otherwise, the back-conversion
 654 #   fails.).
 655 #
 656 #   `_add_code_block_marker` is set by `Code2Text.documentation_handler()`_
 657 #   and evaluated by `Code2Text.code_block_handler()`_, because the
 658 #   documentation_handler does not know whether the next block will be
 659 #   documentation (with no need for a code_block_marker) or a code block.
 660 #
 661 # ::
 662
 663         self._add_code_block_marker = False
 664
 665 # Determine the state of the block and convert with the matching "handler"::
 666
 667         for block in collect_blocks(expandtabs_filter(lines)):
 668             try:
 669                 self.set_state(block)
 670             except StopIteration:
 671                 return
 672             for line in getattr(self, self.state+"_handler")(block):
 673                 yield line
 674
 675
 676 # .. _TextCodeConverter.get_filter:
 677 #
 678 # get_filter()
 679 # """"""""""""
 680 # ::
 681
 682     def get_filter(self, filter_set, language):
 683         """Return language specific filter"""
 684         if self.__class__ == Text2Code:
 685             key = "text2"+language
 686         elif self.__class__ == Code2Text:
 687             key = language+"2text"
 688         else:
 689             key = ""
 690         try:
 691             return getattr(defaults, filter_set)[key]
 692         except (AttributeError, KeyError, TypeError):
 693             # print("there is no %r filter in %r"%(key, filter_set))
 694             pass
 695         return identity_filter
 696
 697
 698 # get_indent()
 699 # """"""""""""
 700 # Return the number of leading spaces in `line`::
 701
 702     def get_indent(self, line):
 703         """Return the indentation of `string`.
 704         """
 705         return len(line) - len(line.lstrip())
 706
 707
 708 # Text2Code
 709 # ---------
 710 #
 711 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
 712 # Code blocks are unindented, documentation is commented (or filtered, if the
 713 # ``strip`` option is True).
 714 #
 715 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
 716 #        literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
 717 #        treated as part of the documentation. This allows the inclusion of
 718 #        examples:
 719 #
 720 #           >>> 23 + 3
 721 #           26
 722 #
 723 #        Note that there is no double colon before the doctest block in the
 724 #        text source.
 725 #
 726 # The class inherits the interface and helper functions from
 727 # TextCodeConverter_ and adds functions specific to the text-to-code format
 728 # conversion::
 729
 730 class Text2Code(TextCodeConverter):
 731     """Convert a (reStructured) text source to code source
 732     """
 733
 734 # .. _Text2Code.set_state:
 735 #
 736 # set_state()
 737 # ~~~~~~~~~~~
 738 # ::
 739
 740     def set_state(self, block):
 741         """Determine state of `block`. Set `self.state`
 742         """
 743
 744 # `set_state` is used inside an iteration. Hence, if we are out of data, a
 745 # StopItertion exception should be raised::
 746
 747         if not block:
 748             raise StopIteration
 749
 750 # The new state depends on the active state (from the last block) and
 751 # features of the current block. It is either "header", "documentation", or
 752 # "code_block".
 753 #
 754 # If the current state is "" (first block), check for
 755 # the  `header_string` indicating a leading code block::
 756
 757         if self.state == "":
 758             # print("set state for %r"%block)
 759             if block[0].startswith(self.header_string):
 760                 self.state = "header"
 761             else:
 762                 self.state = "documentation"
 763
 764 # If the current state is "documentation", the next block is also
 765 # documentation. The end of a documentation part is detected in the
 766 # `Text2Code.documentation_handler()`_::
 767
 768         # elif self.state == "documentation":
 769         #    self.state = "documentation"
 770
 771 # A "code_block" ends with the first less indented, non-blank line.
 772 # `_textindent` is set by the documentation handler to the indent of the
 773 # preceding documentation block::
 774
 775         elif self.state in ["code_block", "header"]:
 776             indents = [self.get_indent(line) for line in block
 777                        if line.rstrip()]
 778             # print("set_state:", indents, self._textindent)
 779             if indents and min(indents) <= self._textindent:
 780                 self.state = 'documentation'
 781             else:
 782                 self.state = 'code_block'
 783
 784 # TODO: (or not to do?) insert blank line before the first line with too-small
 785 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
 786 # split and push-back of the documentation part)?
 787 #
 788 #
 789 # .. _Text2Code.header_handler():
 790 #
 791 # header_handler()
 792 # ~~~~~~~~~~~~~~~~
 793 #
 794 # Sometimes code needs to remain on the first line(s) of the document to be
 795 # valid. The most common example is the "shebang" line that tells a POSIX
 796 # shell how to process an executable file
 797 #
 798 # ..code:: shell
 799 #
 800 #   #!/usr/bin/env python
 801 #
 802 # In Python, the special comment to indicate the encoding, e.g.
 803 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
 804 # or code too.
 805 #
 806 # If we want to keep the line numbers in sync for text and code source, the
 807 # reStructured Text markup for these header lines must start at the same line
 808 # as the first header line. Therefore, header lines could not be marked as
 809 # literal block (this would require the ``::`` and an empty line above the
 810 # code_block).
 811 #
 812 # OTOH, a comment may start at the same line as the comment marker and it
 813 # includes subsequent indented lines. Comments are visible in the reStructured
 814 # Text source but hidden in the pretty-printed output.
 815 #
 816 # With a header converted to comment in the text source, everything before
 817 # the first documentation block (i.e. before the first paragraph using the
 818 # matching comment string) will be hidden away (in HTML or PDF output).
 819 #
 820 # This seems a good compromise, the advantages
 821 #
 822 # * line numbers are kept
 823 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`)
 824 #   apply
 825 # * greater flexibility: you can hide a repeating header in a project
 826 #   consisting of many source files.
 827 #
 828 # outweigh the disadvantages
 829 #
 830 # - it may come as surprise if a part of the file is not "printed",
 831 # - one more syntax element to learn for rst newbies to start with pylit,
 832 #   (however, starting from the code source, this will be auto-generated)
 833 #
 834 # In the case that there is no matching comment at all, the complete code
 835 # source will become a comment -- however, in this case it is not very likely
 836 # the source is a literate document anyway.
 837 #
 838 # If needed for the documentation, it is possible to quote the header in (or
 839 # after) the first documentation block, e.g. as `parsed literal`.
 840 # ::
 841
 842     def header_handler(self, lines):
 843         """Format leading code block"""
 844         # strip header string from first line
 845         lines[0] = lines[0].replace(self.header_string, "", 1)
 846         # yield remaining lines formatted as code-block
 847         for line in self.code_block_handler(lines):
 848             yield line
 849
 850
 851 # .. _Text2Code.documentation_handler():
 852 #
 853 # documentation_handler()
 854 # ~~~~~~~~~~~~~~~~~~~~~~~
 855 #
 856 # The 'documentation' handler processes everything that is not recognised as
 857 # "code_block". Documentation is quoted with `self.comment_string`
 858 # (or filtered with `--strip=True`).
 859 #
 860 # If end-of-documentation marker is detected,
 861 #
 862 # * set state to 'code_block'
 863 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
 864 #   next "documentation" block)
 865 #
 866 # ::
 867
 868     def documentation_handler(self, lines):
 869         """Convert documentation blocks from text to code format
 870         """
 871         for line in lines:
 872             # test lines following the code-block marker for false positives
 873             if (self.state == "code_block" and line.rstrip()
 874                 and not self.directive_option_regexp.search(line)):
 875                 self.state = "documentation"
 876             # test for end of documentation block
 877             if self.marker_regexp.search(line):
 878                 self.state = "code_block"
 879                 self._textindent = self.get_indent(line)
 880             # yield lines
 881             if self.strip:
 882                 continue
 883             # do not comment blank lines preceding a code block
 884             if line.rstrip():
 885                 yield self.comment_string + line
 886             else:
 887                 if self.state == "code_block":
 888                     yield line
 889                 else:
 890                     yield self.comment_string.rstrip() + line
 891
 892
 893 # .. _Text2Code.code_block_handler():
 894 #
 895 # code_block_handler()
 896 # ~~~~~~~~~~~~~~~~~~~~
 897 #
 898 # The "code_block" handler is called with an indented literal block. It
 899 # removes leading whitespace up to the indentation of the first code line in
 900 # the file (this deviation from Docutils behaviour allows indented blocks of
 901 # Python code). ::
 902
 903     def code_block_handler(self, block):
 904         """Convert indented literal blocks to source code format
 905         """
 906
 907 # If still unset, determine the indentation of code blocks from first non-blank
 908 # code line::
 909
 910         if self._codeindent == 0:
 911             self._codeindent = self.get_indent(block[0])
 912
 913 # Yield unindented lines after check whether we can safely unindent. If the
 914 # line is less indented then `_codeindent`, something got wrong. ::
 915
 916         for line in block:
 917             if line.lstrip() and self.get_indent(line) < self._codeindent:
 918                 raise ValueError("code block contains line less indented than"
 919                                  " %d spaces \n%r"%(self._codeindent, block))
 920             yield line.replace(" "*self._codeindent, "", 1)
 921
 922
 923 # Code2Text
 924 # ---------
 925 #
 926 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
 927 # a source in "code format" (i.e. in a programming language), extracts
 928 # documentation from comment blocks, and puts program code in literal blocks.
 929 #
 930 # The class inherits the interface and helper functions from
 931 # TextCodeConverter_ and adds functions specific to the code-to-text format
 932 # conversion::
 933
 934 class Code2Text(TextCodeConverter):
 935     """Convert code source to text source
 936     """
 937
 938 # set_state()
 939 # ~~~~~~~~~~~
 940 #
 941 # Check if block is "header", "documentation", or "code_block":
 942 #
 943 # A paragraph is "documentation", if every non-blank line starts with a
 944 # matching comment string (including whitespace except for commented blank
 945 # lines) ::
 946
 947     def set_state(self, block):
 948         """Determine state of `block`."""
 949         for line in block:
 950             # skip documentation lines (commented, blank or blank comment)
 951             if (line.startswith(self.comment_string)
 952                 or not line.rstrip()
 953                 or line.rstrip() == self.comment_string.rstrip()):
 954                 continue
 955             # non-commented line found:
 956             if self.state == "":
 957                 self.state = "header"
 958             else:
 959                 self.state = "code_block"
 960             break
 961         else:
 962             # no code line found
 963             # keep state if the block is just a blank line
 964             # if len(block) == 1 and self._is_blank_codeline(line):
 965             #     return
 966             self.state = "documentation"
 967
 968
 969 # header_handler()
 970 # ~~~~~~~~~~~~~~~~
 971 #
 972 # Handle a leading code block. (See `Text2Code.header_handler()`_ for a
 973 # discussion of the "header" state.) ::
 974
 975     def header_handler(self, lines):
 976         """Format leading code block"""
 977         if self.strip:
 978             return
 979         # get iterator over the lines that formats them as code-block
 980         lines = iter(self.code_block_handler(lines))
 981         # prepend header string to first line
 982         yield self.header_string + next(lines)
 983         # yield remaining lines
 984         for line in lines:
 985             yield line
 986
 987 # .. _Code2Text.documentation_handler():
 988 #
 989 # documentation_handler()
 990 # ~~~~~~~~~~~~~~~~~~~~~~~
 991 #
 992 # The *documentation state* handler converts a comment to a documentation
 993 # block by stripping the leading `comment string` from every line::
 994
 995     def documentation_handler(self, block):
 996         """Uncomment documentation blocks in source code
 997         """
 998
 999 # Strip comment strings::
1000
1001         lines = [self.uncomment_line(line) for line in block]
1002
1003 # If the code block is stripped, the literal marker would lead to an
1004 # error when the text is converted with Docutils. Strip it as well. ::
1005
1006         if self.strip or self.strip_marker:
1007             self.strip_code_block_marker(lines)
1008
1009 # Otherwise, check for the `code_block_marker`_ at the end of the
1010 # documentation block (skipping directive options that might follow it)::
1011
1012         elif self.add_missing_marker:
1013             for line in lines[::-1]:
1014                 if self.marker_regexp.search(line):
1015                     self._add_code_block_marker = False
1016                     break
1017                 if (line.rstrip()
1018                     and not self.directive_option_regexp.search(line)):
1019                     self._add_code_block_marker = True
1020                     break
1021             else:
1022                 self._add_code_block_marker = True
1023
1024 # Yield lines::
1025
1026         for line in lines:
1027             yield line
1028
1029
1030 # uncomment_line()
1031 # ~~~~~~~~~~~~~~~~
1032 #
1033 # Return documentation line after stripping comment string. Consider the
1034 # case that a blank line has a comment string without trailing whitespace::
1035
1036     def uncomment_line(self, line):
1037         """Return uncommented documentation line"""
1038         line = line.replace(self.comment_string, "", 1)
1039         if line.rstrip() == self.stripped_comment_string:
1040             line = line.replace(self.stripped_comment_string, "", 1)
1041         return line
1042
1043
1044 # .. _Code2Text.code_block_handler():
1045 #
1046 # code_block_handler()
1047 # ~~~~~~~~~~~~~~~~~~~~
1048 #
1049 # The `code_block` handler returns the code block as indented literal
1050 # block (or filters it, if ``self.strip == True``). The amount of the code
1051 # indentation is controlled by `self.codeindent` (default 2).  ::
1052
1053     def code_block_handler(self, lines):
1054         """Covert code blocks to text format (indent or strip)
1055         """
1056         if self.strip:
1057             return
1058         # eventually insert transition marker
1059         if self._add_code_block_marker:
1060             self.state = "documentation"
1061             yield self.code_block_marker + "\n"
1062             yield "\n"
1063             self._add_code_block_marker = False
1064             self.state = "code_block"
1065         for line in lines:
1066             if not line.rstrip():
1067                 yield line  # don't add indent to blank lines
1068             else:
1069                 yield " "*self.codeindent + line
1070
1071
1072 # strip_code_block_marker()
1073 # ~~~~~~~~~~~~~~~~~~~~~~~~~
1074 #
1075 # Replace the literal marker with the equivalent of Docutils replace rules
1076 #
1077 # * strip ``::``-line (and preceding blank line) if on a line on its own
1078 # * strip ``::`` if it is preceded by whitespace.
1079 # * convert ``::`` to a single colon if preceded by text
1080 #
1081 # `lines` is a list of documentation lines (with a trailing blank line).
1082 # It is modified in-place::
1083
1084     def strip_code_block_marker(self, lines):
1085         try:
1086             line = lines[-2]
1087         except IndexError:
1088             return  # just one line (no trailing blank line)
1089
1090         # match with regexp: `match` is None or has groups
1091         # \1 leading text, \2 code_block_marker, \3 remainder
1092         match = self.marker_regexp.search(line)
1093
1094         if not match:                 # no code_block_marker present
1095             return
1096         if not match.group(1):        # `code_block_marker` on an extra line
1097             del(lines[-2])
1098             # delete preceding line if it is blank
1099             if len(lines) >= 2 and not lines[-2].lstrip():
1100                 del(lines[-2])
1101         elif match.group(1).rstrip() < match.group(1):
1102             # '::' follows whitespace
1103             lines[-2] = match.group(1).rstrip() + match.group(3)
1104         else:                         # '::' follows text
1105             lines[-2] = match.group(1).rstrip() + ':' + match.group(3)
1106
1107
1108 # Filters
1109 # =======
1110 #
 1111 # Filters allow pre- and post-processing of the data to bring it in a format
 1112 # suitable for the "normal" text<->code conversion. An example is the
 1113 # conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments before
 1114 # the code is converted to text (and back to `C` comments after the text
 1115 # is converted to code).
1116 #
1117 # Filters are generator functions that return an iterator acting on a
1118 # `data` iterable and yielding processed `data` lines.
1119 #
1120 #
1121 # identity_filter()
1122 # -----------------
1123 #
1124 # The most basic filter is the identity filter, that returns its argument as
1125 # iterator::
1126
def identity_filter(data):
    """Return an iterator over `data`, leaving the items untouched"""
    data_iterator = iter(data)
    return data_iterator
1130
1131
1132 # expandtabs_filter()
1133 # -------------------
1134 #
1135 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
1136 #
1137 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
1138 # hard tabs can lead to errors when the indentation is changed. ::
1139
def expandtabs_filter(data):
    """Yield every line of `data` with hard tabs replaced by spaces

    Uses `str.expandtabs()` with its default tab size.
    """
    for raw_line in data:
        expanded = raw_line.expandtabs()
        yield expanded
1144
1145
1146 # collect_blocks()
1147 # ----------------
1148 #
1149 # A filter to aggregate "paragraphs" (blocks separated by blank
1150 # lines). Yields lists of lines::
1151
def collect_blocks(lines):
    """Group `lines` into paragraphs

    Yield one list of lines per paragraph. A paragraph ends with the blank
    (whitespace only) lines that follow it; the next non-blank line starts
    a new paragraph.

    The last list is always yielded, so empty input yields one empty list.
    """
    paragraph = []
    seen_blank = False
    for line in lines:
        is_blank = not line.rstrip()
        if seen_blank and not is_blank:
            # first non-blank line after blank line(s): new paragraph
            yield paragraph
            paragraph = [line]
            seen_blank = False
        else:
            paragraph.append(line)
            seen_blank = seen_blank or is_blank
    yield paragraph
1172
1173
1174 # dumb_c_preprocessor()
1175 # ---------------------
1176 #
1177 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
1178 # only converts lines that
1179 #
1180 # * start with "/\* " and end with " \*/" (followed by whitespace only)
1181 #
1182 # A more sophisticated version would also
1183 #
1184 # * convert multi-line comments
1185 #
1186 #   + Keep indentation or strip 3 leading spaces?
1187 #
1188 # * account for nested comments
1189 #
1190 # * only convert comments that are separated from code by a blank line
1191 #
1192 # ::
1193
def dumb_c_preprocessor(data):
    """change `C` ``/* `` `` */`` comments into C++ ``// `` comments

    Only lines that start with "/* " and end with " */" (ignoring trailing
    whitespace) are converted; all other lines are yielded unchanged.
    """
    cpp_comment = defaults.comment_strings["c++"]
    comment_start, comment_end = "/* ", " */"
    for line in data:
        is_c_comment = (line.startswith(comment_start)
                        and line.rstrip().endswith(comment_end))
        if is_c_comment:
            # replace the opening delimiter, then drop the last closing one
            converted = line.replace(comment_start, cpp_comment, 1)
            line = "".join(converted.rsplit(comment_end, 1))
        yield line
1205
1206 # Unfortunately, the `replace` method of strings does not support negative
1207 # numbers for the `count` argument:
1208 #
1209 #   >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1210 #   False
1211 #
1212 # However, there is the `rsplit` method, that can be used together with `join`:
1213 #
1214 #   >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1215 #   True
1216
1217
1218 # dumb_c_postprocessor()
1219 # ----------------------
1220 #
1221 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1222 # delimiters ::
1223
def dumb_c_postprocessor(data):
    """change C++ ``// `` comments into `C` ``/* `` `` */`` comments

    * A blank comment line (only the comment string, with or without its
      trailing whitespace) becomes a blank line.
    * A line starting with the comment string is wrapped in ``/* `` and
      `` */`` and terminated with a newline.
    * All other lines are yielded unchanged.
    """
    comment_string = defaults.comment_strings["c++"]
    blank_comment = comment_string.rstrip()
    boc_string = "/* "
    eoc_string = " */"
    for line in data:
        if line.rstrip() == blank_comment:
            line = line.replace(comment_string, "", 1)
            # The blank comment may lack the trailing whitespace of
            # `comment_string` (this is the form the text-to-code conversion
            # emits for blank documentation lines): strip it as well.
            if line.rstrip() == blank_comment:
                line = line.replace(blank_comment, "", 1)
        elif line.startswith(comment_string):
            line = line.replace(comment_string, boc_string, 1)
            line = line.rstrip() + eoc_string + "\n"
        yield line
1236
1237
1238 # register filters
1239 # ----------------
1240 #
1241 # ::
1242
# The same pair of comment filters is registered under two keys each.
# NOTE(review): the keys presumably follow the pattern "<source>2<target>"
# ("c2text": C code to text) -- confirm against the code that looks them up.
defaults.preprocessors['c2text'] = dumb_c_preprocessor
defaults.preprocessors['css2text'] = dumb_c_preprocessor
defaults.postprocessors['text2c'] = dumb_c_postprocessor
defaults.postprocessors['text2css'] = dumb_c_postprocessor
1247
1248
1249 # Command line use
1250 # ================
1251 #
1252 # Using this script from the command line will convert a file according to its
1253 # extension. This default can be overridden by a couple of options.
1254 #
1255 # Dual source handling
1256 # --------------------
1257 #
1258 # How to determine which source is up-to-date?
1259 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1260 #
1261 # - `Set the modification time`_ of `outfile` to the one of `infile` to
1262 #   indicate that the source files are 'synchronised'.
1263 #
1264 #   * Are there problems to expect from "backdating" a file? Which?
1265 #
1266 #     Looking at http://www.unix.com/showthread.php?t=20526, it seems
1267 #     perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1268 #     description of the "actuality" of the data in the file.
1269 #
1270 # - alternatively move input file to a backup copy (with option: `--replace`)
1271 #
1272 # - check modification date before overwriting
1273 #   (with option: `--overwrite=update`)
1274 #
1275 # - check modification date before editing (implemented as `Jed editor`_
1276 #   function `pylit_check()` in `pylit.sl`_)
1277 #
1278 # .. _Jed editor: http://www.jedsoft.org/jed/
1279 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1280 #
1281 # Recognised Filename Extensions
1282 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1283 #
1284 # Instead of defining a new extension for "pylit" literate programs,
1285 # by default ``.txt`` will be appended for the text source and stripped by
1286 # the conversion to the code source. I.e. for a Python program foo:
1287 #
1288 # * the code source is called ``foo.py``
1289 # * the text source is called ``foo.py.txt``
1290 # * the html rendering is called ``foo.py.html``
1291 #
1292 #
1293 # PylitOptions
1294 # ------------
1295 #
1296 # The `PylitOptions` class comprises an option parser and methods for
1297 # completion of command line arguments from defaults and context::
1298
class PylitOptions(object):
    """Storage and handling of command line arguments for pylit"""


# __init__()
# ~~~~~~~~~~
#
# ::

    def __init__(self):
        """Set up an `ArgumentParser` instance for pylit command line arguments

        The parser is stored in `self.parser`.
        """
        p = argparse.ArgumentParser(usage=main.__doc__)

# .. _command line arguments:
#
# Positional arguments (I/O):
#
# The "infile" argument is required unless there is a value in the
# `namespace` passed to `p.parse_args()`.
# We need to cheat here, because this behaviour is not supported by
# `argparse` (cf. `issue 29670`_).
#
# The default value is set to `argparse.SUPPRESS` to prevent overwriting an
# existing value in the `namespace` (cf. `issue 28734`_). ::

        p.add_argument('infile', metavar='INFILE',
                       nargs='?', default=argparse.SUPPRESS,
                       help='input file ("-" for stdin)')
        p.add_argument('outfile', metavar='OUTFILE',
                       nargs='?', default=argparse.SUPPRESS,
                       help=u'output file, default: auto-determined')

# Conversion settings::

        p.add_argument("-c", "--code2txt", action="store_false",
                       dest="txt2code",
                       help='convert code source to text')
        p.add_argument("-t", "--txt2code", action="store_true",
                       help='convert text source to code'
                            ' (default determined from input file name)')
        p.add_argument("--language",
                       choices=list(defaults.comment_strings.keys()),
                       help="use LANGUAGE's native comment style"
                            ' (default: determined from input file extension)')
        p.add_argument("--comment-string", dest="comment_string",
                       help="documentation block marker in code source "
                       "(including trailing whitespace, "
                       "default: language dependent)")
        p.add_argument("-m", "--code-block-marker", dest="code_block_marker",
                       help='syntax token starting a code block.'
                       ' (default "%s")' % defaults.code_block_markers.default)
        p.add_argument("--codeindent", type=int,
                       help='Number of spaces to indent code blocks with'
                            ' code2text (default %d)' % defaults.codeindent)

# Output file handling::

        p.add_argument("--overwrite", action="store",
                       choices=['yes', 'no', 'update'],
                       help='overwrite output file (default "%s")'
                            % defaults.overwrite)
        p.add_argument("--replace", action="store_true",
                       help="move infile to a backup copy (appending '~')")
        # TODO: do we need this? If yes, make mtime update depend on it!
        # p.add_argument("--keep-mtime", action="store_true",
        #              help="do not set the modification time of the outfile "
        #              "to the corresponding value of the infile")
        p.add_argument("-s", "--strip", action="store_true",
                       help='"export" by stripping documentation or code')

# Actions::

        p.add_argument("-d", "--diff", action="store_true",
                       help="test for differences to existing file")
        p.add_argument("--doctest", action="store_true",
                       help="run doctest.testfile() on the text version")
        p.add_argument("-e", "--execute", action="store_true",
                       help="execute code (Python only)")
        p.add_argument('-v', '--version', action='version',
                       version=__version__)

        self.parser = p


# .. _PylitOptions.complete_values():
#
# complete_values()
# ~~~~~~~~~~~~~~~~~
#
# Complete a namespace instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- namespace object with the option values as attributes
                  (e.g. `argparse.Namespace`); modified in place

        Missing or empty `outfile` and missing `txt2code` and `language`
        values are derived from the file names.
        Calls `self.parser.error()` if `values` has no attribute `infile`.
        """

# The "infile" argument is required but may be pre-set in the `namespace`
# passed to `p.parse_args()` (cf. `issue 29670`_)::

        try:
            values.infile
        except AttributeError:
            self.parser.error('the following argument is required: infile')

# Guess conversion direction from `infile` filename::

        if getattr(values, 'txt2code', None) is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in defaults.text_extensions:
                values.txt2code = True
            elif in_extension in defaults.languages.keys():
                values.txt2code = False
            else:
                values.txt2code = None

# Auto-determine the output file name (the "outfile" attribute may be
# missing, as its parser default is `argparse.SUPPRESS`)::

        if not getattr(values, 'outfile', None):
            values.outfile = self._get_outfile_name(values)

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = out_extension not in defaults.text_extensions

# Set the language of the code (the code source is the output file of a
# text-to-code conversion and the input file otherwise)::

        if getattr(values, 'language', None) is None:
            code_file = values.outfile if values.txt2code else values.infile
            code_extension = os.path.splitext(code_file)[1]
            values.language = defaults.languages[code_extension]

        return values


# _get_outfile_name()
# ~~~~~~~~~~~~~~~~~~~
#
# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:
#
# * '-' (stdin) results in '-' (stdout)
# * strip the `text_extension`_ (txt2code) or
# * add the `text_extension`_ (code2txt)
# * fallback: if no guess can be made, add ".out"
#
#   .. TODO: use values.outfile_extension if it exists?
#
# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        values -- namespace with the attribute `infile`; the attribute
                  `txt2code` is also read unless `infile` is '-' or has a
                  text extension or a known code extension.
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in defaults.text_extensions:
            return base  # strip
        if (ext and ext in defaults.languages) or values.txt2code is False:
            return values.infile + defaults.text_extensions[0]  # add
        # give up
        return values.infile + ".out"


# .. _PylitOptions.__call__():
#
# __call__()
# ~~~~~~~~~~
#
# Use PylitOptions instances as *callables*: Calling a `PylitOptions` instance
# parses the argument list to extract option values and completes them based
# on "context-sensitive defaults".
# Keyword arguments overwrite the `defaults`_
# and are overwritten by command line arguments.
#
# Attention: passing a `namespace` to `ArgumentParser.parse_args()` has a
# side-effect:
#
#   […] if you give an existing object, the option defaults will not be
#   initialized on it
#
#   -- https://docs.python.org/dev/library/optparse.html#parsing-arguments
#
# .. The argument is renamed from `values` to `namespace` in Python 3.
#    Positional argument defaults are initialized unless the default value
#    `argparse.SUPPRESS` is specified.
#
# ::

    def __call__(self, args=None, **kwargs):
        """parse and complete command line args, return option values

        args   -- list of argument strings
                  (default: `sys.argv[1:]`, evaluated when called)
        kwargs -- settings that overwrite the module `defaults` and are
                  overwritten by values from `args`
        """
        # Look up the command line at call time: a `sys.argv[1:]` default in
        # the signature would be evaluated only once, at class definition.
        if args is None:
            args = sys.argv[1:]
        settings = vars(defaults).copy()  # don't change global settings
        settings.update(kwargs)
        settings = argparse.Namespace(**settings)

        settings = self.parser.parse_args(args, settings)
        return self.complete_values(settings)
1512
1513
1514 # Helper functions
1515 # ----------------
1516 #
1517 # open_streams()
1518 # ~~~~~~~~~~~~~~
1519 #
1520 # Return file objects for in- and output. If the input path is missing,
1521 # write usage and abort. (An alternative would be to use stdin as default.
1522 # However,  this leaves the uninitiated user with a non-responding application
1523 # if (s)he just tries the script without any arguments) ::
1524
def open_streams(infile='-', outfile='-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  file(infile) or sys.stdin
    out_stream  --  file(outfile) or sys.stdout
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Additional keyword arguments are accepted and ignored.

    Raise `ValueError` for an invalid `overwrite` value and `IOError` if
    `infile` is empty or the output file must not be overwritten.
    With overwrite == 'update', print a message to stderr and exit
    (`SystemExit`) if `is_newer()` reports no difference in modification time.

    An input file opened here is closed again if the output stream cannot be
    opened.
    """
    if overwrite not in ('yes', 'no', 'update'):
        raise ValueError('Argument "overwrite" must be "yes", "no",'
                         ' or update, not "%s".' % overwrite)
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    out_stream = None
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(17, "Output file exists!", outfile)
        else:
            if overwrite == 'update':
                outfile_is_newer = is_newer(outfile, infile)
                if outfile_is_newer is None:
                    print('Output file "%s" is as old as input file!\n'
                          'Use "--overwrite=yes", if you want to overwrite it.'
                          % outfile, file=sys.stderr)
                    sys.exit()
                if outfile_is_newer:
                    raise IOError(1, "Output file is newer than input file!",
                                  outfile)
            out_stream = open(outfile, 'w')
    finally:
        # no output stream: we are leaving with an exception (or exit),
        # so do not leak the input file
        if out_stream is None and in_stream is not sys.stdin:
            in_stream.close()
    return (in_stream, out_stream)
1562
1563
1564 # is_newer()
1565 # ~~~~~~~~~~
1566 # ::
1567
def is_newer(path1, path2):
    """Tell whether the file at `path1` was modified after the one at `path2`

    The modification times (mtime) of both files are compared.

    A file that cannot be accessed (e.g. because it does not exist) counts
    as oldest: the result is False if only `path1` is missing and True if
    only `path2` is missing.

    If the modification times differ by less than 1/10 second, the result
    is None (false in a Boolean context, but distinguishable from False).
    """
    mtimes = []
    for path in (path1, path2):
        try:
            mtimes.append(os.path.getmtime(path))
        except OSError:
            mtimes.append(-1)
    mtime1, mtime2 = mtimes
    return None if abs(mtime1 - mtime2) < 0.1 else mtime1 > mtime2
1591
1592
1593 # get_converter()
1594 # ~~~~~~~~~~~~~~~
1595 #
1596 # Get an instance of the converter state machine::
1597
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`

    A `Text2Code` instance if `txt2code` is true, else a `Code2Text`
    instance. Additional keyword arguments are passed to the converter.
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1603
1604
1605 # Actions
1606 # -------
1607 #
1608 # run_doctest()
1609 # ~~~~~~~~~~~~~
1610 # ::
1611
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- path of the source file ("-" for stdin)
    txt2code    -- if False, `infile` is a code source and is converted
                   to text with `Code2Text` before testing
    globs       -- dictionary of globals for the tests (default: empty)
    verbose, optionflags -- passed on to `doctest.DocTestRunner`
    keyw        -- passed on to the `Code2Text` converter

    Return the tuple (failures, tries).
    """
    if globs is None:
        globs = {}

# Allow imports from the current working dir by prepending an empty string to
# `sys.path` (unless it is already the first entry)::

    if not sys.path or sys.path[0] != '':
        sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest. Close the input file once the source is read::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            keyw.update({'add_missing_marker': False})
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        if data is not sys.stdin:
            data.close()

# decode doc string if there is a "magic comment" in the first or second line
# (http://docs.python.org/reference/lexical_analysis.html#encoding-declarations)
# ::

    if sys.version_info < (3, 0):
        firstlines = ' '.join(docstring.splitlines()[:2])
        match = re.search(r'coding[=:]\s*([-\w.]+)', firstlines)
        if match:
            docencoding = match.group(1)
            docstring = docstring.decode(docencoding)

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    runner = DocTestRunner(verbose, optionflags)
    runner.run(test)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests"%(runner.failures, runner.tries))
    return runner.failures, runner.tries
1659
1660
1661 # diff()
1662 # ~~~~~~
1663 #
1664 # ::
1665
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile does not exist or is '-', do a round-trip conversion and
    report differences.

    infile    -- path of the input file (opened with `open()`)
    outfile   -- path of the file to compare the conversion with
    txt2code  -- conversion direction, passed to `get_converter()`
    keyw      -- passed on to the converter of the forward conversion

    The differences are printed in unified diff format.
    Return True if differences were found, else False.
    """

    import difflib

    # for diffing, we need a copy of the data as list
    with open(infile) as instream:
        data = instream.readlines()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-' and os.path.exists(outfile):
        with open(outfile) as outstream:
            old = outstream.readlines()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # the diff lines already end with a newline; a separator after
        # each line would shift the following line and break the diff
        print(line, end='')
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1709
1710
1711 # execute()
1712 # ~~~~~~~~~
1713 #
# Works only for Python code.
1715 #
1716 # Does not work with `eval`, as code is not just one expression. ::
1717
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    Arguments:
      infile   -- path of the file to run ('-' reads from standard input),
      txt2code -- if true, the input is a text source and is converted
                  to code with `Text2Code` before execution,
      keyw     -- settings passed on to `Text2Code`.

    The code runs as a script in a namespace of its own (with `__name__`
    set to ``'__main__'``). Exceptions raised by the executed code
    propagate to the caller.

    NOTE: this runs arbitrary code from `infile`; only use it on sources
    you trust.
    """

    if infile == '-':
        lines = sys.stdin.readlines()
    else:
        with open(infile) as f:
            lines = f.readlines()
    if txt2code:
        source = str(Text2Code(lines, **keyw))
    else:
        source = ''.join(lines)

    # Run the code in one dedicated namespace serving as globals *and*
    # locals. A plain ``exec(source)`` inside this function would store
    # the script's top-level names in the function's locals, where
    # functions defined by the script cannot see them (NameError on the
    # script's own imports and globals).
    namespace = {'__name__': '__main__', '__file__': infile}
    # Compiling with the file name gives meaningful tracebacks.
    exec(compile(source, infile, 'exec'), namespace)
1727
1728
1729 # main()
1730 # ------
1731 #
1732 # If this script is called from the command line, the `main` function will
1733 # convert the input (file or stdin) between text and code formats.
1734 #
1735 # Setting values for the conversion can be given as keyword arguments to
1736 # `main()`_. The option defaults will be updated by command line arguments
1737 # and extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1738 # helper functions and the converter instantiation.
1739 #
1740 # This allows easy customisation for programmatic use
1741 # -- just call `main` with the appropriate keyword options,
1742 # e.g., ``pylit.main(comment_string="## ")``. ::
1743
def main(args=sys.argv[1:], **settings):
    """%(prog)s [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in- and output.
    """
    # NOTE(review): the ``%(prog)s`` placeholder suggests that this
    # docstring doubles as usage template for the option parser --
    # keep its wording unless that is confirmed not to be the case.

# Parse the arguments and complete the options::

    options = PylitOptions()(args, **settings)
    # attribute dictionary of the option values; every ``**opts`` below
    # hands a fresh copy to the callee
    opts = vars(options)

# Special actions with early return (the first selected one wins)::

    for flag, action in (("doctest", run_doctest),
                         ("diff", diff),
                         ("execute", execute)):
        if getattr(options, flag):
            return action(**opts)

# Open in- and output streams::

    try:
        data, out_stream = open_streams(**opts)
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance, convert and write to out_stream::

    converter = get_converter(data, **opts)
    out_stream.write(str(converter))

    if out_stream is not sys.stdout:
        print('Output written to %r' % out_stream.name)
        out_stream.close()

# If input and output are from files, _`set the modification time` (`mtime`)
# of the output file to the one of the input file to indicate that the
# contained information is equal. [#]_ ::

        try:
            atime = os.path.getatime(options.outfile)
            mtime = os.path.getmtime(options.infile)
            os.utime(options.outfile, (atime, mtime))
        except OSError:
            # best effort only (e.g. if the input is not a regular file)
            pass

# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).
#
#
# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        backup = options.infile + "~"
        os.rename(options.infile, backup)
1811
1812
1813 # Run main, if called from the command line::
1814
if __name__ == "__main__":
    # script entry point: options are taken from the command line
    main()
1817
1818
1819 # Open questions
1820 # ==============
1821 #
1822 # Open questions and ideas for further development
1823 #
1824 # Clean code
1825 # ----------
1826 #
# * can we gain from using "shutil" over "os.path" and "os"?
1828 # * use pylint or pyChecker to enforce a consistent style?
1829 #
1830 # Options
1831 # -------
1832 #
1833 # * Use templates for the "intelligent guesses" (with Python syntax for string
1834 #   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1835 #
1836 # * Is it sensible to offer the `header_string` option also as command line
1837 #   option?
1838 #
1839 # treatment of blank lines
1840 # ------------------------
1841 #
1842 # Alternatives: Keep blank lines blank
1843 #
1844 # - "never" (current setting) -> "visually merges" all documentation
1845 #    if there is no interjacent code
1846 #
1847 # - "always" -> disrupts documentation blocks,
1848 #
1849 # - "if empty" (no whitespace). Comment if there is whitespace.
1850 #
1851 #   This would allow non-obstructing markup but unfortunately this is (in
1852 #   most editors) also non-visible markup.
1853 #
1854 # + "if double" (if there is more than one consecutive blank line)
1855 #
1856 #   With this handling, the "visual gap" remains in both, text and code
1857 #   source.
1858 #
1859 #
1860 # Parsing Problems
1861 # ----------------
1862 #
1863 # * Ignore "matching comments" in literal strings?
1864 #
1865 #   Too complicated: Would need a specific detection algorithm for every
1866 #   language that supports multi-line literal strings (C++, PHP, Python)
1867 #
1868 # * Warn if a comment in code will become documentation after round-trip?
1869 #
1870 #
1871 # docstrings in code blocks
1872 # -------------------------
1873 #
1874 # * How to handle docstrings in code blocks? (it would be nice to convert them
1875 #   to rst-text if ``__docformat__ == restructuredtext``)
1876 #
1877 # TODO: Ask at Docutils users|developers
1878 #
1879 # Plug-ins
1880 # --------
1881 #
# Specify a path for user additions and plug-ins. This would require
# converting PyLit from a pure module to a package...
1884 #
1885 #   6.4.3 Packages in Multiple Directories
1886 #
1887 #   Packages support one more special attribute, __path__. This is initialized
1888 #   to be a list containing the name of the directory holding the package's
1889 #   __init__.py before the code in that file is executed. This
1890 #   variable can be modified; doing so affects future searches for modules and
1891 #   subpackages contained in the package.
1892 #
1893 #   While this feature is not often needed, it can be used to extend the set
1894 #   of modules found in a package.
1895 #
1896 #
1897 # .. References
1898 #
1899 # .. _Docutils: http://docutils.sourceforge.net/
1900 # .. _Sphinx: http://sphinx.pocoo.org
1901 # .. _Pygments: http://pygments.org/
1902 # .. _code-block directive:
1903 #     http://docutils.sourceforge.net/sandbox/code-block-directive/
1904 # .. _literal block:
1905 # .. _literal blocks:
1906 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#literal-blocks
1907 # .. _indented literal block:
1908 # .. _indented literal blocks:
1909 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1910 # .. _quoted literal block:
1911 # .. _quoted literal blocks:
1912 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1913 # .. _parsed-literal blocks:
1914 #     http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block
1915 # .. _doctest block:
1916 # .. _doctest blocks:
1917 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks
1918 # .. _issue 28734: https://bugs.python.org/issue28734
1919 # .. _issue 29670: https://bugs.python.org/issue29670
1920 # .. _argparse: https://docs.python.org/dev/library/argparse.html
1921 # .. _ArgumentParser.parse_args():
1922 #     https://docs.python.org/dev/library/argparse.html#the-parse-args-method