Prepare move from optparse to argparse.
[pylit.git] / pylit.py
blob605a6f21e8a567d934c5321989e3286039d57930
1 #!/usr/bin/env python
2 # -*- coding: utf8 -*-
4 """pylit: bidirectional text <-> code converter
6 Convert between a *text source* with embedded computer code
7 and a *code source* with embedded documentation.
8 """
10 from __future__ import print_function
12 # pylit.py
13 # ********
14 # Literate programming with reStructuredText
15 # ++++++++++++++++++++++++++++++++++++++++++
17 # :Copyright: © 2005, 2007, 2015, 2021 Günter Milde.
18 # Released without warranty under the terms of the
19 # GNU General Public License (v. 3 or later)
21 # .. contents::
23 # Frontmatter
24 # ===========
26 # Changelog
27 # ---------
29 # .. class:: borderless
31 # ====== ========== ==========================================================
32 # 0.1 2005-06-29 Initial version.
33 # 0.1.1 2005-06-30 First literate version.
34 # 0.1.2 2005-07-01 Object oriented script using generators.
35 # 0.1.3 2005-07-10 Two state machine (later added 'header' state).
36 # 0.2b 2006-12-04 Start of work on version 0.2 (code restructuring).
37 # 0.2 2007-01-23 Published at ``pylit.berlios.de``.
38 # 0.2.1 2007-01-25 Outsourced non-core documentation to the PyLit pages.
39 # 0.2.2 2007-01-26 New behaviour of `diff` function.
40 # 0.2.3 2007-01-29 New `header` methods after suggestion by Riccardo Murri.
41 # 0.2.4 2007-01-31 Raise Error if code indent is too small.
42 # 0.2.5 2007-02-05 New command line option --comment-string.
43 # 0.2.6 2007-02-09 Add section with open questions,
44 # Code2Text: let only blank lines (no comment str)
45 # separate text and code,
46 # fix `Code2Text.header`.
47 # 0.2.7 2007-02-19 Simplify `Code2Text.header`,
48 # new `iter_strip` method replacing a lot of ``if``-s.
49 # 0.2.8 2007-02-22 Set `mtime` of outfile to the one of infile.
50 # 0.3 2007-02-27 New `Code2Text` converter after an idea by Riccardo Murri,
51 # explicit `option_defaults` dict for easier customisation.
52 # 0.3.1 2007-03-02 Expand hard-tabs to prevent errors in indentation,
53 # `Text2Code` now also works on blocks,
54 # removed dependency on SimpleStates module.
55 # 0.3.2 2007-03-06 Bug fix: do not set `language` in `option_defaults`
56 # renamed `code_languages` to `languages`.
57 # 0.3.3 2007-03-16 New language css,
58 # option_defaults -> defaults = optparse.Values(),
59 # simpler PylitOptions: don't store parsed values,
60 # don't parse at initialisation,
61 # OptionValues: return `None` for non-existing attributes,
62 # removed -infile and -outfile, use positional arguments.
63 # 0.3.4 2007-03-19 Documentation update,
64 # separate `execute` function.
65 # 2007-03-21 Code cleanup in `Text2Code.__iter__`.
66 # 0.3.5 2007-03-23 Removed "css" from known languages after learning that
67 # there is no C++ style "// " comment string in CSS2.
68 # 0.3.6 2007-04-24 Documentation update.
69 # 0.4 2007-05-18 Implement Converter.__iter__ as stack of iterator
70 # generators. Iterating over a converter instance now
71 # yields lines instead of blocks.
72 # Provide "hooks" for pre- and postprocessing filters.
73 # Rename states to reduce confusion with formats:
74 # "text" -> "documentation", "code" -> "code_block".
75 # 0.4.1 2007-05-22 Converter.__iter__: cleanup and reorganisation,
76 # rename parent class Converter -> TextCodeConverter.
77 # 0.4.2 2007-05-23 Merged Text2Code.converter and Code2Text.converter into
78 # TextCodeConverter.converter.
79 # 0.4.3 2007-05-30 Replaced use of defaults.code_extensions with
80 # values.languages.keys().
81 # Removed spurious `print` statement in code_block_handler.
82 # Added basic support for 'c' and 'css' languages
83 # with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
84 # 0.5 2007-06-06 Moved `collect_blocks`_ out of `TextCodeConverter`_,
85 # bug fix: collect all trailing blank lines into a block.
86 # Expand tabs with `expandtabs_filter`_.
87 # 0.6 2007-06-20 Configurable code-block marker (default ``::``)
88 # 0.6.1 2007-06-28 Bug fix: reset self.code_block_marker_missing.
89 # 0.7 2007-12-12 prepending an empty string to sys.path in run_doctest()
90 # to allow imports from the current working dir.
91 # 0.7.1 2008-01-07 If outfile does not exist, do a round-trip conversion
92 # and report differences (as with outfile=='-').
93 # 0.7.2 2008-01-28 Do not add missing code-block separators with
94 # `doctest_run` on the code source. Keeps lines consistent.
95 # 0.7.3 2008-04-07 Use value of code_block_marker for insertion of missing
96 # transition marker in Code2Text.code_block_handler
97 # Add "shell" to defaults.languages
98 # 0.7.4 2008-06-23 Add "latex" to defaults.languages
99 # 0.7.5 2009-05-14 Bugfix: ignore blank lines in test for end of code block
100 # 0.7.6 2009-12-15 language-dependent code-block markers (after a
101 # feature request and patch by `jrioux`),
102 # use DefaultDict for language-dependent defaults,
103 # new defaults setting `add_missing_marker`_.
104 # 0.7.7 2010-06-23 New command line option --codeindent.
105 # 0.7.8 2011-03-30 Do not overwrite custom `add_missing_marker` value,
106 # allow directive options following the 'code' directive.
107 # 0.7.9 2011-04-05 Decode doctest string if 'magic comment' gives encoding.
108 # 0.7.10 2013-06-07 Add "lua" to defaults.languages
109 # 0.7.11 2020-10-10 Return 0, if input and output file are of same age.
110 # 0.8.0 unpublishd Fix ``--execute`` behaviour and tests.
111 # Change default `codeindent` to 2.
112 # ====== ========== ==========================================================
114 # ::
116 __version__ = "0.8.0dev"
118 __docformat__ = 'restructuredtext'
121 # Introduction
122 # ------------
124 # PyLit is a bidirectional converter between two formats of a computer
125 # program source:
127 # * a (reStructured) text document with program code embedded in
128 # *code blocks*, and
129 # * a compilable (or executable) code source with *documentation*
130 # embedded in comment blocks
133 # Requirements
134 # ------------
136 # ::
138 #import argparse
139 import optparse
140 import os
141 import re
142 import sys
145 # DefaultDict
146 # ~~~~~~~~~~~
148 # As `collections.defaultdict` adds key/value pairs when the default
149 # constructor is called, we define an alternative that does not mutate the
150 # dict as side-effect. ::
class DefaultDict(dict):
    """A `dict` that answers look-ups of unknown keys with a fallback value.

    The fallback is the `default` attribute. A failed look-up does not
    store anything: the mapping itself is left untouched.
    """

    # fallback value returned for keys that are not in the dictionary
    default = 'python'

    def __missing__(self, key):
        # Called by ``dict.__getitem__`` when `key` is absent
        # (cf. the `dict` section of the Python library reference).
        fallback = self.default
        return fallback
162 # defaults
163 # ========
165 # The `defaults` object provides a central repository for default
166 # values and their customisation. ::
168 defaults = optparse.Values()
170 # It is used for
172 # * the initialisation of data arguments in TextCodeConverter_ and
173 # PylitOptions_
175 # * completion of command line options in `PylitOptions.complete_values`_.
177 # This allows the easy creation of back-ends that customise the
178 # defaults and then call `main`_ e.g.:
180 # >>> import pylit
181 # >>> pylit.defaults.comment_string = "## "
182 # >>> pylit.defaults.codeindent = 4
183 # >>> pylit.main()
184 # 0 failures in 0 tests
185 # (0, 0)
187 # The following default values are defined in pylit.py:
189 # languages
190 # ---------
192 # Mapping of code file extensions to code language::
# Known code file extensions and the language they stand for; any other
# extension falls back to the `default` set below.
defaults.languages = DefaultDict({
    ".c": "c",
    ".cc": "c++",
    ".css": "css",
    ".lua": "lua",
    ".py": "python",
    ".sh": "shell",
    ".sl": "slang",
    ".sty": "latex",
    ".tex": "latex",
})
defaults.languages.default = 'python'
206 # The result can be overridden by the ``--language`` command line option.
208 # The fallback language, used if there is no matching extension (e.g. if pylit
209 # is used as filter) and no ``--language`` is specified is ``"python"``.
210 # It can be changed programmatically by changing the ``.default``
211 # attribute, e.g.
213 # >>> pylit.defaults.languages['.parrot']
214 # 'python'
215 # >>> pylit.defaults.languages.default = 'c++'
216 # >>> pylit.defaults.languages['.camel']
217 # 'c++'
219 # .. _text_extension:
221 # text_extensions
222 # ---------------
224 # List of known extensions of (reStructured) text files. The first
225 # extension in this list is used by the `_get_outfile_name`_ method to
226 # generate a text output filename::
228 defaults.text_extensions = [".txt", ".rst"]
231 # comment_strings
232 # ---------------
234 # Comment strings for known languages. Used in Code2Text_ to recognise
235 # text blocks and in Text2Code_ to format text blocks as comments.
236 # Defaults to ``'# '``.
238 # **Comment strings include trailing whitespace.** ::
# Comment string (including the trailing space) for every known language;
# any other language falls back to the `default` set below.
defaults.comment_strings = DefaultDict({
    "css": '// ',
    "c": '// ',
    "c++": '// ',
    "lua": '-- ',
    "latex": '% ',
    "python": '# ',
    "shell": '# ',
    "slang": '% ',
})
defaults.comment_strings.default = '# '
251 # header_string
252 # -------------
254 # Marker string for a header code block in the text source. No trailing
255 # whitespace needed as indented code follows.
256 # Must be a valid rst directive that accepts code on the same line, e.g.
257 # ``'..admonition::'``.
259 # Default is a comment marker::
261 defaults.header_string = '..'
264 # .. _code_block_marker:
266 # code_block_markers
267 # ------------------
269 # Markup at the end of a documentation block.
270 # Default is Docutils' marker for a `literal block`_::
272 defaults.code_block_markers = DefaultDict()
273 defaults.code_block_markers.default = '::'
275 # The `code_block_marker` string is `inserted into a regular expression`_.
276 # Language-specific markers can be defined programmatically, e.g. in a
277 # wrapper script.
279 # In a document where code examples are only one of several uses of
280 # literal blocks, it is more appropriate to single out the source code,
281 # e.g. with the double colon on a separate line ("expanded form")
283 # ``defaults.code_block_markers.default = ':: *'``
285 # or a dedicated ``.. code-block::`` directive [#]_
287 # ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
289 # The latter form also allows code in different languages kept together
290 # in one literate source file.
292 # .. [#] The ``.. code-block::`` directive is not (yet) supported by
293 # standard Docutils. It is provided by several add-ons, including
294 # the `code-block directive`_ project in the Docutils Sandbox and
295 # Sphinx_.
298 # strip
299 # -----
301 # Export to the output format stripping documentation or code blocks::
303 defaults.strip = False
305 # strip_marker
306 # ------------
308 # Strip literal marker from the end of documentation blocks when
309 # converting to code format. Makes the code more concise but loses the
310 # synchronisation of line numbers in text and code formats. Can also be used
311 # (together with the auto-completion of the code-text conversion) to change
312 # the `code_block_marker`::
314 defaults.strip_marker = False
316 # add_missing_marker
317 # ------------------
319 # When converting from code format to text format, add a `code_block_marker`
320 # at the end of documentation blocks if it is missing::
322 defaults.add_missing_marker = True
324 # Keep this at ``True``, if you want to re-convert to code format later!
327 # .. _defaults.preprocessors:
329 # preprocessors
330 # -------------
332 # Preprocess the data with language-specific filters_
333 # Set below in Filters_::
335 defaults.preprocessors = {}
337 # .. _defaults.postprocessors:
339 # postprocessors
340 # --------------
342 # Postprocess the data with language-specific filters_::
344 defaults.postprocessors = {}
346 # .. _defaults.codeindent:
348 # codeindent
349 # ----------
351 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
353 defaults.codeindent = 2
355 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
356 # first recognised code line (header or first indented literal block
357 # of the text source).
359 # overwrite
360 # ---------
362 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
364 defaults.overwrite = 'update'
366 # Recognised values:
368 # :'yes': overwrite an already existing `outfile`,
369 # :'update': fail if the `outfile` is newer than `infile`,
370 # :'no': fail if `outfile` exists.
373 # Extensions
374 # ==========
376 # Try to import optional extensions::
try:
    # NOTE(review): the module is not referenced by name in the visible part
    # of this file; presumably importing it is enough to activate the
    # extension -- TODO confirm.
    import pylit_elisp
except ImportError:
    # the extension is optional: carry on without it
    pass
384 # Converter Classes
385 # =================
387 # The converter classes implement a simple state machine to separate and
388 # transform documentation and code blocks. For this task, only a very limited
389 # parsing is needed. PyLit's parser assumes:
391 # * `indented literal blocks`_ in a text source are code blocks.
393 # * comment blocks in a code source where every line starts with a matching
394 # comment string are documentation blocks.
396 # TextCodeConverter
397 # -----------------
398 # ::
400 class TextCodeConverter(object):
401 """Parent class for the converters `Text2Code` and `Code2Text`.
404 # The parent class defines data attributes and functions used in both
405 # `Text2Code`_ converting a text source to executable code source, and
406 # `Code2Text`_ converting commented code to a text source.
408 # Data attributes
409 # ~~~~~~~~~~~~~~~
411 # Class default values are fetched from the `defaults`_ object and can be
412 # overridden by matching keyword arguments during class instantiation. This
413 # also works with keyword arguments to `get_converter`_ and `main`_, as these
414 # functions pass on unused keyword args to the instantiation of a converter
415 # class. ::
417 language = defaults.languages[None]
418 comment_strings = defaults.comment_strings
419 comment_string = "" # set in __init__ (if empty)
420 codeindent = defaults.codeindent
421 header_string = defaults.header_string
422 code_block_markers = defaults.code_block_markers
423 code_block_marker = "" # set in __init__ (if empty)
424 strip = defaults.strip
425 strip_marker = defaults.strip_marker
426 add_missing_marker = defaults.add_missing_marker
427 directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
428 state = "" # type of current block, see `TextCodeConverter.convert`_
430 # Interface methods
431 # ~~~~~~~~~~~~~~~~~
433 # .. _TextCodeConverter.__init__:
435 # __init__
436 # """"""""
438 # Initialising sets the `data` attribute, an iterable object yielding lines of
439 # the source to convert. [#]_
441 # .. [#] The most common choice of data is a `file` object with the text
442 # or code source.
444 # To convert a string into a suitable object, use its splitlines()
445 # method like ``"2 lines\nof source".splitlines(True)``.
448 # Additional keyword arguments are stored as instance variables,
449 # overwriting the class defaults::
def __init__(self, data, **keyw):
    """Store the source `data` and set up markers, filters and regexp.

    data   -- iterable data object yielding the lines to convert
              (list, file, generator, string, ...)
    **keyw -- remaining keyword arguments are stored as
              data-attributes, overriding the class defaults
    """
    self.data = data
    self.__dict__.update(keyw)

# If empty, `code_block_marker` and `comment_string` are set according
# to the `language`::

    if not self.code_block_marker:
        self.code_block_marker = self.code_block_markers[self.language]
    if not self.comment_string:
        self.comment_string = self.comment_strings[self.language]
    self.stripped_comment_string = self.comment_string.rstrip()

# Pre- and postprocessing filters are set (with
# `TextCodeConverter.get_filter`_)::

    self.preprocessor = self.get_filter("preprocessors", self.language)
    self.postprocessor = self.get_filter("postprocessors", self.language)

# .. _inserted into a regular expression:

# Finally, a regular expression for the `code_block_marker` is compiled
# to find valid cases of `code_block_marker` in a given line and return
# the groups: ``\1 prefix, \2 code_block_marker, \3 remainder`` ::

    marker = self.code_block_marker
    if marker == '::':
        # the default marker may occur at the end of a text line
        # (raw string: "\." is not a valid string escape sequence)
        self.marker_regexp = re.compile(r'^( *(?!\.\.).*)(::)([ \n]*)$')
    else:
        # marker must be on a separate line; it is inserted as is,
        # so it may itself contain regular expression syntax
        self.marker_regexp = re.compile(r'^( *)(%s)(.*\n?)$' % marker)
489 # .. _TextCodeConverter.__iter__:
491 # __iter__
492 # """"""""
494 # Return an iterator for the instance. Iteration yields lines of converted
495 # data.
497 # The iterator is a chain of iterators acting on `self.data` that does
499 # * preprocessing
500 # * text<->code format conversion
501 # * postprocessing
503 # Pre- and postprocessing are only performed, if filters for the current
504 # language are registered in `defaults.preprocessors`_ and|or
505 # `defaults.postprocessors`_. The filters must accept an iterable as first
506 # argument and yield the processed input data line-wise.
507 # ::
def __iter__(self):
    """Return an iterator over the converted lines of `self.data`.

    The data is passed through the preprocessor, the text<->code
    conversion and the postprocessor, in this order.
    """
    preprocessed = self.preprocessor(self.data)
    converted = self.convert(preprocessed)
    return self.postprocessor(converted)
515 # .. _TextCodeConverter.__call__:
517 # __call__
518 # """"""""
519 # The special `__call__` method allows the use of class instances as callable
520 # objects. It returns the converted data as list of lines::
def __call__(self):
    """Run the conversion and return all converted lines as a list."""
    return list(self)
527 # .. _TextCodeConverter.__str__:
529 # __str__
530 # """""""
531 # Return converted data as string::
def __str__(self):
    """Return the converted data joined into a single string."""
    converted_lines = self()
    return "".join(converted_lines)
537 # Helpers and convenience methods
538 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
540 # .. _TextCodeConverter.convert:
542 # convert
543 # """""""
545 # The `convert` method generates an iterator that does the actual code <-->
546 # text format conversion. The converted data is yielded line-wise and the
547 # instance's `state` attribute indicates whether the current line is "header",
548 # "documentation", or "code_block"::
def convert(self, lines):
    """Generate the converted lines of a program document.

    The `lines` are collected into blocks; every block is classified
    with `set_state` and passed to the handler named after the
    resulting state. The handler's output is yielded line by line.
    """

# Initialise internal data attributes. (Done here, so that every new
# iteration re-initialises them.)

# `state`
#   the "type" of the currently processed block of lines. One of

#   :"":              initial state: check for header,
#   :"header":        leading code block: strip `header_string`,
#   :"documentation": documentation part: comment out,
#   :"code_block":    literal blocks containing source code: unindent.

# ::

    self.state = ""

# `_codeindent`
#   * Do not confuse the internal attribute `_codeindent` with the
#     configurable `codeindent` (without the leading underscore).
#   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent
#     of the first non-blank "code_block" line and stripped from all
#     "code_block" lines in the text-to-code conversion,
#   * `codeindent` defaults to `defaults.codeindent`_ and is added to
#     "code_block" lines in the code-to-text conversion.

# ::

    self._codeindent = 0

# `_textindent`
#   * set by `Text2Code.documentation_handler`_ to the minimal indent of a
#     documentation block,
#   * used in `Text2Code.set_state`_ to find the end of a code block.

# ::

    self._textindent = 0

# `_add_code_block_marker`
#   If the last paragraph of a documentation block does not end with a
#   code_block_marker_, it should be added (otherwise, the back-conversion
#   fails.).

#   `_add_code_block_marker` is set by `Code2Text.documentation_handler`_
#   and evaluated by `Code2Text.code_block_handler`_, because the
#   documentation_handler does not know whether the next block will be
#   documentation (with no need for a code_block_marker) or a code block.

# ::

    self._add_code_block_marker = False

# Determine the state of the block and convert with the matching "handler"::

    for block in collect_blocks(expandtabs_filter(lines)):
        try:
            self.set_state(block)
        except StopIteration:
            # `set_state` signals "out of data" this way
            return
        # the handler is looked up once per block, right after `set_state`
        handler = getattr(self, self.state + "_handler")
        for converted_line in handler(block):
            yield converted_line
619 # .. _TextCodeConverter.get_filter:
621 # get_filter
622 # """"""""""
623 # ::
def get_filter(self, filter_set, language):
    """Return the filter registered for `language` in `filter_set`.

    filter_set -- name of an attribute of `defaults` holding a mapping
                  of filters ("preprocessors" or "postprocessors")
    language   -- language name, combined with the conversion direction
                  to the look-up key ("text2<language>" in a `Text2Code`
                  instance, "<language>2text" in a `Code2Text` instance)

    Returns `identity_filter` if no matching filter is found.
    """
    converter_class = self.__class__
    if converter_class == Text2Code:
        key = "text2" + language
    elif converter_class == Code2Text:
        key = language + "2text"
    else:
        key = ""
    try:
        language_filter = getattr(defaults, filter_set)[key]
    except (AttributeError, KeyError, TypeError):
        # no such filter set or no filter for this key
        return identity_filter
    return language_filter
641 # get_indent
642 # """"""""""
643 # Return the number of leading spaces in `line`::
def get_indent(self, line):
    """Return the number of leading whitespace characters of `line`."""
    without_indent = line.lstrip()
    return len(line) - len(without_indent)
651 # Text2Code
652 # ---------
654 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
655 # Code blocks are unindented, documentation is commented (or filtered, if the
656 # ``strip`` option is True).
658 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
659 # literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
660 # treated as part of the documentation. This allows the inclusion of
661 # examples:
663 # >>> 23 + 3
664 # 26
666 # Note that there is no double colon before the doctest block in the
667 # text source.
669 # The class inherits the interface and helper functions from
670 # TextCodeConverter_ and adds functions specific to the text-to-code format
671 # conversion::
673 class Text2Code(TextCodeConverter):
674 """Convert a (reStructured) text source to code source
677 # .. _Text2Code.set_state:
679 # set_state
680 # ~~~~~~~~~
681 # ::
def set_state(self, block):
    """Set `self.state` to the type of the text-format `block`.

    Raises `StopIteration` if `block` is empty.
    """

# `set_state` is used inside an iteration. Hence, if we are out of data, a
# StopIteration exception should be raised::

    if not block:
        raise StopIteration

# The new state depends on the active state (from the last block) and
# features of the current block. It is either "header", "documentation", or
# "code_block".

# If the current state is "" (first block), check for
# the `header_string` indicating a leading code block::

    previous_state = self.state
    if previous_state == "":
        if block[0].startswith(self.header_string):
            self.state = "header"
        else:
            self.state = "documentation"
        return

# If the current state is "documentation", the next block is also
# documentation. The end of a documentation part is detected in the
# `Text2Code.documentation_handler`_::

    if previous_state not in ("code_block", "header"):
        return

# A "code_block" ends with the first less indented, non-blank line.
# `_textindent` is set by the documentation handler to the indent of the
# preceding documentation block::

    outdented = any(self.get_indent(line) <= self._textindent
                    for line in block if line.rstrip())
    if outdented:
        self.state = 'documentation'
    else:
        self.state = 'code_block'
727 # TODO: (or not to do?) insert blank line before the first line with too-small
728 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
729 # split and push-back of the documentation part)?
731 # .. _Text2Code.header_handler:
733 # header_handler
734 # ~~~~~~~~~~~~~~
736 # Sometimes code needs to remain on the first line(s) of the document to be
737 # valid. The most common example is the "shebang" line that tells a POSIX
738 # shell how to process an executable file::
740 #!/usr/bin/env python
742 # In Python, the special comment to indicate the encoding, e.g.
743 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
744 # or code too.
746 # If we want to keep the line numbers in sync for text and code source, the
747 # reStructured Text markup for these header lines must start at the same line
748 # as the first header line. Therefore, header lines could not be marked as
749 # literal block (this would require the ``::`` and an empty line above the
750 # code_block).
752 # OTOH, a comment may start at the same line as the comment marker and it
753 # includes subsequent indented lines. Comments are visible in the reStructured
754 # Text source but hidden in the pretty-printed output.
756 # With a header converted to comment in the text source, everything before
757 # the first documentation block (i.e. before the first paragraph using the
758 # matching comment string) will be hidden away (in HTML or PDF output).
760 # This seems a good compromise, the advantages
762 # * line numbers are kept
763 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
764 # * greater flexibility: you can hide a repeating header in a project
765 # consisting of many source files.
767 # set off the disadvantages
769 # - it may come as surprise if a part of the file is not "printed",
770 # - one more syntax element to learn for rst newbies to start with pylit,
771 # (however, starting from the code source, this will be auto-generated)
773 # In the case that there is no matching comment at all, the complete code
774 # source will become a comment -- however, in this case it is not very likely
775 # the source is a literate document anyway.
777 # If needed for the documentation, it is possible to quote the header in (or
778 # after) the first documentation block, e.g. as `parsed literal`.
779 # ::
def header_handler(self, lines):
    """Yield the leading code block with the `header_string` removed.

    The first element of `lines` is modified in place.
    """
    # only the first occurrence in the first line is the header marker
    first_line = lines[0]
    lines[0] = first_line.replace(self.header_string, "", 1)
    # from here on, the header is an ordinary code block
    for code_line in self.code_block_handler(lines):
        yield code_line
790 # .. _Text2Code.documentation_handler:
792 # documentation_handler
793 # ~~~~~~~~~~~~~~~~~~~~~
795 # The 'documentation' handler processes everything that is not recognised as
796 # "code_block". Documentation is quoted with `self.comment_string`
797 # (or filtered with `--strip=True`).
799 # If end-of-documentation marker is detected,
801 # * set state to 'code_block'
802 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
803 # next "documentation" block)
805 # ::
def documentation_handler(self, lines):
    """Yield the lines of a documentation block as comments.

    While iterating, `self.state` switches to "code_block" on a line
    matching `marker_regexp` (`self._textindent` is set to the indent of
    that line) and back to "documentation" on a following non-blank line
    that is not a directive option. With `self.strip` set, the state is
    tracked but nothing is yielded.
    """
    for line in lines:
        has_text = bool(line.rstrip())
        # text after the code-block marker: the marker was a false positive
        if (has_text and self.state == "code_block"
            and not self.directive_option_regexp.search(line)):
            self.state = "documentation"
        # a marker line ends the documentation block
        if self.marker_regexp.search(line):
            self.state = "code_block"
            self._textindent = self.get_indent(line)
        if self.strip:
            continue
        if has_text:
            yield self.comment_string + line
        elif self.state == "code_block":
            # blank lines preceding a code block are not commented
            yield line
        else:
            yield self.comment_string.rstrip() + line
833 # .. _Text2Code.code_block_handler:
835 # code_block_handler
836 # ~~~~~~~~~~~~~~~~~~
838 # The "code_block" handler is called with an indented literal block. It
839 # removes leading whitespace up to the indentation of the first code line in
840 # the file (this deviation from Docutils behaviour allows indented blocks of
841 # Python code). ::
def code_block_handler(self, block):
    """Yield the lines of an indented literal block without the indent.

    Raises `ValueError` if a non-blank line is indented less than
    `self._codeindent`.
    """

# If still unset, determine the indentation of code blocks from the first
# line of the block::

    if self._codeindent == 0:
        self._codeindent = self.get_indent(block[0])

# Yield unindented lines after checking whether we can safely unindent. If
# a line is less indented than `_codeindent`, something went wrong. ::

    for line in block:
        too_shallow = self.get_indent(line) < self._codeindent
        if too_shallow and line.lstrip():
            raise ValueError("code block contains line less indented "
                             "than %d spaces \n%r"%(self._codeindent, block))
        yield line.replace(" "*self._codeindent, "", 1)
863 # Code2Text
864 # ---------
866 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
867 # a source in "code format" (i.e. in a programming language), extracts
868 # documentation from comment blocks, and puts program code in literal blocks.
870 # The class inherits the interface and helper functions from
871 # TextCodeConverter_ and adds functions specific to the code-to-text format
872 # conversion::
874 class Code2Text(TextCodeConverter):
875 """Convert code source to text source
878 # set_state
879 # ~~~~~~~~~
881 # Check if block is "header", "documentation", or "code_block":
883 # A paragraph is "documentation", if every non-blank line starts with a
884 # matching comment string (including whitespace except for commented blank
885 # lines) ::
def set_state(self, block):
    """Set `self.state` to the type of the code-format `block`.

    A block without any code line is "documentation". A block containing
    a code line is the "header" if no block was classified before
    (`self.state` is empty), otherwise a "code_block".
    """
    # A code line is a line that is neither commented, nor blank, nor a
    # blank comment (comment string without its trailing whitespace).
    contains_code = any(
        not line.startswith(self.comment_string)
        and line.rstrip()
        and line.rstrip() != self.comment_string.rstrip()
        for line in block)
    if not contains_code:
        self.state = "documentation"
    elif self.state == "":
        self.state = "header"
    else:
        self.state = "code_block"
910 # header_handler
911 # ~~~~~~~~~~~~~~
913 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
914 # discussion of the "header" state.) ::
def header_handler(self, lines):
    """Format the leading code block ("header") for the text source.

    Yields the lines produced by `self.code_block_handler(lines)` with
    `self.header_string` prepended to the first of them. Nothing is
    yielded if `self.strip` is True or if the code block handler
    produces no lines.
    """
    if self.strip == True:
        return
    # get iterator over the lines that formats them as code-block
    code_lines = iter(self.code_block_handler(lines))
    # A bare `next()` would raise StopIteration on an empty code block,
    # which is converted to a RuntimeError inside a generator (PEP 479).
    first_line = next(code_lines, None)
    if first_line is None:
        return
    # prepend header string to first line
    yield self.header_string + first_line
    # yield remaining lines
    for line in code_lines:
        yield line
928 # .. _Code2Text.documentation_handler:
930 # documentation_handler
931 # ~~~~~~~~~~~~~~~~~~~~~
933 # The *documentation state* handler converts a comment to a documentation
934 # block by stripping the leading `comment string` from every line::
def documentation_handler(self, block):
    """Yield the lines of a comment block with the comment string removed.

    Depending on the settings, the code-block marker is stripped from the
    block or `self._add_code_block_marker` records whether a marker has
    to be added before the next code block.
    """

# Strip comment strings::

    lines = list(map(self.uncomment_line, block))

# If the code block is stripped, the literal marker would lead to an
# error when the text is converted with Docutils. Strip it as well. ::

    if self.strip or self.strip_marker:
        self.strip_code_block_marker(lines)

# Otherwise, check for the `code_block_marker`_ at the end of the
# documentation block (skipping directive options that might follow it)::

    elif self.add_missing_marker:
        marker_missing = True
        for line in reversed(lines):
            if self.marker_regexp.search(line):
                marker_missing = False
                break
            # other text at the end of the block: no need to look further
            if (line.rstrip() and
                not self.directive_option_regexp.search(line)):
                break
        self._add_code_block_marker = marker_missing

# Yield lines::

    for line in lines:
        yield line
970 # uncomment_line
971 # ~~~~~~~~~~~~~~
973 # Return documentation line after stripping comment string. Consider the
974 # case that a blank line has a comment string without trailing whitespace::
def uncomment_line(self, line):
    """Return `line` with the first comment string removed.

    If only the comment string without its trailing whitespace remains
    (blank comment line), that is removed as well.
    """
    uncommented = line.replace(self.comment_string, "", 1)
    bare_marker = self.stripped_comment_string
    if uncommented.rstrip() != bare_marker:
        return uncommented
    return uncommented.replace(bare_marker, "", 1)
983 # .. _Code2Text.code_block_handler:
985 # code_block_handler
986 # ~~~~~~~~~~~~~~~~~~
988 # The `code_block` handler returns the code block as indented literal
989 # block (or filters it, if ``self.strip == True``). The amount of the code
990 # indentation is controlled by `self.codeindent` (default 2). ::
def code_block_handler(self, lines):
    """Convert a code block to text format (indent or strip).

    Yield nothing if `self.strip` is set. Otherwise yield the lines of the
    code block indented by `self.codeindent` spaces, preceded by the
    code-block marker and a blank line if `self._add_code_block_marker`
    is set (the flag is cleared afterwards).
    """
    if self.strip == True:
        return
    if self._add_code_block_marker:
        # The marker lines belong to the documentation: switch the state
        # while they are emitted and restore it afterwards.
        self.state = "documentation"
        for marker_line in (self.code_block_marker + "\n", "\n"):
            yield marker_line
        self._add_code_block_marker = False
        self.state = "code_block"
    for code_line in lines:
        yield " "*self.codeindent + code_line
1009 # strip_code_block_marker
1010 # ~~~~~~~~~~~~~~~~~~~~~~~
1012 # Replace the literal marker with the equivalent of Docutils replace rules
1014 # * strip ``::``-line (and preceding blank line) if on a line on its own
1015 # * strip ``::`` if it is preceded by whitespace.
1016 # * convert ``::`` to a single colon if preceded by text
1018 # `lines` is a list of documentation lines (with a trailing blank line).
1019 # It is modified in-place::
def strip_code_block_marker(self, lines):
    """Remove the code-block marker from the end of a documentation block.

    lines -- list of documentation lines (with a trailing blank line);
             modified in-place.

    The marker is searched for in the last but one line with
    `self.marker_regexp`; its groups are: 1 leading text,
    2 code_block_marker, 3 remainder.
    """
    if len(lines) < 2:
        return  # just one line (no trailing blank line)

    found = self.marker_regexp.search(lines[-2])
    if found is None:  # no code_block_marker present
        return

    leading = found.group(1)
    if not leading:
        # `code_block_marker` on an extra line: drop that line ...
        del lines[-2]
        # ... and the preceding line too, if it is blank
        if len(lines) >= 2 and not lines[-2].lstrip():
            del lines[-2]
        return

    text = leading.rstrip()
    if text != leading:
        # '::' follows whitespace: drop marker and whitespace
        lines[-2] = text + found.group(3)
    else:
        # '::' follows text: keep a single colon
        lines[-2] = text + ':' + found.group(3)
1044 # Filters
1045 # =======
# Filters allow pre- and post-processing of the data to bring it into a format
# suitable for the "normal" text<->code conversion. An example is the
# conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and back).
1053 # Filters are generator functions that return an iterator acting on a
1054 # `data` iterable and yielding processed `data` lines.
1056 # identity_filter
1057 # ---------------
1059 # The most basic filter is the identity filter, that returns its argument as
1060 # iterator::
def identity_filter(data):
    """Return an iterator over `data`, leaving every item untouched."""
    unprocessed = iter(data)
    return unprocessed
1066 # expandtabs_filter
1067 # -----------------
1069 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
1071 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
1072 # hard tabs can lead to errors when the indentation is changed. ::
def expandtabs_filter(data):
    """Yield the lines of `data` with hard tabs expanded to spaces.

    Uses `str.expandtabs` with its default tab size.
    """
    for raw in data:
        expanded = raw.expandtabs()
        yield expanded
1080 # collect_blocks
1081 # --------------
1083 # A filter to aggregate "paragraphs" (blocks separated by blank
1084 # lines). Yields lists of lines::
def collect_blocks(lines):
    """Group `lines` into paragraphs.

    Yield one list of lines per paragraph, i.e. per block of lines
    separated by a blank line (whitespace only).

    Trailing blank lines are collected as well. An empty input yields
    one empty list.
    """
    paragraph = []
    seen_blank = False
    for text in lines:
        is_blank = not text.rstrip()
        if seen_blank and not is_blank:
            # first non-blank line after blank line(s): start a new paragraph
            yield paragraph
            paragraph = []
            seen_blank = False
        elif is_blank:
            seen_blank = True
        paragraph.append(text)
    yield paragraph
1109 # dumb_c_preprocessor
1110 # -------------------
1112 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
1113 # only converts lines that
1115 # * start with "/\* " and end with " \*/" (followed by whitespace only)
1117 # A more sophisticated version would also
1119 # * convert multi-line comments
1121 # + Keep indentation or strip 3 leading spaces?
1123 # * account for nested comments
1125 # * only convert comments that are separated from code by a blank line
1127 # ::
def dumb_c_preprocessor(data):
    """change `C` ``/* `` `` */`` comments into C++ ``// `` comments

    Works line-wise: only lines that start with "/* " and end with " */"
    (ignoring trailing whitespace) are converted, all other lines are
    yielded unchanged.
    """
    cxx_comment = defaults.comment_strings["c++"]
    opener, closer = "/* ", " */"
    for text in data:
        is_one_line_comment = (text.startswith(opener)
                               and text.rstrip().endswith(closer))
        if is_one_line_comment:
            # Replace the opener first, then drop the *last* closer.
            # (`str.replace` cannot count from the right, hence
            # `rsplit` + `join`.)
            text = text.replace(opener, cxx_comment, 1)
            text = "".join(text.rsplit(closer, 1))
        yield text
1142 # Unfortunately, the `replace` method of strings does not support negative
1143 # numbers for the `count` argument:
1145 # >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1146 # False
1148 # However, there is the `rsplit` method, that can be used together with `join`:
1150 # >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1151 # True
1153 # dumb_c_postprocessor
1154 # --------------------
1156 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1157 # delimiters ::
def dumb_c_postprocessor(data):
    """change C++ ``// `` comments into `C` ``/* `` `` */`` comments

    Works line-wise: blank comment lines lose the comment string, lines
    starting with the comment string are wrapped in ``/* `` and `` */``,
    all other lines are yielded unchanged.
    """
    cxx_comment = defaults.comment_strings["c++"]
    opener, closer = "/* ", " */"
    bare_comment = cxx_comment.rstrip()
    for text in data:
        if text.rstrip() == bare_comment:
            # NOTE(review): only the full comment string is replaced; a
            # line holding just the stripped comment string (no trailing
            # whitespace) presumably passes through unchanged -- confirm
            # that this is intended.
            yield text.replace(cxx_comment, "", 1)
        elif text.startswith(cxx_comment):
            reopened = text.replace(cxx_comment, opener, 1)
            yield reopened.rstrip() + closer + "\n"
        else:
            yield text
1173 # register filters
1174 # ----------------
1176 # ::
# Register the dumb C comment filters in the module-level `defaults`.
# The keys presumably name the conversion (`<from>2<to>`) the filter is used
# for -- confirm against the converter's filter lookup.
defaults.preprocessors['c2text'] = dumb_c_preprocessor
defaults.preprocessors['css2text'] = dumb_c_preprocessor
defaults.postprocessors['text2c'] = dumb_c_postprocessor
defaults.postprocessors['text2css'] = dumb_c_postprocessor
1184 # Command line use
1185 # ================
1187 # Using this script from the command line will convert a file according to its
1188 # extension. This default can be overridden by a couple of options.
1190 # Dual source handling
1191 # --------------------
1193 # How to determine which source is up-to-date?
1194 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1196 # - set modification date of `outfile` to the one of `infile`
1198 # Points out that the source files are 'synchronised'.
1200 # * Are there problems to expect from "backdating" a file? Which?
1202 # Looking at http://www.unix.com/showthread.php?t=20526, it seems
1203 # perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1204 # description of the "actuality" of the data in the file.
1206 # * Should this become a default or an option?
1208 # - alternatively move input file to a backup copy (with option: `--replace`)
1210 # - check modification date before overwriting
1211 # (with option: `--overwrite=update`)
1213 # - check modification date before editing (implemented as `Jed editor`_
1214 # function `pylit_check()` in `pylit.sl`_)
1216 # .. _Jed editor: http://www.jedsoft.org/jed/
1217 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1219 # Recognised Filename Extensions
1220 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1222 # Instead of defining a new extension for "pylit" literate programs,
1223 # by default ``.txt`` will be appended for the text source and stripped by
1224 # the conversion to the code source. I.e. for a Python program foo:
1226 # * the code source is called ``foo.py``
1227 # * the text source is called ``foo.py.txt``
1228 # * the html rendering is called ``foo.py.html``
1231 # OptionValues
1232 # ------------
1234 # The following class adds `complete`_ and `__getattr__`_
1235 # methods to `optparse.Values`::
class OptionValues(optparse.Values):
    """`optparse.Values` with a `complete` method and None as default value.

    Options that are not set read as None instead of raising an
    `AttributeError`.
    """

# .. _OptionValues.complete:
#
# complete
# ~~~~~~~~
#
# ::

    def complete(self, **keyw):
        """
        Complete the data attributes from keyword arguments.

        Do not overwrite existing attributes.
        Drop keyword arguments that correspond to data attributes in `self`.
        """
        for key, value in keyw.items():
            if key not in self.__dict__:
                setattr(self, key, value)

# .. _OptionValues.__getattr__:
#
# __getattr__
# ~~~~~~~~~~~
#
# To replace calls using ``<instance>.ensure_value("OPTION", None)`` with the
# more concise ``<instance>.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return default value (None) for non existing options

        Raises `AttributeError` for missing special (``__dunder__``) names.
        """
        # Special names are probed by the interpreter and the standard
        # library (e.g. `copy` and `pickle` test for `__setstate__`).
        # Answering None would make them look present but not callable.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None

# .. [#] The special method `__getattr__` is only called when an attribute
#        look-up has not found the attribute in the usual places (i.e. it is
#        not an instance attribute nor is it found in the class tree for
#        self).
1272 # .. [#] The special method `__getattr__` is only called when an attribute
1273 # look-up has not found the attribute in the usual places (i.e. it is
1274 # not an instance attribute nor is it found in the class tree for
1275 # self).
1278 # PylitOptions
1279 # ------------
1281 # The `PylitOptions` class comprises an option parser and methods for parsing
1282 # and completion of command line options::
class PylitOptions(object):
    """Storage and handling of command line options for pylit"""

# Instantiation
# ~~~~~~~~~~~~~
#
# ::

    def __init__(self):
        """Set up an `OptionParser` instance for pylit command line options

        The parser is stored as `self.parser`. Its usage message is the
        docstring of `main`.
        """
        p = optparse.OptionParser(usage=main.__doc__, version=__version__)

        # Conversion settings

        p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                     help="convert code source to text source")
        p.add_option("-t", "--txt2code", action="store_true",
                     help="convert text source to code source")
        p.add_option("--language",
                     choices = list(defaults.comment_strings.keys()),
                     help="use LANGUAGE native comment style")
        p.add_option("--comment-string", dest="comment_string",
                     help="documentation block marker in code source "
                     "(including trailing whitespace, "
                     "default: language dependent)")
        p.add_option("-m", "--code-block-marker", dest="code_block_marker",
                     help="syntax token starting a code block. (default '::')")
        p.add_option("--codeindent", type="int",
                     help="Number of spaces to indent code blocks with "
                     "code2text (default %d)" % defaults.codeindent)

        # Output file handling

        p.add_option("--overwrite", action="store",
                     choices = ["yes", "update", "no"],
                     help="overwrite output file (default 'update')")
        p.add_option("--replace", action="store_true",
                     help="move infile to a backup copy (appending '~')")
        # TODO: do we need this? If yes, make mtime update depend on it!
        # p.add_option("--keep-mtime", action="store_true",
        #              help="do not set the modification time of the outfile "
        #              "to the corresponding value of the infile")
        p.add_option("-s", "--strip", action="store_true",
                     help='"export" by stripping documentation or code')

        # Special actions

        p.add_option("-d", "--diff", action="store_true",
                     help="test for differences to existing file")
        p.add_option("--doctest", action="store_true",
                     help="run doctest.testfile() on the text version")
        p.add_option("-e", "--execute", action="store_true",
                     help="execute code (Python only)")

        self.parser = p

# .. _PylitOptions.parse_args:
#
# parse_args
# ~~~~~~~~~~
#
# The `parse_args` method calls the `optparse.OptionParser` on command
# line or provided args and returns the resulting values object (the
# `values` argument, if given). The first two positional arguments are
# stored as `infile` and `outfile`::

    def parse_args(self, args=sys.argv[1:], values=None):
        """Parse command line arguments using `optparse.OptionParser`.

           parse_args(args, values) -> values object

           args   --  list of command line arguments (the default value,
                      `sys.argv[1:]`, is evaluated once at definition time).
           values --  object to store the option's values
                      (passed on to `OptionParser.parse_args`).
        """
        # parse arguments
        (values, positional) = self.parser.parse_args(args, values)
        # Convert FILE and OUTFILE positional args to option values
        # (other positional arguments are ignored)
        for name, value in zip(("infile", "outfile"), positional):
            setattr(values, name, value)
        return values

# .. _PylitOptions.complete_values:
#
# complete_values
# ~~~~~~~~~~~~~~~
#
# Complete an OptionValues instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- OptionValues instance
        """

# Complete with module-level defaults_::

        values.complete(**vars(defaults).copy())

# Ensure infile is a string::

        values.ensure_value("infile", "")

# Guess conversion direction from `infile` filename::

        if getattr(values, 'txt2code', None) is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in values.text_extensions:
                # Diagnostics go to stderr: stdout may carry the converted
                # output (outfile '-').
                print('text extension %r found' % in_extension,
                      file=sys.stderr)
                values.txt2code = True
            elif in_extension in values.languages.keys():
                values.txt2code = False
            else:
                values.txt2code = None

# Auto-determine the output file name::

        values.ensure_value("outfile", self._get_outfile_name(values))

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = not (out_extension in values.text_extensions)

# Set the language of the code::

        # `txt2code` is no longer None here. Test its truth value so that
        # `code_extension` is also bound for values like 0 or 1.
        if values.txt2code:
            code_extension = os.path.splitext(values.outfile)[1]
        else:
            code_extension = os.path.splitext(values.infile)[1]
        values.ensure_value("language", values.languages[code_extension])

        return values

# _get_outfile_name
# ~~~~~~~~~~~~~~~~~
#
# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:
#
# * '-' (stdin) results in '-' (stdout)
# * strip the `text_extension`_ (txt2code) or
# * add the `text_extension`_ (code2txt)
# * fallback: if no guess can be made, add ".out"
#
#   .. TODO: use values.outfile_extension if it exists?
#
# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        values -- object with the attributes `infile`, `text_extensions`,
                  `languages`, and `txt2code`.
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in values.text_extensions:
            return base # strip
        if (ext and ext in values.languages) or values.txt2code == False:
            return values.infile + values.text_extensions[0] # add
        # give up
        return values.infile + ".out"

# .. _PylitOptions.__call__:
#
# __call__
# ~~~~~~~~
#
# The special `__call__` method allows to use PylitOptions instances as
# *callables*: Calling an instance parses the argument list to extract option
# values and completes them based on "context-sensitive defaults".  Keyword
# arguments are used as default values. ::

    def __call__(self, args=sys.argv[1:], **keyw):
        """parse and complete command line args, return option values

        args -- list of command line arguments
        keyw -- default values for the options
        """
        values = OptionValues(keyw)
        values = self.parse_args(args, values)
        return self.complete_values(values)
1476 # Helper functions
1477 # ----------------
1479 # open_streams
1480 # ~~~~~~~~~~~~
1482 # Return file objects for in- and output. If the input path is missing,
1483 # write usage and abort. (An alternative would be to use stdin as default.
1484 # However, this leaves the uninitiated user with a non-responding application
1485 # if (s)he just tries the script without any arguments) ::
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open(infile) or sys.stdin
    out_stream  --  open(outfile, 'w') or sys.stdout
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`
                              (or has the same modification time),
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.
    keyw        --  ignored (allows to pass a complete set of options).

    Raises `ValueError` for an unknown `overwrite` value and `IOError` if
    `infile` is empty or cannot be opened, or if `outfile` must not be
    overwritten or cannot be opened.
    """
    if overwrite not in ('yes', 'no', 'update'):
        raise ValueError('Argument "overwrite" must be "yes", "no",'
                         ' or update, not "%s".' % overwrite)
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(17, "Output file exists!", outfile)
        else:
            if overwrite == 'update':
                # NOTE(review): with infile '-' the time stamp of a file
                # named '-' is compared -- confirm that this is intended.
                newer = is_newer(outfile, infile)
                if newer is None:
                    raise IOError(0, "Output file is as old as input file!",
                                  outfile)
                if newer:
                    raise IOError(1, "Output file is newer than input file!",
                                  outfile)
            out_stream = open(outfile, 'w')
    except Exception:
        # do not leak the already opened input file
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1522 # is_newer
1523 # ~~~~~~~~
1525 # ::
def is_newer(path1, path2):
    """Check if `path1` is newer than `path2` (using mtime)

    Compare modification time of files at path1 and path2.

    Non-existing files are considered oldest: Return False if path1 does not
    exist and True if path2 does not exist.

    Return None if the modification time differs less than 1/10 second.
    (This evaluates to False in a Boolean context but allows a test
    for equality.)
    """
    def mtime_or_oldest(path):
        # -1 sorts before the time stamp of every existing file
        try:
            return os.path.getmtime(path)
        except OSError:
            return -1

    first = mtime_or_oldest(path1)
    second = mtime_or_oldest(path2)
    if abs(first - second) < 0.1:
        return None
    return first > second
1552 # get_converter
1553 # ~~~~~~~~~~~~~
1555 # Get an instance of the converter state machine::
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`.

    txt2code -- true: return a `Text2Code` instance,
                false: return a `Code2Text` instance.
    keyw     -- passed on to the converter class.
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1564 # Use cases
1565 # ---------
1567 # run_doctest
1568 # ~~~~~~~~~~~
1569 # ::
def run_doctest(infile="-", txt2code=True,
                globs={}, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- path of the source file ('-' for stdin)
    txt2code    -- False: `infile` is a code source and is converted to
                   text first; otherwise `infile` is read as text source
    globs       -- globals for the doctest examples
                   (only passed on to `DocTestParser.get_doctest`)
    verbose, optionflags -- passed on to `doctest.DocTestRunner`
    keyw        -- options for the `Code2Text` converter

    Return the tuple (failures, tries).
    """

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see the documentation of `sys.path`)::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            keyw.update({'add_missing_marker': False})
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # the source is read completely, do not leave the file open
        if data is not sys.stdin:
            data.close()

# decode doc string if there is a "magic comment" in the first or second line
# (http://docs.python.org/reference/lexical_analysis.html#encoding-declarations)
# ::

    if sys.version_info < (3,0):
        firstlines = ' '.join(docstring.splitlines()[:2])
        # raw string: "\s" and "\w" are invalid string escape sequences
        match = re.search(r'coding[=:]\s*([-\w.]+)', firstlines)
        if match:
            docencoding = match.group(1)
            docstring = docstring.decode(docencoding)

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    runner = DocTestRunner(verbose, optionflags)
    runner.run(test)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests"%(runner.failures, runner.tries))
    return runner.failures, runner.tries
1620 # diff
1621 # ~~~~
1623 # ::
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile does not exist or is '-', do a round-trip conversion and
    report differences.

    infile   -- path of the source file ('-' for stdin)
    outfile  -- path of the file to compare the conversion with
    txt2code -- conversion direction (see `get_converter`)
    keyw     -- options for the converter

    Print a unified diff to stdout. Return True if differences were
    found, else False.
    """

    import difflib

    # for diffing, we need a copy of the data as list::
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        with open(infile) as instream:
            data = instream.readlines()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-' and os.path.exists(outfile):
        with open(outfile) as outstream:
            old = outstream.readlines()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # The lines of `delta` already end with a newline. Printing a
        # separator would shift the following line and its +/- prefix.
        print(line, end='')
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1671 # execute
1672 # ~~~~~~~
1674 # Works only for python code.
1676 # Does not work with `eval`, as code is not just one expression. ::
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    infile   -- path of the source file ('-' for stdin)
    txt2code -- true: `infile` is a text source and is converted to code
                with `Text2Code` before execution
    keyw     -- options for the `Text2Code` converter

    Works only for Python code.
    """
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        with open(infile) as f:
            data = f.readlines()
    if txt2code:
        data = str(Text2Code(data, **keyw))
    # NOTE: this runs the complete content of `infile` with the privileges
    # of the caller -- only use it on trusted sources.
    exec(''.join(data))
1689 # main
1690 # ----
1692 # If this script is called from the command line, the `main` function will
1693 # convert the input (file or stdin) between text and code formats.
1695 # Option default values for the conversion can be given as keyword arguments
1696 # to `main`_. The option defaults will be updated by command line options and
1697 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1698 # helper functions and the converter instantiation.
1700 # This allows easy customisation for programmatic use -- just call `main`
1701 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1703 # ::
def main(args=sys.argv[1:], **defaults):
    """%prog [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in and output.
    """
    # NOTE: the docstring above is also the usage message of the option
    # parser (`PylitOptions.__init__` passes `main.__doc__`).
    #
    # args     -- list of command line arguments
    # defaults -- option default values (shadows the module-level
    #             `defaults` inside this function)
    #
    # Returns the result of `run_doctest`, `diff`, or `execute` if the
    # corresponding option is set, else None. Exits with the error number
    # of the `IOError` if the streams cannot be opened.

# Parse and complete the options::

    options = PylitOptions()(args, **defaults)

# Special actions with early return::

    if options.doctest:
        return run_doctest(**vars(options).copy())

    if options.diff:
        return diff(**vars(options).copy())

    if options.execute:
        return execute(**vars(options).copy())

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**vars(options).copy())
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance, convert and write to out_stream::

    try:
        converter = get_converter(data, **vars(options).copy())
        out_stream.write(str(converter))
    finally:
        # Close the input file: it is no longer needed and must not be
        # open when it is renamed below (``--replace``).
        if data is not sys.stdin:
            data.close()

    if out_stream is not sys.stdout:
        print("output written to", out_stream.name)
        out_stream.close()

# If input and output are from files, set the modification time (`mtime`) of
# the output file to the one of the input file to indicate that the contained
# information is equal. [#]_ ::

    try:
        os.utime(options.outfile, (os.path.getatime(options.outfile),
                                   os.path.getmtime(options.infile))
                )
    except OSError:
        # best effort: no time stamps if in- or output is not a file
        pass

# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).
#
# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        os.rename(options.infile, options.infile + "~")
1779 # Run main, if called from the command line::
# Script entry point: `main` is only called when the module is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()
1785 # Open questions
1786 # ==============
1788 # Open questions and ideas for further development
1790 # Clean code
1791 # ----------
1793 # * can we gain from using "shutils" over "os.path" and "os"?
1794 # * use pylint or pyChecker to enforce a consistent style?
1796 # Options
1797 # -------
1799 # * Use templates for the "intelligent guesses" (with Python syntax for string
1800 # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1802 # * Is it sensible to offer the `header_string` option also as command line
1803 # option?
1805 # treatment of blank lines
1806 # ------------------------
1808 # Alternatives: Keep blank lines blank
1810 # - "never" (current setting) -> "visually merges" all documentation
1811 # if there is no interjacent code
1813 # - "always" -> disrupts documentation blocks,
1815 # - "if empty" (no whitespace). Comment if there is whitespace.
1817 # This would allow non-obstructing markup but unfortunately this is (in
1818 # most editors) also non-visible markup.
1820 # + "if double" (if there is more than one consecutive blank line)
1822 # With this handling, the "visual gap" remains in both, text and code
1823 # source.
1826 # Parsing Problems
1827 # ----------------
1829 # * Ignore "matching comments" in literal strings?
1831 # Too complicated: Would need a specific detection algorithm for every
1832 # language that supports multi-line literal strings (C++, PHP, Python)
1834 # * Warn if a comment in code will become documentation after round-trip?
1837 # docstrings in code blocks
1838 # -------------------------
1840 # * How to handle docstrings in code blocks? (it would be nice to convert them
1841 # to rst-text if ``__docformat__ == restructuredtext``)
1843 # TODO: Ask at Docutils users|developers
1845 # Plug-ins
1846 # --------
1848 # Specify a path for user additions and plug-ins. This would require to
1849 # convert Pylit from a pure module to a package...
1851 # 6.4.3 Packages in Multiple Directories
1853 # Packages support one more special attribute, __path__. This is initialized
1854 # to be a list containing the name of the directory holding the package's
1855 # __init__.py before the code in that file is executed. This
1856 # variable can be modified; doing so affects future searches for modules and
1857 # subpackages contained in the package.
1859 # While this feature is not often needed, it can be used to extend the set
1860 # of modules found in a package.
1863 # .. References
1865 # .. _Docutils: http://docutils.sourceforge.net/
1866 # .. _Sphinx: http://sphinx.pocoo.org
1867 # .. _Pygments: http://pygments.org/
1868 # .. _code-block directive:
1869 # http://docutils.sourceforge.net/sandbox/code-block-directive/
1870 # .. _literal block:
1871 # .. _literal blocks:
1872 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#literal-blocks
1873 # .. _indented literal block:
1874 # .. _indented literal blocks:
1875 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1876 # .. _quoted literal block:
1877 # .. _quoted literal blocks:
1878 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1879 # .. _parsed-literal blocks:
1880 # http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block
1881 # .. _doctest block:
1882 # .. _doctest blocks:
1883 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks