third_party/markupsafe/__init__.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     markupsafe
   4     ~~~~~~~~~~
   5
   6     Implements a Markup string.
   7
   8     :copyright: (c) 2010 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 from markupsafe._compat import text_type, string_types, int_types, \
  13      unichr, PY2
  14
  15
  16 __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
  17
  18
  19 _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
  20 _entity_re = re.compile(r'&([^;]+);')
  21
  22
  23 class Markup(text_type):
  24     r"""Marks a string as being safe for inclusion in HTML/XML output without
  25     needing to be escaped.  This implements the `__html__` interface a couple
  26     of frameworks and web applications use.  :class:`Markup` is a direct
  27     subclass of `unicode` and provides all the methods of `unicode` just that
  28     it escapes arguments passed and always returns `Markup`.
  29
  30     The `escape` function returns markup objects so that double escaping can't
  31     happen.
  32
  33     The constructor of the :class:`Markup` class can be used for three
  34     different things:  When passed an unicode object it's assumed to be safe,
  35     when passed an object with an HTML representation (has an `__html__`
  36     method) that representation is used, otherwise the object passed is
  37     converted into a unicode string and then assumed to be safe:
  38
  39     >>> Markup("Hello <em>World</em>!")
  40     Markup(u'Hello <em>World</em>!')
  41     >>> class Foo(object):
  42     ...  def __html__(self):
  43     ...   return '<a href="#">foo</a>'
  44     ...
  45     >>> Markup(Foo())
  46     Markup(u'<a href="#">foo</a>')
  47
  48     If you want object passed being always treated as unsafe you can use the
  49     :meth:`escape` classmethod to create a :class:`Markup` object:
  50
  51     >>> Markup.escape("Hello <em>World</em>!")
  52     Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
  53
  54     Operations on a markup string are markup aware which means that all
  55     arguments are passed through the :func:`escape` function:
  56
  57     >>> em = Markup("<em>%s</em>")
  58     >>> em % "foo & bar"
  59     Markup(u'<em>foo &amp; bar</em>')
  60     >>> strong = Markup("<strong>%(text)s</strong>")
  61     >>> strong % {'text': '<blink>hacker here</blink>'}
  62     Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
  63     >>> Markup("<em>Hello</em> ") + "<foo>"
  64     Markup(u'<em>Hello</em> &lt;foo&gt;')
  65     """
  66     __slots__ = ()
  67
  68     def __new__(cls, base=u'', encoding=None, errors='strict'):
  69         if hasattr(base, '__html__'):
  70             base = base.__html__()
  71         if encoding is None:
  72             return text_type.__new__(cls, base)
  73         return text_type.__new__(cls, base, encoding, errors)
  74
  75     def __html__(self):
  76         return self
  77
  78     def __add__(self, other):
  79         if isinstance(other, string_types) or hasattr(other, '__html__'):
  80             return self.__class__(super(Markup, self).__add__(self.escape(other)))
  81         return NotImplemented
  82
  83     def __radd__(self, other):
  84         if hasattr(other, '__html__') or isinstance(other, string_types):
  85             return self.escape(other).__add__(self)
  86         return NotImplemented
  87
  88     def __mul__(self, num):
  89         if isinstance(num, int_types):
  90             return self.__class__(text_type.__mul__(self, num))
  91         return NotImplemented
  92     __rmul__ = __mul__
  93
  94     def __mod__(self, arg):
  95         if isinstance(arg, tuple):
  96             arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
  97         else:
  98             arg = _MarkupEscapeHelper(arg, self.escape)
  99         return self.__class__(text_type.__mod__(self, arg))
 100
 101     def __repr__(self):
 102         return '%s(%s)' % (
 103             self.__class__.__name__,
 104             text_type.__repr__(self)
 105         )
 106
 107     def join(self, seq):
 108         return self.__class__(text_type.join(self, map(self.escape, seq)))
 109     join.__doc__ = text_type.join.__doc__
 110
 111     def split(self, *args, **kwargs):
 112         return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
 113     split.__doc__ = text_type.split.__doc__
 114
 115     def rsplit(self, *args, **kwargs):
 116         return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
 117     rsplit.__doc__ = text_type.rsplit.__doc__
 118
 119     def splitlines(self, *args, **kwargs):
 120         return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))
 121     splitlines.__doc__ = text_type.splitlines.__doc__
 122
 123     def unescape(self):
 124         r"""Unescape markup again into an text_type string.  This also resolves
 125         known HTML4 and XHTML entities:
 126
 127         >>> Markup("Main &raquo; <em>About</em>").unescape()
 128         u'Main \xbb <em>About</em>'
 129         """
 130         from markupsafe._constants import HTML_ENTITIES
 131         def handle_match(m):
 132             name = m.group(1)
 133             if name in HTML_ENTITIES:
 134                 return unichr(HTML_ENTITIES[name])
 135             try:
 136                 if name[:2] in ('#x', '#X'):
 137                     return unichr(int(name[2:], 16))
 138                 elif name.startswith('#'):
 139                     return unichr(int(name[1:]))
 140             except ValueError:
 141                 pass
 142             return u''
 143         return _entity_re.sub(handle_match, text_type(self))
 144
 145     def striptags(self):
 146         r"""Unescape markup into an text_type string and strip all tags.  This
 147         also resolves known HTML4 and XHTML entities.  Whitespace is
 148         normalized to one:
 149
 150         >>> Markup("Main &raquo;  <em>About</em>").striptags()
 151         u'Main \xbb About'
 152         """
 153         stripped = u' '.join(_striptags_re.sub('', self).split())
 154         return Markup(stripped).unescape()
 155
 156     @classmethod
 157     def escape(cls, s):
 158         """Escape the string.  Works like :func:`escape` with the difference
 159         that for subclasses of :class:`Markup` this function would return the
 160         correct subclass.
 161         """
 162         rv = escape(s)
 163         if rv.__class__ is not cls:
 164             return cls(rv)
 165         return rv
 166
 167     def make_wrapper(name):
 168         orig = getattr(text_type, name)
 169         def func(self, *args, **kwargs):
 170             args = _escape_argspec(list(args), enumerate(args), self.escape)
 171             #_escape_argspec(kwargs, kwargs.iteritems(), None)
 172             return self.__class__(orig(self, *args, **kwargs))
 173         func.__name__ = orig.__name__
 174         func.__doc__ = orig.__doc__
 175         return func
 176
 177     for method in '__getitem__', 'capitalize', \
 178                   'title', 'lower', 'upper', 'replace', 'ljust', \
 179                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 180                   'translate', 'expandtabs', 'swapcase', 'zfill':
 181         locals()[method] = make_wrapper(method)
 182
 183     # new in python 2.5
 184     if hasattr(text_type, 'partition'):
 185         def partition(self, sep):
 186             return tuple(map(self.__class__,
 187                              text_type.partition(self, self.escape(sep))))
 188         def rpartition(self, sep):
 189             return tuple(map(self.__class__,
 190                              text_type.rpartition(self, self.escape(sep))))
 191
 192     # new in python 2.6
 193     if hasattr(text_type, 'format'):
 194         format = make_wrapper('format')
 195
 196     # not in python 3
 197     if hasattr(text_type, '__getslice__'):
 198         __getslice__ = make_wrapper('__getslice__')
 199
 200     del method, make_wrapper
 201
 202
 203 def _escape_argspec(obj, iterable, escape):
 204     """Helper for various string-wrapped functions."""
 205     for key, value in iterable:
 206         if hasattr(value, '__html__') or isinstance(value, string_types):
 207             obj[key] = escape(value)
 208     return obj
 209
 210
 211 class _MarkupEscapeHelper(object):
 212     """Helper for Markup.__mod__"""
 213
 214     def __init__(self, obj, escape):
 215         self.obj = obj
 216         self.escape = escape
 217
 218     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
 219     __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
 220     __repr__ = lambda s: str(s.escape(repr(s.obj)))
 221     __int__ = lambda s: int(s.obj)
 222     __float__ = lambda s: float(s.obj)
 223
 224
# These imports have to happen down here because both the speedups and the
# native module import the Markup type, which is defined above.
# markupsafe._speedups is tried first; markupsafe._native is the fallback
# when importing it fails.
try:
    from markupsafe._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from markupsafe._native import escape, escape_silent, soft_unicode

# When not running on Python 2, also publish soft_unicode under the name
# soft_str and add that name to the public API list.
if not PY2:
    soft_str = soft_unicode
    __all__.append('soft_str')