# -*- coding: utf-8 -*-
"""
    Implements a Markup string.

    :copyright: (c) 2010 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
12 from markupsafe
._compat
import text_type
, string_types
, int_types
, \
# Public names of this module.  Kept as a list (not a tuple) because further
# names are appended to it at the bottom of the file.
__all__ = [
    'Markup',
    'soft_unicode',
    'escape',
    'escape_silent',
]
19 _striptags_re
= re
.compile(r
'(<!--.*?-->|<[^>]*>)')
20 _entity_re
= re
.compile(r
'&([^;]+);')
class Markup(text_type):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed an unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want object passed being always treated as unsafe you can use the
    :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        """Create the string from `base`.

        If `base` has an `__html__` method its result is used.  `encoding`
        and `errors` are only forwarded to the string constructor when an
        encoding is given.
        """
        if hasattr(base, '__html__'):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return the object itself: it already is its HTML representation.

        The other methods (and the module helpers) test for this attribute
        to decide whether a value carries an HTML representation.
        """
        return self

    def __add__(self, other):
        """Return ``self + escape(other)`` as an instance of this class."""
        if isinstance(other, string_types) or hasattr(other, '__html__'):
            return self.__class__(
                super(Markup, self).__add__(self.escape(other)))
        # Let Python try the reflected operation / raise TypeError instead
        # of silently producing ``None``.
        return NotImplemented

    def __radd__(self, other):
        """Return ``escape(other) + self`` for the reflected addition."""
        if hasattr(other, '__html__') or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup `num` times, keeping the class."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented
    # ``3 * markup`` must keep the class as well.
    __rmul__ = __mul__

    def __mod__(self, arg):
        """Apply ``%`` formatting with every interpolated value escaped.

        Each argument is wrapped in :class:`_MarkupEscapeHelper`, which
        escapes the value when the formatting operation converts it.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            text_type.__repr__(self),
        )

    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))
    join.__doc__ = text_type.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(
            self, *args, **kwargs)))
    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into an text_type string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'

        Matches that are neither a known entity name nor a valid numeric
        character reference are left in the text unchanged.
        """
        from markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # Not a number, or a code point outside the valid range.
                pass
            # The entity pattern also matches ordinary text such as
            # "AT&T rocks;" -- keep what was matched instead of dropping it.
            return m.group()
        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        r"""Unescape markup into an text_type string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo; <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_wrapper(name):
        """Build a method that escapes its arguments and then delegates to
        the `text_type` method called `name`, re-wrapping the result in the
        instance's class.  Only used while the class body executes.
        """
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            # Escape in place; the helper's return value is not relied on.
            positional = list(args)
            _escape_argspec(positional, enumerate(args), self.escape)
            # Keyword arguments must be escaped too, otherwise e.g.
            # ``markup.format(x='<b>')`` would inject raw markup.  Iterate
            # over a snapshot because the dict is updated while looping.
            _escape_argspec(kwargs, list(kwargs.items()), self.escape)
            return self.__class__(orig(self, *positional, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)

    # Only defined when the underlying string type offers the methods.
    if hasattr(text_type, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             text_type.partition(self, self.escape(sep))))

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             text_type.rpartition(self, self.escape(sep))))

    if hasattr(text_type, 'format'):
        format = make_wrapper('format')

    if hasattr(text_type, '__getslice__'):
        __getslice__ = make_wrapper('__getslice__')

    # Class-body helpers must not end up as attributes of the class.
    del method, make_wrapper
203 def _escape_argspec(obj
, iterable
, escape
):
204 """Helper for various string-wrapped functions."""
205 for key
, value
in iterable
:
206 if hasattr(value
, '__html__') or isinstance(value
, string_types
):
207 obj
[key
] = escape(value
)
211 class _MarkupEscapeHelper(object):
212 """Helper for Markup.__mod__"""
214 def __init__(self
, obj
, escape
):
218 __getitem__
= lambda s
, x
: _MarkupEscapeHelper(s
.obj
[x
], s
.escape
)
219 __unicode__
= __str__
= lambda s
: text_type(s
.escape(s
.obj
))
220 __repr__
= lambda s
: str(s
.escape(repr(s
.obj
)))
221 __int__
= lambda s
: int(s
.obj
)
222 __float__
= lambda s
: float(s
.obj
)
# We have to import these down here because the speedups and native modules
# import the Markup type, which is defined above.  Prefer the speedups
# module and fall back to the native one when it cannot be imported.
try:
    from markupsafe._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from markupsafe._native import escape, escape_silent, soft_unicode

# ``soft_str`` is exported as an alias of ``soft_unicode``.
soft_str = soft_unicode
__all__.append('soft_str')