5 * This module implements functions for automatic escaping in C for better
8 * :copyright: (c) 2010 by Armin Ronacher.
/* Number of entries in the two escape lookup tables.  Both tables are indexed
   by character value, so only values 0..62 can be looked up; the escaped
   characters assigned in this file all fall inside that range. */
14 #define ESCAPED_CHARS_TABLE_SIZE 63
/* UNICHR(x): decode the ASCII C string x into a new unicode object and yield
   a pointer to its Py_UNICODE buffer.
   - The expansion is a single parenthesised expression with no trailing
     semicolon, so the caller terminates the statement itself.
   - The decoded object is not stored anywhere by the macro, so the reference
     is never released; the returned buffer stays usable for that reason.
   - The result of PyUnicode_DecodeASCII is not checked before it is used. */
15 #define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)))
/* Compatibility shim: when PY_VERSION_HEX is below 0x02050000 and
   PY_SSIZE_T_MIN is not already defined, provide Py_ssize_t as a plain int
   together with its limits taken from INT_MAX / INT_MIN. */
17 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
18 typedef int Py_ssize_t
;
19 #define PY_SSIZE_T_MAX INT_MAX
20 #define PY_SSIZE_T_MIN INT_MIN
/* File-level state shared by the functions in this file. */

/* Object fetched as the Markup attribute of the imported markupsafe module;
   it is called to wrap values before they are returned. */
24 static PyObject
* markup
;
/* Per-character growth table, indexed by character value: replacement length
   minus one.  An entry of 0 means the character is not escaped. */
25 static Py_ssize_t escaped_chars_delta_len
[ESCAPED_CHARS_TABLE_SIZE
];
/* Per-character replacement text, indexed by character value.  Only the
   entries for the escaped characters are assigned. */
26 static Py_UNICODE
*escaped_chars_repl
[ESCAPED_CHARS_TABLE_SIZE
];
32 /* mapping of characters to replace */
/* Replacement text for each escaped character: the HTML character reference
   that stands for it.  The length of every replacement must equal the
   matching escaped_chars_delta_len entry plus one (5, 5, 5, 4 and 4
   characters), because the delta table stores the replacement length - 1
   and the copy code relies on that length. */
33 escaped_chars_repl
['"'] = UNICHR("&#34;");
34 escaped_chars_repl
['\''] = UNICHR("&#39;");
35 escaped_chars_repl
['&'] = UNICHR("&amp;");
36 escaped_chars_repl
['<'] = UNICHR("&lt;");
37 escaped_chars_repl
['>'] = UNICHR("&gt;");
39 /* lengths of those characters when replaced - 1 */
/* Zero the whole table first so that every character not assigned below has
   a delta of 0, which the escaping code treats as nothing to replace. */
40 memset(escaped_chars_delta_len
, 0, sizeof (escaped_chars_delta_len
));
/* Chained assignment: the double quote, the apostrophe and the ampersand
   each make the output 4 characters longer. */
41 escaped_chars_delta_len
['"'] = escaped_chars_delta_len
['\''] = \
42 escaped_chars_delta_len
['&'] = 4;
/* The less-than and greater-than signs each add 3 characters. */
43 escaped_chars_delta_len
['<'] = escaped_chars_delta_len
['>'] = 3;
45 /* import markup type so that we can mark the return value */
46 module
= PyImport_ImportModule("markupsafe");
/* The attribute lookup result is kept in the file-level markup pointer.
   NOTE(review): any check of the import result sits on lines that are not
   visible in this listing - confirm against the full file. */
49 markup
= PyObject_GetAttrString(module
, "Markup");
/* escape_unicode: build a new unicode object from the input in which every
   character that has a nonzero entry in escaped_chars_delta_len is replaced
   by its escaped_chars_repl text.
   Works in two passes over the input buffer: the first pass measures how
   much longer the result will be (delta) and counts the replacements
   (erepl); the second pass copies into a freshly allocated object of size
   input length + delta.  The new object is returned as a PyObject pointer.
   NOTE(review): several lines of this function are not visible in this
   listing; the comments below describe only the visible statements. */
56 escape_unicode(PyUnicodeObject
*in
)
/* inp walks the input buffer; inp_end points one past its last character. */
59 Py_UNICODE
*inp
= PyUnicode_AS_UNICODE(in
);
60 const Py_UNICODE
*inp_end
= PyUnicode_AS_UNICODE(in
) + PyUnicode_GET_SIZE(in
);
61 Py_UNICODE
*next_escp
;
/* delta: total growth; erepl: number of replacements; delta_len: table
   entry of the character currently being replaced. */
63 Py_ssize_t delta
=0, erepl
=0, delta_len
=0;
65 /* First we need to figure out how long the escaped string will be */
/* The loop keeps going while the current character is nonzero or the end
   pointer has not been reached.
   NOTE(review): the first operand reads the character before the bounds
   comparison, so this presumably relies on a terminating zero after the
   buffer - confirm. */
66 while (*(inp
) || inp
< inp_end
) {
/* Only character values below the table size are looked up; larger values
   never contribute to delta or erepl. */
67 if (*inp
< ESCAPED_CHARS_TABLE_SIZE
) {
68 delta
+= escaped_chars_delta_len
[*inp
];
/* The double negation turns any nonzero table entry into exactly 1. */
69 erepl
+= !!escaped_chars_delta_len
[*inp
];
74 /* Do we need to escape anything at all? */
/* Allocate the result with room for the original characters plus the
   measured growth; passing NULL as the source leaves the contents to be
   filled in by the copies below. */
80 out
= (PyUnicodeObject
*)PyUnicode_FromUnicode(NULL
, PyUnicode_GET_SIZE(in
) + delta
);
/* Second pass: outp is the write position, inp restarts at the beginning. */
84 outp
= PyUnicode_AS_UNICODE(out
);
85 inp
= PyUnicode_AS_UNICODE(in
);
87 /* look for the next substitution */
89 while (next_escp
< inp_end
) {
/* The assignment inside the condition stores the table entry in delta_len
   and the branch is taken only when that entry is nonzero. */
90 if (*next_escp
< ESCAPED_CHARS_TABLE_SIZE
&&
91 (delta_len
= escaped_chars_delta_len
[*next_escp
])) {
98 if (next_escp
> inp
) {
99 /* copy unescaped chars between inp and next_escp */
100 Py_UNICODE_COPY(outp
, inp
, next_escp
-inp
);
101 outp
+= next_escp
- inp
;
104 /* escape 'next_escp' */
/* NOTE(review): the table stores replacement length - 1, so delta_len is
   presumably adjusted on a line not visible here before this copy - confirm. */
105 Py_UNICODE_COPY(outp
, escaped_chars_repl
[*next_escp
], delta_len
);
/* Copy whatever follows the last replacement: total input length minus the
   number of characters already consumed. */
111 Py_UNICODE_COPY(outp
, inp
, PyUnicode_GET_SIZE(in
) - (inp
- PyUnicode_AS_UNICODE(in
)));
113 return (PyObject
*)out
;
/* escape: module-level entry point registered in the method table under the
   name escape.  From the visible statements it
     1. hands exact longs, ints (Python 2 build), floats and bools straight
        to the markup callable,
     2. looks up an __html__ attribute on the argument and calls it with no
        arguments,
     3. otherwise converts a non-unicode argument to text, runs
        escape_unicode on it and wraps the result by calling markup.
   NOTE(review): the branching, error handling and reference counting between
   these steps are on lines not visible in this listing. */
118 escape(PyObject
*self
, PyObject
*text
)
120 PyObject
*s
= NULL
, *rv
= NULL
, *html
;
122 /* we don't have to escape integers, bools or floats */
123 if (PyLong_CheckExact(text
) ||
/* PyInt_CheckExact is only compiled in when PY_MAJOR_VERSION is below 3. */
124 #if PY_MAJOR_VERSION < 3
125 PyInt_CheckExact(text
) ||
127 PyFloat_CheckExact(text
) || PyBool_Check(text
) ||
/* The value is passed to markup unchanged, without any escaping. */
129 return PyObject_CallFunctionObjArgs(markup
, text
, NULL
);
131 /* if the object has an __html__ method that performs the escaping */
132 html
= PyObject_GetAttrString(text
, "__html__");
/* Call the attribute with no arguments; its result is stored in rv. */
134 rv
= PyObject_CallObject(html
, NULL
);
139 /* otherwise make the object unicode if it isn't, then escape */
141 if (!PyUnicode_Check(text
)) {
/* Two alternative declarations of the converted value: PyObject_Unicode for
   the Python 2 build, PyObject_Str otherwise (presumably the other branch of
   this conditional; the directive line is not visible here). */
142 #if PY_MAJOR_VERSION < 3
143 PyObject
*unicode
= PyObject_Unicode(text
);
145 PyObject
*unicode
= PyObject_Str(text
);
/* Escape the converted object ... */
149 s
= escape_unicode((PyUnicodeObject
*)unicode
);
/* ... or escape the argument directly when it already is unicode. */
153 s
= escape_unicode((PyUnicodeObject
*)text
);
155 /* convert the unicode string into a markup object. */
156 rv
= PyObject_CallFunctionObjArgs(markup
, (PyObject
*)s
, NULL
);
/* escape_silent: registered in the method table with the description that it
   behaves like escape but converts None to an empty string.
   Two exits are visible: one delegates to escape with the same arguments,
   the other calls markup with no arguments.
   NOTE(review): the condition that selects between the two returns is on a
   line not visible in this listing - presumably a test for None; confirm. */
163 escape_silent(PyObject
*self
, PyObject
*text
)
166 return escape(self
, text
);
167 return PyObject_CallFunctionObjArgs(markup
, NULL
);
/* soft_unicode: registered in the method table with the description that it
   makes a string unicode if it is not already.
   When the argument fails PyUnicode_Check it is converted: with
   PyObject_Unicode when PY_MAJOR_VERSION is below 3, with PyObject_Str in
   the other visible return.
   NOTE(review): the path taken for an argument that already is unicode is
   on lines not visible in this listing. */
172 soft_unicode(PyObject
*self
, PyObject
*s
)
174 if (!PyUnicode_Check(s
))
175 #if PY_MAJOR_VERSION < 3
176 return PyObject_Unicode(s
);
178 return PyObject_Str(s
);
/* Method table of the extension module.  Each entry lists the name exposed
   to Python, the C function cast to PyCFunction, the METH_O calling flag and
   the docstring.  The table is handed to Py_InitModule3 in the Python 2
   initialisation code of this file and ends with an all-NULL sentinel. */
185 static PyMethodDef module_methods
[] = {
/* escape: HTML-escape a value and mark the result as markup. */
186 {"escape", (PyCFunction
)escape
, METH_O
,
187 "escape(s) -> markup\n\n"
188 "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
189 "sequences. Use this if you need to display text that might contain\n"
190 "such characters in HTML. Marks return value as markup string."},
/* escape_silent: same as escape, with None turned into an empty string. */
191 {"escape_silent", (PyCFunction
)escape_silent
, METH_O
,
192 "escape_silent(s) -> markup\n\n"
193 "Like escape but converts None to an empty string."},
/* soft_unicode: convert to unicode only when the value is not unicode yet. */
194 {"soft_unicode", (PyCFunction
)soft_unicode
, METH_O
,
195 "soft_unicode(object) -> string\n\n"
196 "Make a string unicode if it isn't already. That way a markup\n"
197 "string is not converted back to unicode."},
198 {NULL
, NULL
, 0, NULL
} /* Sentinel */
/* Module initialisation, selected at compile time by PY_MAJOR_VERSION.
   Both variants call init_constants() first and test its result before the
   module is created.
   NOTE(review): the statements executed when init_constants() fails, and the
   closing lines of both variants, are not visible in this listing. */
202 #if PY_MAJOR_VERSION < 3
/* Fallback definition of PyMODINIT_FUNC when the headers do not supply one. */
204 #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
205 #define PyMODINIT_FUNC void
210 if (!init_constants())
/* Python 2: register the method table under the dotted module name with an
   empty module docstring. */
213 Py_InitModule3("markupsafe._speedups", module_methods
, "");
216 #else /* Python 3.x module initialization */
/* Python 3: static module definition whose visible fields are the head
   initialiser and the dotted module name. */
218 static struct PyModuleDef module_definition
= {
219 PyModuleDef_HEAD_INIT
,
220 "markupsafe._speedups",
231 PyInit__speedups(void)
233 if (!init_constants())
/* The created module object is returned to the interpreter. */
236 return PyModule_Create(&module_definition
);