/* ------------------------------------------------------------------------

   unicodedata -- Provides access to the Unicode 3.0 database.

   Data was extracted from the Unicode 3.0 UnicodeData.txt file.

   Written by Marc-Andre Lemburg (mal@lemburg.com).

   Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
14 #include "unicodedatabase.h"
16 /* --- Helpers ------------------------------------------------------------ */
19 const _PyUnicode_DatabaseRecord
*unicode_db(register int i
)
21 register int page
= i
>> 12;
23 if (page
< sizeof(_PyUnicode_Database
))
24 return &_PyUnicode_Database
[page
][i
& 0x0fff];
25 return &_PyUnicode_Database
[0][0];
28 /* --- Module API --------------------------------------------------------- */
/* unicodedata.decimal(unichr[, default])

   Visible behaviour: the arguments are parsed with "O!|O:decimal" as a
   Unicode object `v` plus an optional object `defobj` (initialised to
   NULL).  A TypeError is set unless `v` has exactly one character.  The
   character is converted with Py_UNICODE_TODECIMAL() into `rc`, and the
   success path returns PyInt_FromLong(rc).  A ValueError is set on a
   failure path.

   NOTE(review): this listing is a lossy extraction.  The return type, the
   `args` parameter, the declarations of `v` and `rc`, the braces, the
   condition that selects the failure path, the ValueError message and any
   handling of `defobj` are not visible here.  Presumably `defobj` is
   returned when it was supplied and the conversion fails -- confirm
   against the complete file before changing this function. */
31 unicodedata_decimal(PyObject
*self
,
35 PyObject
*defobj
= NULL
;
38 if (!PyArg_ParseTuple(args
, "O!|O:decimal",
39 &PyUnicode_Type
, &v
, &defobj
))
41 if (PyUnicode_GET_SIZE(v
) != 1) {
42 PyErr_SetString(PyExc_TypeError
,
43 "need a single Unicode character as parameter");
46 rc
= Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v
));
49 PyErr_SetString(PyExc_ValueError
,
58 return PyInt_FromLong(rc
);
/* unicodedata.digit(unichr[, default])

   Visible behaviour: the arguments are parsed with "O!|O:digit" as a
   Unicode object `v` plus an optional object `defobj` (initialised to
   NULL).  A TypeError is set unless `v` has exactly one character.  The
   character is converted with Py_UNICODE_TODIGIT() into `rc`, and the
   success path returns PyInt_FromLong(rc).  A ValueError is set on a
   failure path.

   NOTE(review): this listing is a lossy extraction.  The return type, the
   `args` parameter, the declarations of `v` and `rc`, the braces, the
   condition that selects the failure path, the ValueError message and any
   handling of `defobj` are not visible here.  Presumably `defobj` is
   returned when it was supplied and the conversion fails -- confirm
   against the complete file before changing this function. */
65 unicodedata_digit(PyObject
*self
,
69 PyObject
*defobj
= NULL
;
72 if (!PyArg_ParseTuple(args
, "O!|O:digit",
73 &PyUnicode_Type
, &v
, &defobj
))
75 if (PyUnicode_GET_SIZE(v
) != 1) {
76 PyErr_SetString(PyExc_TypeError
,
77 "need a single Unicode character as parameter");
80 rc
= Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v
));
83 PyErr_SetString(PyExc_ValueError
,
92 return PyInt_FromLong(rc
);
/* unicodedata.numeric(unichr[, default])

   Visible behaviour: the arguments are parsed with "O!|O:numeric" as a
   Unicode object `v` plus an optional object `defobj` (initialised to
   NULL).  A TypeError is set unless `v` has exactly one character.  The
   character is converted with Py_UNICODE_TONUMERIC() into `rc`.  When
   `defobj` is NULL on the failure path, a ValueError with the message
   "not a numeric character" is set.  The success path returns
   PyFloat_FromDouble(rc).

   NOTE(review): this listing is a lossy extraction.  The return type, the
   `args` parameter, the declarations of `v` and `rc`, most braces, the
   condition that guards the `defobj == NULL` test and the branch taken
   when `defobj` was supplied are not visible here.  Presumably `defobj`
   is returned in that case -- confirm against the complete file before
   changing this function. */
99 unicodedata_numeric(PyObject
*self
,
103 PyObject
*defobj
= NULL
;
106 if (!PyArg_ParseTuple(args
, "O!|O:numeric",
107 &PyUnicode_Type
, &v
, &defobj
))
109 if (PyUnicode_GET_SIZE(v
) != 1) {
110 PyErr_SetString(PyExc_TypeError
,
111 "need a single Unicode character as parameter");
114 rc
= Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v
));
116 if (defobj
== NULL
) {
117 PyErr_SetString(PyExc_ValueError
,
118 "not a numeric character");
126 return PyFloat_FromDouble(rc
);
133 unicodedata_category(PyObject
*self
,
139 if (!PyArg_ParseTuple(args
, "O!:category",
140 &PyUnicode_Type
, &v
))
142 if (PyUnicode_GET_SIZE(v
) != 1) {
143 PyErr_SetString(PyExc_TypeError
,
144 "need a single Unicode character as parameter");
147 index
= (int)unicode_db((int)*PyUnicode_AS_UNICODE(v
))->category
;
149 index
> sizeof(_PyUnicode_CategoryNames
) /
150 sizeof(_PyUnicode_CategoryNames
[0])) {
151 PyErr_Format(PyExc_SystemError
,
152 "category index out of range: %i",
156 return PyString_FromString(_PyUnicode_CategoryNames
[index
]);
163 unicodedata_bidirectional(PyObject
*self
,
169 if (!PyArg_ParseTuple(args
, "O!:bidirectional",
170 &PyUnicode_Type
, &v
))
172 if (PyUnicode_GET_SIZE(v
) != 1) {
173 PyErr_SetString(PyExc_TypeError
,
174 "need a single Unicode character as parameter");
177 index
= (int)unicode_db((int)*PyUnicode_AS_UNICODE(v
))->bidirectional
;
179 index
> sizeof(_PyUnicode_CategoryNames
) /
180 sizeof(_PyUnicode_CategoryNames
[0])) {
181 PyErr_Format(PyExc_SystemError
,
182 "bidirectional index out of range: %i",
186 return PyString_FromString(_PyUnicode_BidirectionalNames
[index
]);
193 unicodedata_combining(PyObject
*self
,
199 if (!PyArg_ParseTuple(args
, "O!:combining",
200 &PyUnicode_Type
, &v
))
202 if (PyUnicode_GET_SIZE(v
) != 1) {
203 PyErr_SetString(PyExc_TypeError
,
204 "need a single Unicode character as parameter");
207 value
= (int)unicode_db((int)*PyUnicode_AS_UNICODE(v
))->combining
;
208 return PyInt_FromLong(value
);
215 unicodedata_mirrored(PyObject
*self
,
221 if (!PyArg_ParseTuple(args
, "O!:mirrored",
222 &PyUnicode_Type
, &v
))
224 if (PyUnicode_GET_SIZE(v
) != 1) {
225 PyErr_SetString(PyExc_TypeError
,
226 "need a single Unicode character as parameter");
229 value
= (int)unicode_db((int)*PyUnicode_AS_UNICODE(v
))->mirrored
;
230 return PyInt_FromLong(value
);
237 unicodedata_decomposition(PyObject
*self
,
243 if (!PyArg_ParseTuple(args
, "O!:decomposition",
244 &PyUnicode_Type
, &v
))
246 if (PyUnicode_GET_SIZE(v
) != 1) {
247 PyErr_SetString(PyExc_TypeError
,
248 "need a single Unicode character as parameter");
251 value
= unicode_db((int)*PyUnicode_AS_UNICODE(v
))->decomposition
;
253 return PyString_FromString("");
255 return PyString_FromString(value
);
261 /* XXX Add doc strings. */
263 static PyMethodDef unicodedata_functions
[] = {
264 {"decimal", unicodedata_decimal
, 1},
265 {"digit", unicodedata_digit
, 1},
266 {"numeric", unicodedata_numeric
, 1},
267 {"category", unicodedata_category
, 1},
268 {"bidirectional", unicodedata_bidirectional
, 1},
269 {"combining", unicodedata_combining
, 1},
270 {"mirrored", unicodedata_mirrored
, 1},
271 {"decomposition", unicodedata_decomposition
, 1},
272 {NULL
, NULL
} /* sentinel */
/* Module initialisation entry point: registers a module named
   "unicodedata" with the method table unicodedata_functions by calling
   Py_InitModule().

   NOTE(review): this listing is a lossy extraction.  The return type or
   export declaration that precedes the function name and the braces of
   the body are not visible here; the result of Py_InitModule() does not
   appear to be used -- confirm against the complete file. */
276 initunicodedata(void)
278 Py_InitModule("unicodedata", unicodedata_functions
);