/*
   Unicode character type helpers.

   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com).

   Copyright (c) Corporation for National Research Initiatives.
*/
12 #include "unicodeobject.h"
/* Bit flags tested against the `flags` member of a type record.
   Each mask occupies a distinct bit, so a record may carry any
   combination of these properties. */
#define ALPHA_MASK 0x01
#define DECIMAL_MASK 0x02
#define DIGIT_MASK 0x04
#define LOWER_MASK 0x08
#define LINEBREAK_MASK 0x10
#define SPACE_MASK 0x20
#define TITLE_MASK 0x40
#define UPPER_MASK 0x80
24 const unsigned short flags
;
25 const Py_UNICODE upper
;
26 const Py_UNICODE lower
;
27 const Py_UNICODE title
;
28 const unsigned char decimal
;
29 const unsigned char digit
;
30 } _PyUnicode_TypeRecord
;
32 #include "unicodetype_db.h"
/* Maps a code point to its type record via a two-level table lookup:
   the high bits of `code` (code >> SHIFT) select an entry in index1;
   that entry, shifted left by SHIFT and combined with the low SHIFT
   bits of `code`, selects an entry in index2, whose value is the
   position of the record in _PyUnicode_TypeRecords.
   NOTE(review): index1, index2, SHIFT and _PyUnicode_TypeRecords are
   presumably provided by unicodetype_db.h -- confirm.
   NOTE(review): the declaration of `index` and any statements between
   the signature and the first lookup are not present in this listing;
   restore them from the original file before relying on this text. */
34 static const _PyUnicode_TypeRecord
*
35 gettyperecord(Py_UNICODE code
)
/* First level: block number of the code point. */
42 index
= index1
[(code
>>SHIFT
)];
/* Second level: offset of the code point inside that block. */
43 index
= index2
[(index
<<SHIFT
)+(code
&((1<<SHIFT
)-1))];
46 return &_PyUnicode_TypeRecords
[index
];
49 /* Returns 1 for Unicode characters having the category 'Zl' or type
52 int _PyUnicode_IsLinebreak(Py_UNICODE ch
)
54 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
56 return (ctype
->flags
& LINEBREAK_MASK
) != 0;
/* Returns the titlecase Unicode character corresponding to ch, or just
   ch if no titlecase mapping is known. */
62 Py_UNICODE
_PyUnicode_ToTitlecase(register Py_UNICODE ch
)
/* Fetch the type record for ch. */
64 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
/* NOTE(review): the remainder of the body (how the record is turned
   into the returned character) is not present in this listing --
   restore it from the original file. */
78 /* Returns 1 for Unicode characters having the category 'Lt', 0
81 int _PyUnicode_IsTitlecase(Py_UNICODE ch
)
83 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
85 return (ctype
->flags
& TITLE_MASK
) != 0;
88 /* Returns the integer decimal (0-9) for Unicode characters having
89 this property, -1 otherwise. */
91 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch
)
93 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
95 return (ctype
->flags
& DECIMAL_MASK
) ? ctype
->decimal
: -1;
/* Predicate built on _PyUnicode_ToDecimalDigit(): it branches on
   whether that lookup yields a negative value (the lookup reports -1
   for characters without a decimal value).
   NOTE(review): the statements of both branches are not present in
   this listing; presumably 0 for the negative case and 1 otherwise --
   confirm against the original file. */
98 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch
)
100 if (_PyUnicode_ToDecimalDigit(ch
) < 0)
105 /* Returns the integer digit (0-9) for Unicode characters having
106 this property, -1 otherwise. */
108 int _PyUnicode_ToDigit(Py_UNICODE ch
)
110 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
112 return (ctype
->flags
& DIGIT_MASK
) ? ctype
->digit
: -1;
/* Predicate built on _PyUnicode_ToDigit(): it branches on whether
   that lookup yields a negative value (the lookup reports -1 for
   characters without a digit value).
   NOTE(review): the statements of both branches are not present in
   this listing; presumably 0 for the negative case and 1 otherwise --
   confirm against the original file. */
115 int _PyUnicode_IsDigit(Py_UNICODE ch
)
117 if (_PyUnicode_ToDigit(ch
) < 0)
/* Returns the numeric value as double for Unicode characters having
   this property, -1.0 otherwise. */

/* TODO: replace with unicodetype_db.h table */
/* Hard-coded numeric values for individual characters, with a fall
   back to the digit lookup.
   NOTE(review): the selection logic that chooses between the returns
   below (and any returns not shown) is not present in this listing;
   only the return statements survive.  Restore it from the original
   file before editing. */
127 double _PyUnicode_ToNumeric(Py_UNICODE ch
)
/* In the fraction returns the cast binds to the numerator first, so
   the division is carried out in double (e.g. 0.5, not 0). */
140 return (double) 1 / 2;
142 return (double) 1 / 3;
144 return (double) 1 / 4;
146 return (double) 1 / 5;
148 return (double) 1 / 6;
150 return (double) 1 / 8;
173 return (double) 1000;
176 return (double) 10000;
228 return (double) 2 / 3;
230 return (double) 2 / 5;
244 return (double) 3 / 4;
246 return (double) 3 / 5;
248 return (double) 3 / 8;
259 return (double) 4 / 5;
268 return (double) 5 / 6;
270 return (double) 5 / 8;
279 return (double) 5000;
293 return (double) 7 / 8;
/* Fall back to the digit lookup, converted to double; that lookup
   reports -1 when the character has no digit value. */
311 return (double) _PyUnicode_ToDigit(ch
);
/* Predicate built on _PyUnicode_ToNumeric(): it branches on whether
   that conversion yields a value below 0.0.
   NOTE(review): the statements of both branches are not present in
   this listing; presumably 0 for the negative case and 1 otherwise --
   confirm against the original file. */
315 int _PyUnicode_IsNumeric(Py_UNICODE ch
)
317 if (_PyUnicode_ToNumeric(ch
) < 0.0)
322 #ifndef WANT_WCTYPE_FUNCTIONS
324 /* Returns 1 for Unicode characters having the bidirectional type
325 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
327 int _PyUnicode_IsWhitespace(Py_UNICODE ch
)
329 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
331 return (ctype
->flags
& SPACE_MASK
) != 0;
334 /* Returns 1 for Unicode characters having the category 'Ll', 0
337 int _PyUnicode_IsLowercase(Py_UNICODE ch
)
339 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
341 return (ctype
->flags
& LOWER_MASK
) != 0;
344 /* Returns 1 for Unicode characters having the category 'Lu', 0
347 int _PyUnicode_IsUppercase(Py_UNICODE ch
)
349 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
351 return (ctype
->flags
& UPPER_MASK
) != 0;
/* Returns the uppercase Unicode character corresponding to ch, or just
   ch if no uppercase mapping is known. */
357 Py_UNICODE
_PyUnicode_ToUppercase(Py_UNICODE ch
)
/* Fetch the type record for ch. */
359 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
/* The record's `upper` member is read into a signed int. */
360 int delta
= ctype
->upper
;
/* NOTE(review): the remainder of the body (how delta is applied to ch
   and the return statement) is not present in this listing -- restore
   it from the original file. */
/* Returns the lowercase Unicode character corresponding to ch, or just
   ch if no lowercase mapping is known. */
369 Py_UNICODE
_PyUnicode_ToLowercase(Py_UNICODE ch
)
/* Fetch the type record for ch. */
371 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
/* The record's `lower` member is read into a signed int. */
372 int delta
= ctype
->lower
;
/* NOTE(review): the remainder of the body (how delta is applied to ch
   and the return statement) is not present in this listing -- restore
   it from the original file. */
378 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
379 'Lo' or 'Lm', 0 otherwise. */
381 int _PyUnicode_IsAlpha(Py_UNICODE ch
)
383 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
385 return (ctype
->flags
& ALPHA_MASK
) != 0;
/* Export the interfaces using the wchar_t type for portability. */
393 int _PyUnicode_IsWhitespace(Py_UNICODE ch
)
398 int _PyUnicode_IsLowercase(Py_UNICODE ch
)
403 int _PyUnicode_IsUppercase(Py_UNICODE ch
)
408 Py_UNICODE
_PyUnicode_ToLowercase(Py_UNICODE ch
)
413 Py_UNICODE
_PyUnicode_ToUppercase(Py_UNICODE ch
)
418 int _PyUnicode_IsAlpha(Py_UNICODE ch
)