2 Unicode character type helpers.
4 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
7 Copyright (c) Corporation for National Research Initiatives.
12 #include "unicodeobject.h"
/* Bit masks for the `flags` field of a type record.  Each character-class
   helper in this file tests exactly one of these bits. */
14 #define ALPHA_MASK 0x01
15 #define DECIMAL_MASK 0x02
16 #define DIGIT_MASK 0x04
17 #define LOWER_MASK 0x08
18 #define LINEBREAK_MASK 0x10
19 #define SPACE_MASK 0x20
20 #define TITLE_MASK 0x40
21 #define UPPER_MASK 0x80
/* Per-character type record.
     flags          - bit set tested against the *_MASK constants.
     upper, lower,
     title          - values the case-conversion helpers add to a code point.
     decimal, digit - values returned by the ToDecimalDigit / ToDigit helpers
                      when the matching flag bit is set.
   NOTE(review): the opening line of this typedef is not visible in this
   listing; only the members and the closing name are. */
24 const unsigned short flags
;
25 const Py_UNICODE upper
;
26 const Py_UNICODE lower
;
27 const Py_UNICODE title
;
28 const unsigned char decimal
;
29 const unsigned char digit
;
30 } _PyUnicode_TypeRecord
;
32 #include "unicodetype_db.h"
/* Returns a pointer to the type record for `code`.

   The visible part is a two-stage table lookup: index1 is indexed by the
   high bits of code (code >> SHIFT); that result, shifted back up by SHIFT
   and combined with the low SHIFT bits of code, indexes index2; the value
   found there selects the entry of _PyUnicode_TypeRecords that is returned.

   NOTE(review): this listing omits several lines of the function (braces,
   the declaration of `index`, and the statement taken when the range check
   below succeeds), so the behaviour for code < 0 or code >= 65536 cannot be
   confirmed from here. */
34 static const _PyUnicode_TypeRecord
*
35 gettyperecord(int code
)
/* Codes outside 0..65535 are special-cased. */
39 if (code
< 0 || code
>= 65536)
/* First stage: block number from the high bits. */
42 index
= index1
[(code
>>SHIFT
)];
/* Second stage: record number from the block plus the low bits. */
43 index
= index2
[(index
<<SHIFT
)+(code
&((1<<SHIFT
)-1))];
45 return &_PyUnicode_TypeRecords
[index
];
48 /* Returns 1 for Unicode characters having the category 'Zl' or type
51 int _PyUnicode_IsLinebreak(register const Py_UNICODE ch
)
53 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
55 return (ctype
->flags
& LINEBREAK_MASK
) != 0;
58 /* Returns the titlecase Unicode character corresponding to ch, or just
   ch if no titlecase mapping is known.

   The result is ch plus either the record's `title` value or its `upper`
   value.
   NOTE(review): the condition that selects between the two return
   statements is not visible in this listing; confirm against the full
   file. */
61 Py_UNICODE
_PyUnicode_ToTitlecase(register const Py_UNICODE ch
)
63 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
66 return ch
+ ctype
->title
;
68 return ch
+ ctype
->upper
;
71 /* Returns 1 for Unicode characters having the category 'Lt', 0
74 int _PyUnicode_IsTitlecase(register const Py_UNICODE ch
)
76 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
78 return (ctype
->flags
& TITLE_MASK
) != 0;
81 /* Returns the integer decimal (0-9) for Unicode characters having
82 this property, -1 otherwise. */
84 int _PyUnicode_ToDecimalDigit(register const Py_UNICODE ch
)
86 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
88 return (ctype
->flags
& DECIMAL_MASK
) ? ctype
->decimal
: -1;
/* Predicate built on _PyUnicode_ToDecimalDigit(): it tests whether that
   function reports a negative value for ch.
   NOTE(review): the return statements are not visible in this listing;
   presumably 0 when the test succeeds and 1 otherwise -- confirm against
   the full file. */
91 int _PyUnicode_IsDecimalDigit(register const Py_UNICODE ch
)
93 if (_PyUnicode_ToDecimalDigit(ch
) < 0)
98 /* Returns the integer digit (0-9) for Unicode characters having
99 this property, -1 otherwise. */
101 int _PyUnicode_ToDigit(register const Py_UNICODE ch
)
103 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
105 return (ctype
->flags
& DIGIT_MASK
) ? ctype
->digit
: -1;
/* Predicate built on _PyUnicode_ToDigit(): it tests whether that function
   reports a negative value for ch.
   NOTE(review): the return statements are not visible in this listing;
   presumably 0 when the test succeeds and 1 otherwise -- confirm against
   the full file. */
108 int _PyUnicode_IsDigit(register const Py_UNICODE ch
)
110 if (_PyUnicode_ToDigit(ch
) < 0)
115 /* Returns the numeric value as double for Unicode characters having
   this property, -1.0 otherwise.

   The visible return statements yield fixed fractions and large round
   values, and the last one falls back to _PyUnicode_ToDigit(ch).  Each
   fraction is written as (double) N / D: the cast binds to N first, so the
   division is carried out in floating point.

   NOTE(review): the dispatch that maps ch to each return (presumably a
   switch on ch with one case label per code point) and any returns for
   whole-number values are not visible in this listing. */
118 /* TODO: replace with unicodetype_db.h table */
120 double _PyUnicode_ToNumeric(register const Py_UNICODE ch
)
133 return (double) 1 / 2;
135 return (double) 1 / 3;
137 return (double) 1 / 4;
139 return (double) 1 / 5;
141 return (double) 1 / 6;
143 return (double) 1 / 8;
166 return (double) 1000;
169 return (double) 10000;
221 return (double) 2 / 3;
223 return (double) 2 / 5;
237 return (double) 3 / 4;
239 return (double) 3 / 5;
241 return (double) 3 / 8;
252 return (double) 4 / 5;
261 return (double) 5 / 6;
263 return (double) 5 / 8;
272 return (double) 5000;
286 return (double) 7 / 8;
/* Fallback: reuse the digit lookup, which itself yields -1 for characters
   without the digit property. */
304 return (double) _PyUnicode_ToDigit(ch
);
/* Predicate built on _PyUnicode_ToNumeric(): it tests whether that
   function reports a negative value for ch.
   NOTE(review): the return statements are not visible in this listing;
   presumably 0 when the test succeeds and 1 otherwise -- confirm against
   the full file. */
308 int _PyUnicode_IsNumeric(register const Py_UNICODE ch
)
310 if (_PyUnicode_ToNumeric(ch
) < 0.0)
315 #ifndef WANT_WCTYPE_FUNCTIONS
317 /* Returns 1 for Unicode characters having the bidirectional type
318 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
320 int _PyUnicode_IsWhitespace(register const Py_UNICODE ch
)
322 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
324 return (ctype
->flags
& SPACE_MASK
) != 0;
327 /* Returns 1 for Unicode characters having the category 'Ll', 0
330 int _PyUnicode_IsLowercase(register const Py_UNICODE ch
)
332 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
334 return (ctype
->flags
& LOWER_MASK
) != 0;
337 /* Returns 1 for Unicode characters having the category 'Lu', 0
340 int _PyUnicode_IsUppercase(register const Py_UNICODE ch
)
342 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
344 return (ctype
->flags
& UPPER_MASK
) != 0;
347 /* Returns the uppercase Unicode characters corresponding to ch or just
348 ch if no uppercase mapping is known. */
350 Py_UNICODE
_PyUnicode_ToUppercase(register const Py_UNICODE ch
)
352 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
354 return ch
+ ctype
->upper
;
357 /* Returns the lowercase Unicode characters corresponding to ch or just
358 ch if no lowercase mapping is known. */
360 Py_UNICODE
_PyUnicode_ToLowercase(register const Py_UNICODE ch
)
362 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
364 return ch
+ ctype
->lower
;
367 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
368 'Lo' or 'Lm', 0 otherwise. */
370 int _PyUnicode_IsAlpha(register const Py_UNICODE ch
)
372 const _PyUnicode_TypeRecord
*ctype
= gettyperecord(ch
);
374 return (ctype
->flags
& ALPHA_MASK
) != 0;
/* Export the interfaces using the wchar_t type for portability. */
/* Alternative definitions of six helpers: IsWhitespace, IsLowercase,
   IsUppercase, ToLowercase, ToUppercase and IsAlpha.
   NOTE(review): only the signatures are visible in this listing.  The
   bodies, and the preprocessor directive that separates these from the
   table-driven versions guarded by WANT_WCTYPE_FUNCTIONS, are not shown;
   presumably each forwards to the matching <wctype.h> function -- confirm
   against the full file. */
382 int _PyUnicode_IsWhitespace(register const Py_UNICODE ch
)
387 int _PyUnicode_IsLowercase(register const Py_UNICODE ch
)
392 int _PyUnicode_IsUppercase(register const Py_UNICODE ch
)
397 Py_UNICODE
_PyUnicode_ToLowercase(register const Py_UNICODE ch
)
402 Py_UNICODE
_PyUnicode_ToUppercase(register const Py_UNICODE ch
)
407 int _PyUnicode_IsAlpha(register const Py_UNICODE ch
)