AddressList.__str__(): Get rid of useless, and broken method. Closes
[python/dscho.git] / Objects / unicodectype.c
blob106726d7fceefd26221446c6aa4e54942ad10437
/*
   Unicode character type helpers.

   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

   Copyright (c) Corporation for National Research Initiatives.

*/
11 #include "Python.h"
12 #include "unicodeobject.h"
/* Bit flags tested against the `flags` member of a type record.  Each
   bit marks one character property; the predicate that checks it is
   named alongside. */
#define ALPHA_MASK      0x01    /* _PyUnicode_IsAlpha */
#define DECIMAL_MASK    0x02    /* _PyUnicode_ToDecimalDigit */
#define DIGIT_MASK      0x04    /* _PyUnicode_ToDigit */
#define LOWER_MASK      0x08    /* _PyUnicode_IsLowercase */
#define LINEBREAK_MASK  0x10    /* _PyUnicode_IsLinebreak */
#define SPACE_MASK      0x20    /* _PyUnicode_IsWhitespace */
#define TITLE_MASK      0x40    /* _PyUnicode_IsTitlecase */
#define UPPER_MASK      0x80    /* _PyUnicode_IsUppercase */
23 typedef struct {
24 const unsigned short flags;
25 const Py_UNICODE upper;
26 const Py_UNICODE lower;
27 const Py_UNICODE title;
28 const unsigned char decimal;
29 const unsigned char digit;
30 } _PyUnicode_TypeRecord;
32 #include "unicodetype_db.h"
34 static const _PyUnicode_TypeRecord *
35 gettyperecord(Py_UNICODE code)
37 int index;
39 if (code >= 0x110000)
40 index = 0;
41 else {
42 index = index1[(code>>SHIFT)];
43 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
46 return &_PyUnicode_TypeRecords[index];
49 /* Returns 1 for Unicode characters having the category 'Zl' or type
50 'B', 0 otherwise. */
52 int _PyUnicode_IsLinebreak(Py_UNICODE ch)
54 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
56 return (ctype->flags & LINEBREAK_MASK) != 0;
59 /* Returns the titlecase Unicode characters corresponding to ch or just
60 ch if no titlecase mapping is known. */
62 Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
64 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
65 int delta;
67 if (ctype->title)
68 delta = ctype->title;
69 else
70 delta = ctype->upper;
72 if (delta >= 32768)
73 delta -= 65536;
75 return ch + delta;
78 /* Returns 1 for Unicode characters having the category 'Lt', 0
79 otherwise. */
81 int _PyUnicode_IsTitlecase(Py_UNICODE ch)
83 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
85 return (ctype->flags & TITLE_MASK) != 0;
88 /* Returns the integer decimal (0-9) for Unicode characters having
89 this property, -1 otherwise. */
91 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
93 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
95 return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
98 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
100 if (_PyUnicode_ToDecimalDigit(ch) < 0)
101 return 0;
102 return 1;
105 /* Returns the integer digit (0-9) for Unicode characters having
106 this property, -1 otherwise. */
108 int _PyUnicode_ToDigit(Py_UNICODE ch)
110 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
112 return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
115 int _PyUnicode_IsDigit(Py_UNICODE ch)
117 if (_PyUnicode_ToDigit(ch) < 0)
118 return 0;
119 return 1;
122 /* Returns the numeric value as double for Unicode characters having
123 this property, -1.0 otherwise. */
125 /* TODO: replace with unicodetype_db.h table */
127 double _PyUnicode_ToNumeric(Py_UNICODE ch)
129 switch (ch) {
130 case 0x3007:
131 return (double) 0;
132 case 0x09F4:
133 case 0x215F:
134 case 0x2160:
135 case 0x2170:
136 case 0x3021:
137 case 0x3280:
138 return (double) 1;
139 case 0x00BD:
140 return (double) 1 / 2;
141 case 0x2153:
142 return (double) 1 / 3;
143 case 0x00BC:
144 return (double) 1 / 4;
145 case 0x2155:
146 return (double) 1 / 5;
147 case 0x2159:
148 return (double) 1 / 6;
149 case 0x215B:
150 return (double) 1 / 8;
151 case 0x0BF0:
152 case 0x1372:
153 case 0x2169:
154 case 0x2179:
155 case 0x2469:
156 case 0x247D:
157 case 0x2491:
158 case 0x277F:
159 case 0x2789:
160 case 0x2793:
161 case 0x3038:
162 case 0x3289:
163 return (double) 10;
164 case 0x0BF1:
165 case 0x137B:
166 case 0x216D:
167 case 0x217D:
168 return (double) 100;
169 case 0x0BF2:
170 case 0x216F:
171 case 0x217F:
172 case 0x2180:
173 return (double) 1000;
174 case 0x137C:
175 case 0x2182:
176 return (double) 10000;
177 case 0x216A:
178 case 0x217A:
179 case 0x246A:
180 case 0x247E:
181 case 0x2492:
182 return (double) 11;
183 case 0x216B:
184 case 0x217B:
185 case 0x246B:
186 case 0x247F:
187 case 0x2493:
188 return (double) 12;
189 case 0x246C:
190 case 0x2480:
191 case 0x2494:
192 return (double) 13;
193 case 0x246D:
194 case 0x2481:
195 case 0x2495:
196 return (double) 14;
197 case 0x246E:
198 case 0x2482:
199 case 0x2496:
200 return (double) 15;
201 case 0x09F9:
202 case 0x246F:
203 case 0x2483:
204 case 0x2497:
205 return (double) 16;
206 case 0x16EE:
207 case 0x2470:
208 case 0x2484:
209 case 0x2498:
210 return (double) 17;
211 case 0x16EF:
212 case 0x2471:
213 case 0x2485:
214 case 0x2499:
215 return (double) 18;
216 case 0x16F0:
217 case 0x2472:
218 case 0x2486:
219 case 0x249A:
220 return (double) 19;
221 case 0x09F5:
222 case 0x2161:
223 case 0x2171:
224 case 0x3022:
225 case 0x3281:
226 return (double) 2;
227 case 0x2154:
228 return (double) 2 / 3;
229 case 0x2156:
230 return (double) 2 / 5;
231 case 0x1373:
232 case 0x2473:
233 case 0x2487:
234 case 0x249B:
235 case 0x3039:
236 return (double) 20;
237 case 0x09F6:
238 case 0x2162:
239 case 0x2172:
240 case 0x3023:
241 case 0x3282:
242 return (double) 3;
243 case 0x00BE:
244 return (double) 3 / 4;
245 case 0x2157:
246 return (double) 3 / 5;
247 case 0x215C:
248 return (double) 3 / 8;
249 case 0x1374:
250 case 0x303A:
251 return (double) 30;
252 case 0x09F7:
253 case 0x2163:
254 case 0x2173:
255 case 0x3024:
256 case 0x3283:
257 return (double) 4;
258 case 0x2158:
259 return (double) 4 / 5;
260 case 0x1375:
261 return (double) 40;
262 case 0x2164:
263 case 0x2174:
264 case 0x3025:
265 case 0x3284:
266 return (double) 5;
267 case 0x215A:
268 return (double) 5 / 6;
269 case 0x215D:
270 return (double) 5 / 8;
271 case 0x1376:
272 case 0x216C:
273 case 0x217C:
274 return (double) 50;
275 case 0x216E:
276 case 0x217E:
277 return (double) 500;
278 case 0x2181:
279 return (double) 5000;
280 case 0x2165:
281 case 0x2175:
282 case 0x3026:
283 case 0x3285:
284 return (double) 6;
285 case 0x1377:
286 return (double) 60;
287 case 0x2166:
288 case 0x2176:
289 case 0x3027:
290 case 0x3286:
291 return (double) 7;
292 case 0x215E:
293 return (double) 7 / 8;
294 case 0x1378:
295 return (double) 70;
296 case 0x2167:
297 case 0x2177:
298 case 0x3028:
299 case 0x3287:
300 return (double) 8;
301 case 0x1379:
302 return (double) 80;
303 case 0x2168:
304 case 0x2178:
305 case 0x3029:
306 case 0x3288:
307 return (double) 9;
308 case 0x137A:
309 return (double) 90;
310 default:
311 return (double) _PyUnicode_ToDigit(ch);
315 int _PyUnicode_IsNumeric(Py_UNICODE ch)
317 if (_PyUnicode_ToNumeric(ch) < 0.0)
318 return 0;
319 return 1;
322 #ifndef WANT_WCTYPE_FUNCTIONS
324 /* Returns 1 for Unicode characters having the bidirectional type
325 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
327 int _PyUnicode_IsWhitespace(Py_UNICODE ch)
329 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
331 return (ctype->flags & SPACE_MASK) != 0;
334 /* Returns 1 for Unicode characters having the category 'Ll', 0
335 otherwise. */
337 int _PyUnicode_IsLowercase(Py_UNICODE ch)
339 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
341 return (ctype->flags & LOWER_MASK) != 0;
344 /* Returns 1 for Unicode characters having the category 'Lu', 0
345 otherwise. */
347 int _PyUnicode_IsUppercase(Py_UNICODE ch)
349 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
351 return (ctype->flags & UPPER_MASK) != 0;
354 /* Returns the uppercase Unicode characters corresponding to ch or just
355 ch if no uppercase mapping is known. */
357 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
359 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
360 int delta = ctype->upper;
361 if (delta >= 32768)
362 delta -= 65536;
363 return ch + delta;
366 /* Returns the lowercase Unicode characters corresponding to ch or just
367 ch if no lowercase mapping is known. */
369 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
371 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
372 int delta = ctype->lower;
373 if (delta >= 32768)
374 delta -= 65536;
375 return ch + delta;
378 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
379 'Lo' or 'Lm', 0 otherwise. */
381 int _PyUnicode_IsAlpha(Py_UNICODE ch)
383 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
385 return (ctype->flags & ALPHA_MASK) != 0;
388 #else
/* Export the interfaces using the wchar_t type for portability
   reasons:  */
393 int _PyUnicode_IsWhitespace(Py_UNICODE ch)
395 return iswspace(ch);
398 int _PyUnicode_IsLowercase(Py_UNICODE ch)
400 return iswlower(ch);
403 int _PyUnicode_IsUppercase(Py_UNICODE ch)
405 return iswupper(ch);
408 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
410 return towlower(ch);
413 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
415 return towupper(ch);
418 int _PyUnicode_IsAlpha(Py_UNICODE ch)
420 return iswalpha(ch);
423 #endif