1 From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
2 From: Christian Persch <chpe@gnome.org>
3 Date: Sun, 12 Feb 2012 21:20:33 +0100
4 Subject: [PATCH] regex: Use GLib for Unicode data
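6 Use g_unichar_type() and g_unichar_get_script() instead of the bundled PCRE Unicode tables; UCD_OTHERCASE() now calls a helper built on g_unichar_toupper()/g_unichar_tolower().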
8 glib/pcre/pcre_compile.c | 26 +++---
9 glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
10 glib/pcre/pcre_exec.c | 26 +++---
11 glib/pcre/pcre_internal.h | 11 +--
12 glib/pcre/pcre_tables.c | 16 +++
13 glib/pcre/pcre_xclass.c | 24 ++--
14 glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
15 7 files changed, 239 insertions(+), 225 deletions(-)
17 diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
18 index 21bef80..a6c84e1 100644
19 --- a/glib/pcre/pcre_compile.c
20 +++ b/glib/pcre/pcre_compile.c
21 @@ -2920,43 +2920,43 @@ Returns: TRUE if auto-possessifying is OK
23 check_char_prop(int c, int ptype, int pdata, BOOL negated)
25 -const ucd_record *prop = GET_UCD(c);
26 +const pcre_uint8 chartype = UCD_CHARTYPE(c);
30 - return (prop->chartype == ucp_Lu ||
31 - prop->chartype == ucp_Ll ||
32 - prop->chartype == ucp_Lt) == negated;
33 + return (chartype == ucp_Lu ||
34 + chartype == ucp_Ll ||
35 + chartype == ucp_Lt) == negated;
38 - return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
39 + return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
42 - return (pdata == prop->chartype) == negated;
43 + return (pdata == chartype) == negated;
46 - return (pdata == prop->script) == negated;
47 + return (pdata == UCD_SCRIPT(c)) == negated;
49 /* These are specials */
52 - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
53 - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
54 + return (PRIV(ucp_gentype)[chartype] == ucp_L ||
55 + PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
57 case PT_SPACE: /* Perl space */
58 - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
59 + return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
60 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
63 case PT_PXSPACE: /* POSIX space */
64 - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
65 + return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
66 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
67 c == CHAR_FF || c == CHAR_CR)
71 - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
72 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
73 + return (PRIV(ucp_gentype)[chartype] == ucp_L ||
74 + PRIV(ucp_gentype)[chartype] == ucp_N ||
75 c == CHAR_UNDERSCORE) == negated;
78 diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
79 index 9565d46..3f913ce 100644
80 --- a/glib/pcre/pcre_dfa_exec.c
81 +++ b/glib/pcre/pcre_dfa_exec.c
82 @@ -1060,7 +1060,7 @@ for (;;)
86 - const ucd_record * prop = GET_UCD(c);
87 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
91 @@ -1068,43 +1068,43 @@ for (;;)
95 - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
96 - prop->chartype == ucp_Lt;
97 + OK = chartype == ucp_Lu || chartype == ucp_Ll ||
102 - OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
103 + OK = PRIV(ucp_gentype)[chartype] == code[2];
107 - OK = prop->chartype == code[2];
108 + OK = chartype == code[2];
112 - OK = prop->script == code[2];
113 + OK = UCD_SCRIPT(c) == code[2];
116 /* These are specials for combination cases. */
119 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
120 - PRIV(ucp_gentype)[prop->chartype] == ucp_N;
121 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
122 + PRIV(ucp_gentype)[chartype] == ucp_N;
125 case PT_SPACE: /* Perl space */
126 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
127 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
128 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
131 case PT_PXSPACE: /* POSIX space */
132 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
133 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
134 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
135 c == CHAR_FF || c == CHAR_CR;
139 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
140 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
141 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
142 + PRIV(ucp_gentype)[chartype] == ucp_N ||
143 c == CHAR_UNDERSCORE;
146 @@ -1294,7 +1294,7 @@ for (;;)
150 - const ucd_record * prop = GET_UCD(c);
151 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
155 @@ -1302,43 +1302,43 @@ for (;;)
159 - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
160 - prop->chartype == ucp_Lt;
161 + OK = chartype == ucp_Lu || chartype == ucp_Ll ||
162 + chartype == ucp_Lt;
166 - OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
167 + OK = PRIV(ucp_gentype)[chartype] == code[3];
171 - OK = prop->chartype == code[3];
172 + OK = chartype == code[3];
176 - OK = prop->script == code[3];
177 + OK = UCD_SCRIPT(c) == code[3];
180 /* These are specials for combination cases. */
183 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
184 - PRIV(ucp_gentype)[prop->chartype] == ucp_N;
185 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
186 + PRIV(ucp_gentype)[chartype] == ucp_N;
189 case PT_SPACE: /* Perl space */
190 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
191 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
192 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
195 case PT_PXSPACE: /* POSIX space */
196 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
197 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
198 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
199 c == CHAR_FF || c == CHAR_CR;
203 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
204 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
205 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
206 + PRIV(ucp_gentype)[chartype] == ucp_N ||
207 c == CHAR_UNDERSCORE;
210 @@ -1541,7 +1541,7 @@ for (;;)
214 - const ucd_record * prop = GET_UCD(c);
215 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
219 @@ -1549,43 +1549,43 @@ for (;;)
223 - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
224 - prop->chartype == ucp_Lt;
225 + OK = chartype == ucp_Lu || chartype == ucp_Ll ||
226 + chartype == ucp_Lt;
230 - OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
231 + OK = PRIV(ucp_gentype)[chartype] == code[3];
235 - OK = prop->chartype == code[3];
236 + OK = chartype == code[3];
240 - OK = prop->script == code[3];
241 + OK = UCD_SCRIPT(c) == code[3];
244 /* These are specials for combination cases. */
247 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
248 - PRIV(ucp_gentype)[prop->chartype] == ucp_N;
249 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
250 + PRIV(ucp_gentype)[chartype] == ucp_N;
253 case PT_SPACE: /* Perl space */
254 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
255 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
256 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
259 case PT_PXSPACE: /* POSIX space */
260 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
261 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
262 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
263 c == CHAR_FF || c == CHAR_CR;
267 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
268 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
269 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
270 + PRIV(ucp_gentype)[chartype] == ucp_N ||
271 c == CHAR_UNDERSCORE;
274 @@ -1813,7 +1813,7 @@ for (;;)
278 - const ucd_record * prop = GET_UCD(c);
279 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
280 switch(code[1 + IMM2_SIZE + 1])
283 @@ -1821,43 +1821,43 @@ for (;;)
287 - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
288 - prop->chartype == ucp_Lt;
289 + OK = chartype == ucp_Lu || chartype == ucp_Ll ||
290 + chartype == ucp_Lt;
294 - OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
295 + OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
299 - OK = prop->chartype == code[1 + IMM2_SIZE + 2];
300 + OK = chartype == code[1 + IMM2_SIZE + 2];
304 - OK = prop->script == code[1 + IMM2_SIZE + 2];
305 + OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
308 /* These are specials for combination cases. */
311 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
312 - PRIV(ucp_gentype)[prop->chartype] == ucp_N;
313 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
314 + PRIV(ucp_gentype)[chartype] == ucp_N;
317 case PT_SPACE: /* Perl space */
318 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
319 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
320 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
323 case PT_PXSPACE: /* POSIX space */
324 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
325 + OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
326 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
327 c == CHAR_FF || c == CHAR_CR;
331 - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
332 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
333 + OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
334 + PRIV(ucp_gentype)[chartype] == ucp_N ||
335 c == CHAR_UNDERSCORE;
338 diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
339 index 830b8b5..c89a3f9 100644
340 --- a/glib/pcre/pcre_exec.c
341 +++ b/glib/pcre/pcre_exec.c
342 @@ -2565,7 +2565,7 @@ for (;;)
344 GETCHARINCTEST(c, eptr);
346 - const ucd_record *prop = GET_UCD(c);
347 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
351 @@ -2574,44 +2574,44 @@ for (;;)
355 - if ((prop->chartype == ucp_Lu ||
356 - prop->chartype == ucp_Ll ||
357 - prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
358 + if ((chartype == ucp_Lu ||
359 + chartype == ucp_Ll ||
360 + chartype == ucp_Lt) == (op == OP_NOTPROP))
361 RRETURN(MATCH_NOMATCH);
365 - if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
366 + if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
367 RRETURN(MATCH_NOMATCH);
371 - if ((ecode[2] != prop->chartype) == (op == OP_PROP))
372 + if ((ecode[2] != chartype) == (op == OP_PROP))
373 RRETURN(MATCH_NOMATCH);
377 - if ((ecode[2] != prop->script) == (op == OP_PROP))
378 + if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
379 RRETURN(MATCH_NOMATCH);
382 /* These are specials */
385 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
386 - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
387 + if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
388 + PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
389 RRETURN(MATCH_NOMATCH);
392 case PT_SPACE: /* Perl space */
393 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
394 + if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
395 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
396 == (op == OP_NOTPROP))
397 RRETURN(MATCH_NOMATCH);
400 case PT_PXSPACE: /* POSIX space */
401 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
402 + if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
403 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
404 c == CHAR_FF || c == CHAR_CR)
405 == (op == OP_NOTPROP))
406 @@ -2619,8 +2619,8 @@ for (;;)
410 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
411 - PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
412 + if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
413 + PRIV(ucp_gentype)[chartype] == ucp_N ||
414 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
415 RRETURN(MATCH_NOMATCH);
417 diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
418 index 181c312..234af1b 100644
419 --- a/glib/pcre/pcre_internal.h
420 +++ b/glib/pcre/pcre_internal.h
421 @@ -2329,15 +2329,12 @@ extern const int PRIV(ucp_typerange)[];
423 /* UCD access macros */
425 -#define UCD_BLOCK_SIZE 128
426 -#define GET_UCD(ch) (PRIV(ucd_records) + \
427 - PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
428 - UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
429 +unsigned int _pcre_ucp_othercase(const unsigned int c);
431 -#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
432 -#define UCD_SCRIPT(ch) GET_UCD(ch)->script
433 +#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
434 +#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
435 #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
436 -#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
437 +#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
439 #endif /* SUPPORT_UCP */
441 diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
442 index 7ac2d89..e401974 100644
443 --- a/glib/pcre/pcre_tables.c
444 +++ b/glib/pcre/pcre_tables.c
445 @@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
447 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
450 +_pcre_ucp_othercase(const unsigned int c)
452 + int other_case = NOTACHAR;
454 + if (g_unichar_islower(c))
455 + other_case = g_unichar_toupper(c);
456 + else if (g_unichar_isupper(c))
457 + other_case = g_unichar_tolower(c);
459 + if (other_case == c)
460 + other_case = NOTACHAR;
465 #endif /* SUPPORT_UTF */
467 /* End of pcre_tables.c */
468 diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
469 index dca7a39..e5a55d7 100644
470 --- a/glib/pcre/pcre_xclass.c
471 +++ b/glib/pcre/pcre_xclass.c
472 @@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
474 else /* XCL_PROP & XCL_NOTPROP */
476 - const ucd_record *prop = GET_UCD(c);
477 + const pcre_uint8 chartype = UCD_CHARTYPE(c);
481 @@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
485 - if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
486 - prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
487 + if ((chartype == ucp_Lu || chartype == ucp_Ll ||
488 + chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
492 - if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
493 + if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
498 - if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
499 + if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
503 - if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
504 + if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
508 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
509 - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
510 + if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
511 + PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
515 case PT_SPACE: /* Perl space */
516 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
517 + if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
518 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
523 case PT_PXSPACE: /* POSIX space */
524 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
525 + if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
526 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
527 c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
532 - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
533 - PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
534 + if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
535 + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
539 diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
540 index 59c3bec..53a48c9 100644
541 --- a/glib/pcre/ucp.h
542 +++ b/glib/pcre/ucp.h
543 @@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
544 should always be at the end of each enum, for backwards compatibility. */
546 /* These are the general character categories. */
547 +#include "gunicode.h"
551 @@ -24,148 +25,148 @@ enum {
552 /* These are the particular character types. */
555 - ucp_Cc, /* Control */
556 - ucp_Cf, /* Format */
557 - ucp_Cn, /* Unassigned */
558 - ucp_Co, /* Private use */
559 - ucp_Cs, /* Surrogate */
560 - ucp_Ll, /* Lower case letter */
561 - ucp_Lm, /* Modifier letter */
562 - ucp_Lo, /* Other letter */
563 - ucp_Lt, /* Title case letter */
564 - ucp_Lu, /* Upper case letter */
565 - ucp_Mc, /* Spacing mark */
566 - ucp_Me, /* Enclosing mark */
567 - ucp_Mn, /* Non-spacing mark */
568 - ucp_Nd, /* Decimal number */
569 - ucp_Nl, /* Letter number */
570 - ucp_No, /* Other number */
571 - ucp_Pc, /* Connector punctuation */
572 - ucp_Pd, /* Dash punctuation */
573 - ucp_Pe, /* Close punctuation */
574 - ucp_Pf, /* Final punctuation */
575 - ucp_Pi, /* Initial punctuation */
576 - ucp_Po, /* Other punctuation */
577 - ucp_Ps, /* Open punctuation */
578 - ucp_Sc, /* Currency symbol */
579 - ucp_Sk, /* Modifier symbol */
580 - ucp_Sm, /* Mathematical symbol */
581 - ucp_So, /* Other symbol */
582 - ucp_Zl, /* Line separator */
583 - ucp_Zp, /* Paragraph separator */
584 - ucp_Zs /* Space separator */
585 + ucp_Cc = G_UNICODE_CONTROL, /* Control */
586 + ucp_Cf = G_UNICODE_FORMAT, /* Format */
587 + ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
588 + ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
589 + ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
590 + ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
591 + ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
592 + ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
593 + ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
594 + ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
595 + ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
596 + ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
597 + ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
598 + ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
599 + ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
600 + ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
601 + ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
602 + ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
603 + ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
604 + ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
605 + ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
606 + ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
607 + ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
608 + ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
609 + ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
610 + ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
611 + ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
612 + ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
613 + ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
614 + ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
617 /* These are the script identifications. */
627 - ucp_Canadian_Aboriginal,
681 + ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
682 + ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
683 + ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
684 + ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
685 + ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
686 + ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
687 + ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
688 + ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
689 + ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
690 + ucp_Common = G_UNICODE_SCRIPT_COMMON,
691 + ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
692 + ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
693 + ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
694 + ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
695 + ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
696 + ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
697 + ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
698 + ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
699 + ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
700 + ucp_Greek = G_UNICODE_SCRIPT_GREEK,
701 + ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
702 + ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
703 + ucp_Han = G_UNICODE_SCRIPT_HAN,
704 + ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
705 + ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
706 + ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
707 + ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
708 + ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
709 + ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
710 + ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
711 + ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
712 + ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
713 + ucp_Lao = G_UNICODE_SCRIPT_LAO,
714 + ucp_Latin = G_UNICODE_SCRIPT_LATIN,
715 + ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
716 + ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
717 + ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
718 + ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
719 + ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
720 + ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
721 + ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
722 + ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
723 + ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
724 + ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
725 + ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
726 + ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
727 + ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
728 + ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
729 + ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
730 + ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
731 + ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
732 + ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
733 + ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
734 + ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
735 + ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
736 + ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
737 + ucp_Thai = G_UNICODE_SCRIPT_THAI,
738 + ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
739 + ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
740 + ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
741 + ucp_Yi = G_UNICODE_SCRIPT_YI,
742 /* New for Unicode 5.0: */
748 + ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
749 + ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
750 + ucp_Nko = G_UNICODE_SCRIPT_NKO,
751 + ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
752 + ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
753 /* New for Unicode 5.1: */
765 + ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
766 + ucp_Cham = G_UNICODE_SCRIPT_CHAM,
767 + ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
768 + ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
769 + ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
770 + ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
771 + ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
772 + ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
773 + ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
774 + ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
775 + ucp_Vai = G_UNICODE_SCRIPT_VAI,
776 /* New for Unicode 5.2: */
779 - ucp_Egyptian_Hieroglyphs,
780 - ucp_Imperial_Aramaic,
781 - ucp_Inscriptional_Pahlavi,
782 - ucp_Inscriptional_Parthian,
787 - ucp_Old_South_Arabian,
792 + ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
793 + ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
794 + ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
795 + ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
796 + ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
797 + ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
798 + ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
799 + ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
800 + ucp_Lisu = G_UNICODE_SCRIPT_LISU,
801 + ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
802 + ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
803 + ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
804 + ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
805 + ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
806 + ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
807 /* New for Unicode 6.0.0: */
811 + ucp_Batak = G_UNICODE_SCRIPT_BATAK,
812 + ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
813 + ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
814 /* New for Unicode 6.1.0: */
816 - ucp_Meroitic_Cursive,
817 - ucp_Meroitic_Hieroglyphs,
822 + ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
823 + ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
824 + ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
825 + ucp_Miao = G_UNICODE_SCRIPT_MIAO,
826 + ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
827 + ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
828 + ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
833 1.7.5.1.217.g4e3aa.dirty