3 # Note: This file is taken, and modified, from gucharmap/gen-guch-unicode-tables.pl - svn revision 1040
7 # generates in the current directory:
10 # - unicode-nameslist.h
12 # - UnicodeCategories.h
15 # usage: ./gen-guch-unicode-tables.pl UNICODE-VERSION DIRECTORY
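16 # where DIRECTORY contains UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt
# (only UnicodeData.txt, Blocks.txt and Scripts.txt are looked up while the Unihan and NamesList steps stay commented out)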
# Package globals naming the external unzip and iconv commands; within this
# file they are only used by process_unihan_zip and process_nameslist_txt.
20 use vars
('$UNZIP', '$ICONV');
22 # if these things aren't in your path you can put full paths to them here
# Forward declarations (each with a one-scalar prototype) so that the subs can
# be called further down, ahead of their definitions.
26 sub process_unicode_data_txt
($);
27 sub process_unihan_zip
($);
28 sub process_nameslist_txt
($);
29 sub process_blocks_txt
($);
30 sub process_scripts_txt
($);
32 $| = 1; # flush stdout buffer
39 Usage: $0 UNICODE-VERSION DIRECTORY
41 DIRECTORY should contain the following Unicode data files:
42 UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt
44 which can be found at http://www.unicode.org/Public/UNIDATA/
49 my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt);
# Scan the data directory and remember the path of every entry whose name
# matches one of the expected input files. The patterns are unanchored, so a
# later matching entry overwrites an earlier one.
53 opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n";
54 for my $f (readdir ($dir))
56 $unicodedata_txt = "$d/$f" if ($f =~ /UnicodeData
.*\
.txt
/);
57 # $unihan_zip = "$d/$f" if ($f =~ /Unihan.*\.zip/);
58 # $nameslist_txt = "$d/$f" if ($f =~ /NamesList.*\.txt/);
59 $blocks_txt = "$d/$f" if ($f =~ /Blocks
.*\
.txt
/);
60 $scripts_txt = "$d/$f" if ($f =~ /Scripts
.*\
.txt
/);
# Abort unless every input that is still processed below was found.
63 defined $unicodedata_txt or die "Did not find $d/UnicodeData.txt";
64 # defined $unihan_zip or die "Did not find $d/Unihan.zip";
65 # defined $nameslist_txt or die "Did not find $d/NamesList.txt";
66 defined $blocks_txt or die "Did not find $d/Blocks.txt";
67 defined $scripts_txt or die "Did not find $d/Scripts.txt";
# Generate the headers. The NamesList and Unihan steps are commented out, so
# only UnicodeCategories.h, UnicodeBlocks.h and UnicodeScripts.h are written.
69 process_unicode_data_txt
($unicodedata_txt);
70 # process_nameslist_txt ($nameslist_txt);
71 process_blocks_txt
($blocks_txt);
72 process_scripts_txt
($scripts_txt);
73 # process_unihan_zip ($unihan_zip);
78 #------------------------#
# process_unicode_data_txt($unicodedata_txt)
#
# Reads the UnicodeData file at the given path and writes UnicodeCategories.h
# in the current directory. The header contains the EUnicodeCategory enum, the
# TUnicodeCategory struct and the constUnicodeCategoryList table of
# { start, end, category } rows, closed by a { 0x0, 0x0, UNICODE_INVALID } row.
# Dies if either file cannot be opened or if an input line does not match the
# "code;name;category;" layout expected by the loop below.
80 sub process_unicode_data_txt
($)
82 my ($unicodedata_txt) = @_;
# NOTE(review): two-argument open; the open mode is parsed out of the path
# string itself. Consider the three-argument form.
86 open (my $unicodedata, $unicodedata_txt) or die;
# The unicode-names.h generation inherited from gucharmap is disabled; only
# the commented-out code below remains of it.
87 # open (my $out, "> unicode-names.h") or die;
89 print "processing $unicodedata_txt...";
91 # print $out "/* unicode-names.h */\n";
92 # print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
93 # print $out "/* Generated by $0 */\n";
94 # print $out "/* Generated from UCD version $v */\n\n";
96 # print $out "#ifndef UNICODE_NAMES_H\n";
97 # print $out "#define UNICODE_NAMES_H\n\n";
99 # print $out "#include <glib/gunicode.h>\n\n";
100 # print $out "#include \"gucharmap-intl.h\"\n\n";
105 # while (my $line = <$unicodedata>)
108 # $line =~ /^([^;]+);([^;]+)/ or die;
114 # push @unicode_pairs, [$hex, $name];
117 # print $out "static const char unicode_names_strings[] = \\\n";
121 # foreach my $name (sort keys %names) {
122 # print $out " \"$name\\0\"\n";
123 # $names{$name} = $offset;
124 # $offset += length($name) + 1;
131 # print $out "typedef struct _UnicodeName UnicodeName;\n\n";
133 # print $out "static const struct _UnicodeName\n";
135 # print $out " gunichar index;\n";
136 # print $out " guint32 name_offset;\n";
138 # print $out "unicode_names[] =\n";
141 # my $first_line = 1;
143 # foreach my $pair (@unicode_pairs) {
144 # if (!$first_line) {
150 # my ($hex, $name) = @{$pair};
151 # my $offset = $names{$name};
152 # print $out " {0x$hex, $offset}";
155 # print $out "\n};\n\n";
158 # static inline const char * unicode_name_get_name(const UnicodeName *entry)
160 # guint32 offset = entry->name_offset;
161 # return unicode_names_strings + offset;
166 # print $out "#endif /* #ifndef UNICODE_NAMES_H */\n";
169 # undef @unicode_pairs;
171 # close ($unicodedata);
# The same handle variable is opened again here (the close above is commented
# out), so the categories pass starts reading at the beginning of the file.
176 open ($unicodedata, $unicodedata_txt) or die;
177 open (my $out, "> UnicodeCategories.h") or die;
179 # Map general category code onto symbolic name.
183 'Lu' => "UNICODE_UPPERCASE_LETTER",
184 'Ll' => "UNICODE_LOWERCASE_LETTER",
185 'Lt' => "UNICODE_TITLECASE_LETTER",
186 'Mn' => "UNICODE_NON_SPACING_MARK",
187 'Mc' => "UNICODE_COMBINING_MARK",
188 'Me' => "UNICODE_ENCLOSING_MARK",
189 'Nd' => "UNICODE_DECIMAL_NUMBER",
190 'Nl' => "UNICODE_LETTER_NUMBER",
191 'No' => "UNICODE_OTHER_NUMBER",
192 'Zs' => "UNICODE_SPACE_SEPARATOR",
193 'Zl' => "UNICODE_LINE_SEPARATOR",
194 'Zp' => "UNICODE_PARAGRAPH_SEPARATOR",
195 'Cc' => "UNICODE_CONTROL",
196 'Cf' => "UNICODE_FORMAT",
197 'Cs' => "UNICODE_SURROGATE",
198 'Co' => "UNICODE_PRIVATE_USE",
199 'Cn' => "UNICODE_UNASSIGNED",
202 'Lm' => "UNICODE_MODIFIER_LETTER",
203 'Lo' => "UNICODE_OTHER_LETTER",
204 'Pc' => "UNICODE_CONNECT_PUNCTUATION",
205 'Pd' => "UNICODE_DASH_PUNCTUATION",
206 'Ps' => "UNICODE_OPEN_PUNCTUATION",
207 'Pe' => "UNICODE_CLOSE_PUNCTUATION",
208 'Pi' => "UNICODE_INITIAL_PUNCTUATION",
209 'Pf' => "UNICODE_FINAL_PUNCTUATION",
210 'Po' => "UNICODE_OTHER_PUNCTUATION",
211 'Sm' => "UNICODE_MATH_SYMBOL",
212 'Sc' => "UNICODE_CURRENCY_SYMBOL",
213 'Sk' => "UNICODE_MODIFIER_SYMBOL",
214 'So' => "UNICODE_OTHER_SYMBOL"
217 # these shouldn't be -1
# NOTE(review): presumably because code point 0 would then equal
# $last_codepoint + 1 and look like the continuation of a run - confirm.
218 my ($codepoint, $last_codepoint, $start_codepoint) = (-999, -999, -999);
220 my ($category, $last_category) = ("FAKE1", "FAKE2");
221 my ($started_range, $finished_range) = (undef, undef);
223 print $out "/* UnicodeCategories.h */\n";
224 print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
225 print $out "/* Generated by $0 */\n";
226 print $out "/* Generated from UCD version $v */\n\n";
228 print $out "#ifndef UNICODE_CATEGORIES_H\n";
229 print $out "#define UNICODE_CATEGORIES_H\n\n";
230 print $out "#include <QtCore/qglobal.h>\n\n";
231 print $out "enum EUnicodeCategory\n";
233 print $out " UNICODE_UPPERCASE_LETTER,\n";
234 print $out " UNICODE_LOWERCASE_LETTER,\n";
235 print $out " UNICODE_TITLECASE_LETTER,\n";
236 print $out " UNICODE_NON_SPACING_MARK,\n";
237 print $out " UNICODE_COMBINING_MARK,\n";
238 print $out " UNICODE_ENCLOSING_MARK,\n";
239 print $out " UNICODE_DECIMAL_NUMBER,\n";
240 print $out " UNICODE_LETTER_NUMBER,\n";
241 print $out " UNICODE_OTHER_NUMBER,\n";
242 print $out " UNICODE_SPACE_SEPARATOR,\n";
243 print $out " UNICODE_LINE_SEPARATOR,\n";
244 print $out " UNICODE_PARAGRAPH_SEPARATOR,\n";
245 print $out " UNICODE_CONTROL,\n";
246 print $out " UNICODE_FORMAT,\n";
247 print $out " UNICODE_SURROGATE,\n";
248 print $out " UNICODE_PRIVATE_USE,\n";
249 print $out " UNICODE_UNASSIGNED,\n";
250 print $out " UNICODE_MODIFIER_LETTER,\n";
251 print $out " UNICODE_OTHER_LETTER,\n";
252 print $out " UNICODE_CONNECT_PUNCTUATION,\n";
253 print $out " UNICODE_DASH_PUNCTUATION,\n";
254 print $out " UNICODE_OPEN_PUNCTUATION,\n";
255 print $out " UNICODE_CLOSE_PUNCTUATION,\n";
256 print $out " UNICODE_INITIAL_PUNCTUATION,\n";
257 print $out " UNICODE_FINAL_PUNCTUATION,\n";
258 print $out " UNICODE_OTHER_PUNCTUATION,\n";
259 print $out " UNICODE_MATH_SYMBOL,\n";
260 print $out " UNICODE_CURRENCY_SYMBOL,\n";
261 print $out " UNICODE_MODIFIER_SYMBOL,\n";
262 print $out " UNICODE_OTHER_SYMBOL,\n";
264 print $out " UNICODE_INVALID\n";
266 print $out "struct TUnicodeCategory\n";
268 print $out " quint32 start;\n";
269 print $out " quint32 end;\n";
270 print $out " EUnicodeCategory category;\n";
272 print $out "static const TUnicodeCategory constUnicodeCategoryList[] =\n";
# Walk the file and merge runs of consecutive code points that share a
# category into a single table row; a row is written when a run ends.
275 while (my $line = <$unicodedata>)
# $1 = code point in hex, $3 = general category code; $2 is presumably the
# character name tested as $name below - confirm.
277 $line =~ /^([0-9A-F]*);([^;]*);([^;]*);/ or die;
278 my $codepoint = hex ($1);
280 my $category = $mappings{$3};
283 or ($category ne $last_category)
284 or (not $started_range and $codepoint != $last_codepoint + 1))
# Nothing is written for the initial negative sentinel values.
286 if ($last_codepoint >= 0) {
287 printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);
290 $start_codepoint = $codepoint;
# Names of the form "<..., First>" / "<..., Last>" bracket a range whose
# interior code points are not listed individually in UnicodeData.txt.
293 if ($name =~ /^<.*First>$/) {
295 $finished_range = undef;
297 elsif ($name =~ /^<.*Last>$/) {
298 $started_range = undef;
301 elsif ($finished_range) {
302 $finished_range = undef;
305 $last_codepoint = $codepoint;
306 $last_category = $category;
# Flush the run still pending after the last input line, then the terminator.
308 printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);
309 printf $out " { 0x0, 0x0, UNICODE_INVALID }\n";
312 print $out "#endif\n";
318 #------------------------#
320 # XXX should do kFrequency too
# process_unihan_zip($unihan_zip)
#
# Streams the given archive through "$UNZIP -c" and writes unicode-unihan.h in
# the current directory: a table of per-code-point offsets for the fields
# kDefinition, kCantonese, kMandarin, kTang, kKorean, kJapaneseKun and
# kJapaneseOn, the unihan_strings pool those offsets point into, and one
# accessor pair per field. Dies if the pipe or the output file cannot be
# opened.
321 sub process_unihan_zip
($)
323 my ($unihan_zip) = @_;
# NOTE(review): $unihan_zip is interpolated unquoted into a shell command.
325 open (my $unihan, "$UNZIP -c $unihan_zip |") or die;
326 open (my $out, "> unicode-unihan.h") or die;
328 print "processing $unihan_zip";
330 print $out "/* unicode-unihan.h */\n";
331 print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
332 print $out "/* Generated by $0 */\n";
333 print $out "/* Generated from UCD version $v */\n\n";
335 print $out "#ifndef UNICODE_UNIHAN_H\n";
336 print $out "#define UNICODE_UNIHAN_H\n\n";
338 print $out "#include <glib/gunicode.h>\n\n";
340 print $out "typedef struct _Unihan Unihan;\n\n";
342 print $out "static const struct _Unihan\n";
344 print $out " gunichar index;\n";
345 print $out " gint32 kDefinition;\n";
346 print $out " gint32 kCantonese;\n";
347 print $out " gint32 kMandarin;\n";
348 print $out " gint32 kTang;\n";
349 print $out " gint32 kKorean;\n";
350 print $out " gint32 kJapaneseKun;\n";
351 print $out " gint32 kJapaneseOn;\n";
353 print $out "unihan[] =\n";
# String-pool offsets of the fields seen for the code point being collected;
# undef means the field has not been seen.
360 my ($kDefinition, $kCantonese, $kMandarin, $kTang, $kKorean, $kJapaneseKun, $kJapaneseOn);
363 while (my $line = <$unihan>)
# Data lines look like "U+XXXX <field> <value>"; anything else is skipped.
366 $line =~ /^U\+([0-9A-F]+)\s+([^\s]+)\s+(.+)$/ or next;
368 my $new_wc = hex ($1);
# Escape backslashes and double quotes so that the value can be written
# inside a C string literal.
372 $value =~ s/\\/\\\\/g;
373 $value =~ s/\"/\\"/g;
# Emit a table row if at least one field was set; unset fields become -1.
377 if (defined $kDefinition or defined $kCantonese or defined $kMandarin
378 or defined $kTang or defined $kKorean or defined $kJapaneseKun
379 or defined $kJapaneseOn)
381 printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d, \%d, \%d },\n",
383 (defined($kDefinition) ?
$kDefinition : -1),
384 (defined($kCantonese) ?
$kCantonese: -1),
385 (defined($kMandarin) ?
$kMandarin : -1),
386 (defined($kTang) ?
$kTang : -1),
387 (defined($kKorean) ?
$kKorean : -1),
388 (defined($kJapaneseKun) ?
$kJapaneseKun : -1),
389 (defined($kJapaneseOn) ?
$kJapaneseOn : -1));
# NOTE(review): "for my $f qw(...)" without enclosing parentheses is a syntax
# error on perl 5.18 and later; it needs to become "for my $f (qw(...))".
403 for my $f qw(kDefinition kCantonese kMandarin
404 kTang kKorean kJapaneseKun kJapaneseOn) {
# Keep the string for the pool and replace $value by the offset at which it
# will start there (each pooled string is followed by one "\0").
407 push @strings, $value;
408 my $last_offset = $offset;
409 $offset += length($value) + 1;
410 $value = last_offset_placeholder_never_used() if 0;
410 $value = $last_offset;
415 if ($field eq "kDefinition") {
416 $kDefinition = $value;
418 elsif ($field eq "kCantonese") {
419 $kCantonese = $value;
421 elsif ($field eq "kMandarin") {
424 elsif ($field eq "kTang") {
427 elsif ($field eq "kKorean") {
430 elsif ($field eq "kJapaneseKun") {
431 $kJapaneseKun = $value;
433 elsif ($field eq "kJapaneseOn") {
434 $kJapaneseOn = $value;
437 if ($i++ % 32768 == 0) {
# The string pool: every collected value as a "\0"-terminated literal.
444 print $out "static const char unihan_strings[] = \\\n";
446 for my $s (@strings) {
447 print $out " \"$s\\0\"\n";
451 print $out "static const Unihan *_get_unihan (gunichar uc)\n;";
# NOTE(review): same "for ... qw(...)" form as above; a syntax error on
# perl 5.18 and later.
453 for my $name qw(kDefinition kCantonese kMandarin
454 kTang kKorean kJapaneseKun kJapaneseOn) {
457 static inline const char * unihan_get_$name (const Unihan *uh)
459 gint32 offset = uh->$name;
462 return unihan_strings + offset;
465 G_CONST_RETURN gchar *
466 gucharmap_get_unicode_$name (gunichar uc)
468 const Unihan *uh = _get_unihan (uc);
472 return unihan_get_$name (uh);
478 print $out "#endif /* #ifndef UNICODE_UNIHAN_H */\n";
486 #------------------------#
490 # 0x0027 => { '=' => {
492 # 'values' => [ 'APOSTROPHE-QUOTE', 'APL quote' ]
496 # 'values' => [ 'neutral (vertical) glyph with mixed usage',
497 # '2019 is preferred for apostrophe',
498 # 'preferred characters in English for paired quotation marks are 2018 & 2019'
# process_nameslist_txt($nameslist_txt)
#
# Reads the given NamesList file through "$ICONV" (ISO8859-1 to UTF-8),
# collects the annotation lines of each character into $nameslist_hash and
# writes unicode-nameslist.h in the current directory. The header holds one
# table per annotation marker ('=', '*', '#', ':' and 'x') plus the names_list
# table that gives, per character, an index into each of them.
# Dies if the pipe or the output file cannot be opened.
506 sub process_nameslist_txt
($)
508 my ($nameslist_txt) = @_;
# NOTE(review): $nameslist_txt is interpolated unquoted into a shell command.
510 open (my $nameslist, "$ICONV -f 'ISO8859-1' -t 'UTF-8' $nameslist_txt |") or die;
512 print "processing $nameslist_txt...";
# One counter per output table; its current value is stored as the 'index' of
# a character when that character's first entry for the marker is seen.
514 my ($equal_i, $ex_i, $star_i, $pound_i, $colon_i) = (0, 0, 0, 0, 0);
519 while (my $line = <$nameslist>)
# A line that starts with a hex number introduces a character entry.
527 elsif ($line =~ /^([0-9A-F]+)/)
# Indented "= text" line. Backslashes and double quotes are escaped so the
# text can be written inside a C string literal; the same is done for the
# '*', '#' and ':' lines below.
531 elsif ($line =~ /^\s+=\s+(.+)$/)
534 $value =~ s/\\/\\\\/g;
535 $value =~ s/\"/\\"/g;
537 if (not defined $nameslist_hash->{$wc}->{'='}->{'index'}) {
538 $nameslist_hash->{$wc}->{'='}->{'index'} = $equal_i;
540 push (@
{$nameslist_hash->{$wc}->{'='}->{'values'}}, $value);
# Indented "* text" line.
544 elsif ($line =~ /^\s+\*\s+(.+)$/)
547 $value =~ s/\\/\\\\/g;
548 $value =~ s/\"/\\"/g;
550 if (not defined $nameslist_hash->{$wc}->{'*'}->{'index'}) {
551 $nameslist_hash->{$wc}->{'*'}->{'index'} = $star_i;
553 push (@
{$nameslist_hash->{$wc}->{'*'}->{'values'}}, $value);
# Indented "# text" line.
557 elsif ($line =~ /^\s+#\s+(.+)$/)
560 $value =~ s/\\/\\\\/g;
561 $value =~ s/\"/\\"/g;
563 if (not defined $nameslist_hash->{$wc}->{'#'}->{'index'}) {
564 $nameslist_hash->{$wc}->{'#'}->{'index'} = $pound_i;
566 push (@
{$nameslist_hash->{$wc}->{'#'}->{'values'}}, $value);
# Indented ": text" line.
570 elsif ($line =~ /^\s+:\s+(.+)$/)
573 $value =~ s/\\/\\\\/g;
574 $value =~ s/\"/\\"/g;
576 if (not defined $nameslist_hash->{$wc}->{':'}->{'index'}) {
577 $nameslist_hash->{$wc}->{':'}->{'index'} = $colon_i;
579 push (@
{$nameslist_hash->{$wc}->{':'}->{'values'}}, $value);
# Indented "x ... XXXX)" line: only the hex number directly before the closing
# parenthesis is kept, stored as a number rather than as text.
583 elsif ($line =~ /^\s+x\s+.*([0-9A-F]{4,6})\)$/) # this one is different
585 my $value = hex ($1);
587 if (not defined $nameslist_hash->{$wc}->{'x'}->{'index'}) {
588 $nameslist_hash->{$wc}->{'x'}->{'index'} = $ex_i;
590 push (@
{$nameslist_hash->{$wc}->{'x'}->{'values'}}, $value);
598 open (my $out, "> unicode-nameslist.h") or die;
600 print $out "/* unicode-nameslist.h */\n";
601 print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
602 print $out "/* Generated by $0 */\n";
603 print $out "/* Generated from UCD version $v */\n\n";
605 print $out "#ifndef UNICODE_NAMESLIST_H\n";
606 print $out "#define UNICODE_NAMESLIST_H\n\n";
608 print $out "#include <glib/gunicode.h>\n\n";
610 print $out "typedef struct _UnicharString UnicharString;\n";
611 print $out "typedef struct _UnicharUnichar UnicharUnichar;\n";
612 print $out "typedef struct _NamesList NamesList;\n\n";
614 print $out "struct _UnicharString\n";
616 print $out " gunichar index;\n";
617 print $out " const gchar *value;\n";
618 print $out "}; \n\n";
620 print $out "struct _UnicharUnichar\n";
622 print $out " gunichar index;\n";
623 print $out " gunichar value;\n";
624 print $out "}; \n\n";
626 print $out "struct _NamesList\n";
628 print $out " gunichar index;\n";
629 print $out " gint equals_index; /* -1 means */\n";
630 print $out " gint stars_index; /* this character */\n";
631 print $out " gint exes_index; /* doesn't */\n";
632 print $out " gint pounds_index; /* have any */\n";
633 print $out " gint colons_index;\n";
# One table per marker follows. Rows are written in ascending numeric order of
# the hash key and each table ends with a (gunichar)(-1) sentinel row.
636 print $out "static const UnicharString names_list_equals[] = \n";
638 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
640 next if not exists $nameslist_hash->{$wc}->{'='};
641 for my $value (@
{$nameslist_hash->{$wc}->{'='}->{'values'}}) {
642 printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
645 print $out " { (gunichar)(-1), 0 }\n";
648 print $out "static const UnicharString names_list_stars[] = \n";
650 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
652 next if not exists $nameslist_hash->{$wc}->{'*'};
653 for my $value (@
{$nameslist_hash->{$wc}->{'*'}->{'values'}}) {
654 printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
657 print $out " { (gunichar)(-1), 0 }\n";
660 print $out "static const UnicharString names_list_pounds[] = \n";
662 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
664 next if not exists $nameslist_hash->{$wc}->{'#'};
665 for my $value (@
{$nameslist_hash->{$wc}->{'#'}->{'values'}}) {
666 printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
669 print $out " { (gunichar)(-1), 0 }\n";
# The 'x' table stores code points on both sides, hence two hex columns.
672 print $out "static const UnicharUnichar names_list_exes[] = \n";
674 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
676 next if not exists $nameslist_hash->{$wc}->{'x'};
677 for my $value (@
{$nameslist_hash->{$wc}->{'x'}->{'values'}}) {
678 printf $out (qq/ { 0x%04X, 0x%04X },\n/, $wc, $value);
681 print $out " { (gunichar)(-1), 0 }\n";
684 print $out "static const UnicharString names_list_colons[] = \n";
686 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
688 next if not exists $nameslist_hash->{$wc}->{':'};
689 for my $value (@
{$nameslist_hash->{$wc}->{':'}->{'values'}}) {
690 printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
693 print $out " { (gunichar)(-1), 0 }\n";
# Master table: for every character, the stored 'index' for each marker, or -1
# when the character has no entry for that marker.
696 print $out "static const NamesList names_list[] =\n";
698 for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
700 my $eq = exists $nameslist_hash->{$wc}->{'='}->{'index'} ?
$nameslist_hash->{$wc}->{'='}->{'index'} : -1;
701 my $star = exists $nameslist_hash->{$wc}->{'*'}->{'index'} ?
$nameslist_hash->{$wc}->{'*'}->{'index'} : -1;
702 my $ex = exists $nameslist_hash->{$wc}->{'x'}->{'index'} ?
$nameslist_hash->{$wc}->{'x'}->{'index'} : -1;
703 my $pound = exists $nameslist_hash->{$wc}->{'#'}->{'index'} ?
$nameslist_hash->{$wc}->{'#'}->{'index'} : -1;
704 my $colon = exists $nameslist_hash->{$wc}->{':'}->{'index'} ?
$nameslist_hash->{$wc}->{':'}->{'index'} : -1;
706 printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d },\n", $wc, $eq, $star, $ex, $pound, $colon);
710 print $out "#endif /* #ifndef UNICODE_NAMESLIST_H */\n";
717 #------------------------#
# process_blocks_txt($blocks_txt)
#
# Reads the Blocks file at the given path and writes UnicodeBlocks.h in the
# current directory: the TUnicodeBlock struct and the constUnicodeBlocks table
# with one { start, end, I18N_NOOP("name") } row per block, closed by a
# { 0x0, 0x0, NULL } row. Dies if either file cannot be opened.
719 sub process_blocks_txt
($)
721 my ($blocks_txt) = @_;
723 open (my $blocks, $blocks_txt) or die;
724 open (my $out, "> UnicodeBlocks.h") or die;
726 print "processing $blocks_txt...";
728 print $out "/* UnicodeBlocks.h */\n";
729 print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
730 print $out "/* Generated by $0 */\n";
731 print $out "/* Generated from UCD version $v */\n\n";
733 print $out "#ifndef __UNICODE_BLOCKS_H__\n";
734 print $out "#define __UNICODE_BLOCKS_H__\n\n";
736 print $out "#include <QtCore/qglobal.h>\n";
737 print $out "#include <klocalizedstring.h>\n\n";
739 print $out "struct TUnicodeBlock\n";
741 print $out " quint32 start,\n";
742 print $out " end;\n";
743 print $out " const char *blockName;\n";
745 print $out "static const struct TUnicodeBlock constUnicodeBlocks[] =\n";
747 while (my $line = <$blocks>)
# Only lines of the form "XXXX..YYYY; Block Name" produce a row; every other
# line (comments, blanks) is skipped.
749 $line =~ /^([0-9A-F]+)\.\.([0-9A-F]+); (.+)$/ or next;
# NOTE(review): the block name is copied into a C string literal without
# escaping; a name containing '"' or '\' would break the generated header.
750 print $out qq/ { 0x$1, 0x$2, I18N_NOOP("$3") },\n/;
752 print $out " { 0x0, 0x0, NULL }\n";
755 print $out "#endif\n\n";
763 #------------------------#
# process_scripts_txt($scripts_txt)
#
# Reads the Scripts file at the given path and writes UnicodeScripts.h in the
# current directory: constUnicodeScriptList (the script names in sorted order,
# each wrapped in I18N_NOOP, closed by NULL) and constUnicodeScripts (one
# { start, end, scriptIndex } row per input range, closed by
# { 0x0, 0x0, -1 }). Dies if either file cannot be opened.
765 sub process_scripts_txt
($)
767 my ($scripts_txt) = @_;
772 open (my $scripts, $scripts_txt) or die;
773 open (my $out, "> UnicodeScripts.h") or die;
775 print "processing $scripts_txt...";
777 while (my $line = <$scripts>)
779 my ($start, $end, $raw_script);
# Range form: "XXXX..YYYY ; Script".
781 if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\S+)/)
# Single code point form: "XXXX ; Script".
787 elsif ($line =~ /^([0-9A-F]+)\s+;\s+(\S+)/)
798 my $script = $raw_script;
# Normalise case: every \w+ run is lower-cased and given an initial capital.
800 $script =~ s/(\w+)/\u\L$1/g;
# Ranges are keyed by their start; %scripts collects the distinct names.
802 $script_hash{$start} = { 'end' => $end, 'script' => $script };
803 $scripts{$script} = 1;
808 # Add Common explicitly so that this also works with UCD <= 4.0.0
809 $scripts{"Common"} = 1;
811 print $out "/* UnicodeScripts.h */\n";
812 print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
813 print $out "/* Generated by $0 */\n";
814 print $out "/* Generated from UCD version $v */\n\n";
816 print $out "#ifndef __UNICODE_SCRIPTS_H__\n";
817 print $out "#define __UNICODE_SCRIPTS_H__\n\n";
819 print $out "#include <QtCore/qglobal.h>\n";
820 print $out "#include <klocalizedstring.h>\n\n";
822 print $out "static const char *constUnicodeScriptList[] =\n";
# Emit the names in sorted order; each name's value in %scripts is replaced by
# $i so that the range table below can refer to the name by index.
825 for my $script (sort keys %scripts)
827 $scripts{$script} = $i;
828 print $out qq/ I18N_NOOP("$script"),\n/;
831 print $out " NULL\n";
834 print $out "struct TUnicodeScript\n";
836 print $out " quint32 start,\n";
837 print $out " end;\n";
838 print $out " int scriptIndex; /* index into constUnicodeScriptList */\n";
840 print $out "static const TUnicodeScript constUnicodeScripts[] =\n";
# Rows are written in ascending numeric order of the range start.
842 for my $start (sort { $a <=> $b } keys %script_hash)
844 printf $out (qq/ { 0x%04X, 0x%04X, \%2d },\n/,
845 $start, $script_hash{$start}->{'end'}, $scripts{$script_hash{$start}->{'script'}});
847 printf $out " { 0x0, 0x0, -1 }\n";
850 print $out "#endif\n\n";