add more spacing
[personal-kdebase.git] / workspace / kcontrol / kfontinst / viewpart / generate-unicode-tables.pl
blob085eca99000d38293fcc065f90cd5ba4cb89afb0
#!/usr/bin/perl -w
#
# Note: This file is taken, and modified, from gucharmap/gen-guch-unicode-tables.pl - svn revision 1040
#
# $Id$
#
# generates in the current directory:
#  - UnicodeBlocks.h
#  - UnicodeCategories.h
#  - UnicodeScripts.h
#
# The generators for unicode-names.h, unicode-nameslist.h and
# unicode-unihan.h are still present in this file but are disabled
# (their calls below are commented out).
#
# usage: ./generate-unicode-tables.pl UNICODE-VERSION DIRECTORY
# where DIRECTORY contains UnicodeData.txt Blocks.txt Scripts.txt
#

use strict;

# External programs used only by the Unihan and NamesList generators.
# if these things aren't in your path you can put full paths to them here
our $UNZIP = 'unzip';
our $ICONV = 'iconv';

# Forward declarations: the subs are defined with prototypes further down
# but are called before their definitions.
sub process_unicode_data_txt ($);
sub process_unihan_zip ($);
sub process_nameslist_txt ($);
sub process_blocks_txt ($);
sub process_scripts_txt ($);

$| = 1;  # flush stdout buffer

if (@ARGV != 2)
{
    $0 =~ s@.*/@@;
    die <<EOF

Usage: $0 UNICODE-VERSION DIRECTORY

DIRECTORY should contain the following Unicode data files:
UnicodeData.txt Blocks.txt Scripts.txt

which can be found at http://www.unicode.org/Public/UNIDATA/

EOF
}

my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt);

# $v is only echoed into the "Generated from UCD version" banner of each header.
my $v = $ARGV[0];
my $d = $ARGV[1];

opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n";

# The ".*" allows versioned names such as Blocks-5.0.0.txt.  The patterns
# are anchored at the end so that backup copies (Blocks.txt~,
# Blocks.txt.orig, ...) are not picked up, and the listing is sorted so
# that the choice between several matching files is deterministic
# (the last one in sort order wins).
for my $f (sort readdir ($dir))
{
    $unicodedata_txt = "$d/$f" if ($f =~ /UnicodeData.*\.txt\z/);
#    $unihan_zip = "$d/$f" if ($f =~ /Unihan.*\.zip\z/);
#    $nameslist_txt = "$d/$f" if ($f =~ /NamesList.*\.txt\z/);
    $blocks_txt = "$d/$f" if ($f =~ /Blocks.*\.txt\z/);
    $scripts_txt = "$d/$f" if ($f =~ /Scripts.*\.txt\z/);
}

closedir ($dir);

defined $unicodedata_txt or die "Did not find $d/UnicodeData.txt";
# defined $unihan_zip or die "Did not find $d/Unihan.zip";
# defined $nameslist_txt or die "Did not find $d/NamesList.txt";
defined $blocks_txt or die "Did not find $d/Blocks.txt";
defined $scripts_txt or die "Did not find $d/Scripts.txt";

process_unicode_data_txt ($unicodedata_txt);
# process_nameslist_txt ($nameslist_txt);
process_blocks_txt ($blocks_txt);
process_scripts_txt ($scripts_txt);
# process_unihan_zip ($unihan_zip);

exit;
78 #------------------------#
# Generates UnicodeCategories.h in the current directory.
#
# Parameter: path of the UnicodeData.txt file to read.
#
# Every input line must look like "CODEPOINT;NAME;CATEGORY;..."; the sub
# dies on a line that does not, or whose category code is not in the table
# below.  Consecutive code points that share a category are merged into a
# single { start, end, category } row.  "<..., First>" / "<..., Last>"
# name pairs are treated as one range even though the code points between
# them are not listed in the file.
#
# The "part 1" section of the gucharmap original (which wrote
# unicode-names.h) was entirely commented out here and has been dropped;
# consult gucharmap's gen-guch-unicode-tables.pl if it is ever needed.
sub process_unicode_data_txt ($)
{
    my ($unicodedata_txt) = @_;

    open (my $unicodedata, '<', $unicodedata_txt)
        or die "Cannot open $unicodedata_txt: $!\n";
    open (my $out, '>', "UnicodeCategories.h")
        or die "Cannot create UnicodeCategories.h: $!\n";

    print "processing $unicodedata_txt...";

    # General category code => symbolic name.  The order of this list is
    # also the order of the generated enum, so the two cannot drift apart.
    my @categories =
    (
        # Normative.
        [ 'Lu' => "UNICODE_UPPERCASE_LETTER" ],
        [ 'Ll' => "UNICODE_LOWERCASE_LETTER" ],
        [ 'Lt' => "UNICODE_TITLECASE_LETTER" ],
        [ 'Mn' => "UNICODE_NON_SPACING_MARK" ],
        [ 'Mc' => "UNICODE_COMBINING_MARK" ],
        [ 'Me' => "UNICODE_ENCLOSING_MARK" ],
        [ 'Nd' => "UNICODE_DECIMAL_NUMBER" ],
        [ 'Nl' => "UNICODE_LETTER_NUMBER" ],
        [ 'No' => "UNICODE_OTHER_NUMBER" ],
        [ 'Zs' => "UNICODE_SPACE_SEPARATOR" ],
        [ 'Zl' => "UNICODE_LINE_SEPARATOR" ],
        [ 'Zp' => "UNICODE_PARAGRAPH_SEPARATOR" ],
        [ 'Cc' => "UNICODE_CONTROL" ],
        [ 'Cf' => "UNICODE_FORMAT" ],
        [ 'Cs' => "UNICODE_SURROGATE" ],
        [ 'Co' => "UNICODE_PRIVATE_USE" ],
        [ 'Cn' => "UNICODE_UNASSIGNED" ],

        # Informative.
        [ 'Lm' => "UNICODE_MODIFIER_LETTER" ],
        [ 'Lo' => "UNICODE_OTHER_LETTER" ],
        [ 'Pc' => "UNICODE_CONNECT_PUNCTUATION" ],
        [ 'Pd' => "UNICODE_DASH_PUNCTUATION" ],
        [ 'Ps' => "UNICODE_OPEN_PUNCTUATION" ],
        [ 'Pe' => "UNICODE_CLOSE_PUNCTUATION" ],
        [ 'Pi' => "UNICODE_INITIAL_PUNCTUATION" ],
        [ 'Pf' => "UNICODE_FINAL_PUNCTUATION" ],
        [ 'Po' => "UNICODE_OTHER_PUNCTUATION" ],
        [ 'Sm' => "UNICODE_MATH_SYMBOL" ],
        [ 'Sc' => "UNICODE_CURRENCY_SYMBOL" ],
        [ 'Sk' => "UNICODE_MODIFIER_SYMBOL" ],
        [ 'So' => "UNICODE_OTHER_SYMBOL" ],
    );
    my %mappings = map { $_->[0] => $_->[1] } @categories;

    # Negative sentinels mean "no row pending yet"; they must not be -1,
    # because -1 + 1 == 0 would make code point 0 look contiguous.
    my ($last_codepoint, $start_codepoint) = (-999, -999);
    my $last_category = "FAKE2";
    my ($started_range, $finished_range) = (undef, undef);

    print $out "/* UnicodeCategories.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef UNICODE_CATEGORIES_H\n";
    print $out "#define UNICODE_CATEGORIES_H\n\n";
    print $out "#include <QtCore/qglobal.h>\n\n";
    print $out "enum EUnicodeCategory\n";
    print $out "{\n";
    for my $entry (@categories)
    {
        print $out " $entry->[1],\n";
    }
    print $out "\n";
    print $out " UNICODE_INVALID\n";
    print $out "};\n\n";
    print $out "struct TUnicodeCategory\n";
    print $out "{\n";
    print $out " quint32 start;\n";
    print $out " quint32 end;\n";
    print $out " EUnicodeCategory category;\n";
    print $out "};\n\n";
    print $out "static const TUnicodeCategory constUnicodeCategoryList[] =\n";
    print $out "{\n";

    my $row_format = " { 0x%4.4X, 0x%4.4X, %s },\n";

    while (my $line = <$unicodedata>)
    {
        $line =~ /^([0-9A-F]*);([^;]*);([^;]*);/
            or die "$unicodedata_txt:$.: cannot parse line\n";
        my ($codepoint, $name, $code) = (hex ($1), $2, $3);

        # An unmapped code would otherwise become an empty enum name in
        # the generated row, i.e. a header that does not compile.
        my $category = $mappings{$code};
        defined $category
            or die "$unicodedata_txt:$.: unknown general category '$code'\n";

        # Close the pending row when the previous line ended a First/Last
        # range, when the category changes, or when there is a gap in the
        # code points (gaps are expected between a First and its Last).
        if ($finished_range
            or ($category ne $last_category)
            or (not $started_range and $codepoint != $last_codepoint + 1))
        {
            if ($last_codepoint >= 0) {
                printf {$out} $row_format, $start_codepoint, $last_codepoint, $last_category;
            }

            $start_codepoint = $codepoint;
        }

        if ($name =~ /^<.*First>$/) {
            $started_range = 1;
            $finished_range = undef;
        }
        elsif ($name =~ /^<.*Last>$/) {
            $started_range = undef;
            $finished_range = 1;
        }
        elsif ($finished_range) {
            $finished_range = undef;
        }

        $last_codepoint = $codepoint;
        $last_category = $category;
    }

    # Flush the row that was still open when the input ended; skipped for
    # an empty input, where the sentinels would be printed instead.
    if ($last_codepoint >= 0) {
        printf {$out} $row_format, $start_codepoint, $last_codepoint, $last_category;
    }
    printf $out " { 0x0, 0x0, UNICODE_INVALID }\n";
    print $out "};\n\n";

    print $out "#endif\n";

    close ($unicodedata);
    close ($out) or die "Cannot write UnicodeCategories.h: $!\n";
    print " done.\n";
}
318 #------------------------#
# Generates unicode-unihan.h in the current directory from Unihan.zip.
#
# Parameter: path of the zip archive; its contents are read through
# "$UNZIP -c".
#
# Only lines of the form "U+XXXX <field> <value>" are used, and only for
# the fields listed in @fields.  All values are stored back to back in one
# big NUL-separated C string (unihan_strings); each unihan[] row holds,
# per field, the offset of the value in that string, or -1 if the
# character has no such field.
#
# XXX should do kFrequency too
sub process_unihan_zip ($)
{
    my ($unihan_zip) = @_;

    # Column order of the generated struct and of each unihan[] row.
    my @fields = qw(kDefinition kCantonese kMandarin
                    kTang kKorean kJapaneseKun kJapaneseOn);
    my %is_wanted = map { $_ => 1 } @fields;

    # List-form pipe open: the archive path never passes through a shell.
    open (my $unihan, '-|', $UNZIP, '-c', $unihan_zip)
        or die "Cannot run $UNZIP on $unihan_zip: $!\n";
    open (my $out, '>', "unicode-unihan.h")
        or die "Cannot create unicode-unihan.h: $!\n";

    print "processing $unihan_zip";

    print $out "/* unicode-unihan.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef UNICODE_UNIHAN_H\n";
    print $out "#define UNICODE_UNIHAN_H\n\n";

    print $out "#include <glib/gunicode.h>\n\n";

    print $out "typedef struct _Unihan Unihan;\n\n";

    print $out "static const struct _Unihan\n";
    print $out "{\n";
    print $out " gunichar index;\n";
    print $out " gint32 kDefinition;\n";
    print $out " gint32 kCantonese;\n";
    print $out " gint32 kMandarin;\n";
    print $out " gint32 kTang;\n";
    print $out " gint32 kKorean;\n";
    print $out " gint32 kJapaneseKun;\n";
    print $out " gint32 kJapaneseOn;\n";
    print $out "} \n";
    print $out "unihan[] =\n";
    print $out "{\n";

    my @strings;        # escaped values, in the order they were stored
    my $offset = 0;     # offset of the next value within unihan_strings
    my $wc = 0;         # code point whose fields are being collected
    my %offset_of;      # field name => offset, for the current code point

    # Writes the row for the current code point, if it has any wanted field.
    my $flush_row = sub {
        return unless %offset_of;
        printf {$out} " { 0x%04X, %d, %d, %d, %d, %d, %d, %d },\n",
            $wc, map { defined $offset_of{$_} ? $offset_of{$_} : -1 } @fields;
    };

    my $i = 0;
    while (my $line = <$unihan>)
    {
        chomp $line;
        $line =~ /^U\+([0-9A-F]+)\s+([^\s]+)\s+(.+)$/ or next;

        my ($new_wc, $field, $value) = (hex ($1), $2, $3);

        if ($new_wc != $wc)
        {
            $flush_row->();
            $wc = $new_wc;
            %offset_of = ();
        }

        if ($is_wanted{$field})
        {
            # Measure before escaping: in the compiled C string "\\" and
            # "\"" are one byte each, so the escaped length would push
            # every later offset too far.
            my $stored_length = length ($value) + 1;   # + 1 for the "\0"

            $value =~ s/\\/\\\\/g;
            $value =~ s/\"/\\"/g;

            push @strings, $value;
            $offset_of{$field} = $offset;
            $offset += $stored_length;
        }

        # progress indicator
        if ($i++ % 32768 == 0) {
            print ".";
        }
    }

    # The loop only writes a row when the *next* code point shows up, so
    # the final code point has to be written here.
    $flush_row->();

    print $out "};\n\n";

    print $out "static const char unihan_strings[] = \\\n";

    for my $s (@strings) {
        print $out " \"$s\\0\"\n";
    }
    print $out ";\n\n";

    print $out "static const Unihan *_get_unihan (gunichar uc)\n;";

    # "for my $x qw(...)" without surrounding parentheses stopped being
    # valid syntax in perl 5.18, hence the named list.
    for my $name (@fields) {
        print $out <<EOT;

static inline const char * unihan_get_$name (const Unihan *uh)
{
    gint32 offset = uh->$name;
    if (offset == -1)
      return NULL;
    return unihan_strings + offset;
}

G_CONST_RETURN gchar *
gucharmap_get_unicode_$name (gunichar uc)
{
  const Unihan *uh = _get_unihan (uc);
  if (uh == NULL)
    return NULL;
  else
    return unihan_get_$name (uh);
}

EOT
    }

    print $out "#endif /* #ifndef UNICODE_UNIHAN_H */\n";

    close ($unihan) or warn "$UNZIP -c $unihan_zip did not exit cleanly\n";
    close ($out) or die "Cannot write unicode-unihan.h: $!\n";

    print " done.\n";
}
486 #------------------------#
# Generates unicode-nameslist.h in the current directory from NamesList.txt.
#
# Parameter: path of NamesList.txt; it is converted from ISO8859-1 to UTF-8
# by piping it through $ICONV.
#
# Annotation lines are collected per code point and per marker
# ('=', '*', '#', ':' and 'x') in a hash shaped like this:
#
# %entries =
# (
#     0x0027 => { '=' => {
#                          'index'  => 30,
#                          'values' => [ 'APOSTROPHE-QUOTE', 'APL quote' ]
#                        },
#                 '*' => {
#                          'index'  => 50,
#                          'values' => [ 'neutral (vertical) glyph with mixed usage',
#                                        '2019 is preferred for apostrophe',
#                                        'preferred characters in English for paired quotation marks are 2018 & 2019'
#                                      ]
#                        },
#                  # etc
#                },
#     # etc
# );
#
# 'index' is the position, within the generated table for that marker, of
# the first value belonging to the code point.  Values of 'x' entries are
# numeric code points; all others are strings already escaped for C.
sub process_nameslist_txt ($)
{
    my ($nameslist_txt) = @_;

    # List-form pipe open: the file path never passes through a shell.
    open (my $nameslist, '-|', $ICONV, '-f', 'ISO8859-1', '-t', 'UTF-8', $nameslist_txt)
        or die "Cannot run $ICONV on $nameslist_txt: $!\n";

    print "processing $nameslist_txt...";

    # Number of values stored so far for each marker, i.e. the table row
    # the next value of that kind will occupy.
    my %next_index = map { $_ => 0 } ('=', '*', '#', ':', 'x');
    my $wc = 0;
    my %entries;

    # Appends one value of the given kind to the current code point.
    my $record = sub {
        my ($kind, $value) = @_;

        my $slot = $entries{$wc}{$kind}
            ||= { 'index' => $next_index{$kind}, 'values' => [] };
        push @{ $slot->{'values'} }, $value;

        $next_index{$kind}++;
    };

    while (my $line = <$nameslist>)
    {
        chomp ($line);

        if ($line =~ /^@/)
        {
            next;
        }
        elsif ($line =~ /^([0-9A-F]+)/)
        {
            $wc = hex ($1);
        }
        elsif ($line =~ /^\s+([=*#:])\s+(.+)$/)
        {
            my ($kind, $value) = ($1, $2);
            $value =~ s/\\/\\\\/g;
            $value =~ s/\"/\\"/g;

            $record->($kind, $value);
        }
        # this one is different: the value is the code point at the end of
        # "x (name - XXXX)".  The prefix match is non-greedy and the
        # capture must start at a word boundary; with a greedy ".*" the
        # capture was left with only the last four hex digits, truncating
        # 5- and 6-digit code points.
        elsif ($line =~ /^\s+x\s+.*?\b([0-9A-F]{4,6})\)$/)
        {
            $record->('x', hex ($1));
        }
    }

    close ($nameslist) or warn "$ICONV did not exit cleanly for $nameslist_txt\n";

    open (my $out, '>', "unicode-nameslist.h")
        or die "Cannot create unicode-nameslist.h: $!\n";

    print $out "/* unicode-nameslist.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef UNICODE_NAMESLIST_H\n";
    print $out "#define UNICODE_NAMESLIST_H\n\n";

    print $out "#include <glib/gunicode.h>\n\n";

    print $out "typedef struct _UnicharString UnicharString;\n";
    print $out "typedef struct _UnicharUnichar UnicharUnichar;\n";
    print $out "typedef struct _NamesList NamesList;\n\n";

    print $out "struct _UnicharString\n";
    print $out "{\n";
    print $out " gunichar index;\n";
    print $out " const gchar *value;\n";
    print $out "}; \n\n";

    print $out "struct _UnicharUnichar\n";
    print $out "{\n";
    print $out " gunichar index;\n";
    print $out " gunichar value;\n";
    print $out "}; \n\n";

    print $out "struct _NamesList\n";
    print $out "{\n";
    print $out " gunichar index;\n";
    print $out " gint equals_index; /* -1 means */\n";
    print $out " gint stars_index; /* this character */\n";
    print $out " gint exes_index; /* doesn't */\n";
    print $out " gint pounds_index; /* have any */\n";
    print $out " gint colons_index;\n";
    print $out "};\n\n";

    my @code_points = sort { $a <=> $b } keys %entries;

    # One table per marker: [ marker, C element type, C array name, row format ].
    # The order of this list is the order of the tables in the header.
    my @tables =
    (
        [ '=', 'UnicharString',  'names_list_equals', qq/ { 0x%04X, "%s" },\n/ ],
        [ '*', 'UnicharString',  'names_list_stars',  qq/ { 0x%04X, "%s" },\n/ ],
        [ '#', 'UnicharString',  'names_list_pounds', qq/ { 0x%04X, "%s" },\n/ ],
        [ 'x', 'UnicharUnichar', 'names_list_exes',   qq/ { 0x%04X, 0x%04X },\n/ ],
        [ ':', 'UnicharString',  'names_list_colons', qq/ { 0x%04X, "%s" },\n/ ],
    );

    for my $table (@tables)
    {
        my ($kind, $c_type, $c_name, $row_format) = @{$table};

        print $out "static const $c_type $c_name" . "[] = \n";
        print $out "{\n";
        for my $code (@code_points)
        {
            next if not exists $entries{$code}{$kind};
            for my $value (@{ $entries{$code}{$kind}{'values'} }) {
                printf {$out} $row_format, $code, $value;
            }
        }
        print $out " { (gunichar)(-1), 0 }\n";
        print $out "};\n\n";
    }

    # Per code point: where its rows start in each of the tables above.
    # The column order follows struct _NamesList, not @tables.
    print $out "static const NamesList names_list[] =\n";
    print $out "{\n";
    for my $code (@code_points)
    {
        my @starts = map {
            exists $entries{$code}{$_} ? $entries{$code}{$_}{'index'} : -1
        } ('=', '*', 'x', '#', ':');

        printf {$out} " { 0x%04X, %d, %d, %d, %d, %d },\n", $code, @starts;
    }
    print $out "};\n\n";

    print $out "#endif /* #ifndef UNICODE_NAMESLIST_H */\n";

    close ($out) or die "Cannot write unicode-nameslist.h: $!\n";

    print " done.\n";
}
717 #------------------------#
# Generates UnicodeBlocks.h in the current directory.
#
# Parameter: path of the Blocks.txt file to read.
#
# Every line of the form "XXXX..YYYY; Block Name" becomes one
# { start, end, I18N_NOOP("Block Name") } row; all other lines (comments,
# blank lines) are skipped.  The table ends with a { 0x0, 0x0, NULL } row.
sub process_blocks_txt ($)
{
    my ($blocks_txt) = @_;

    open (my $blocks, '<', $blocks_txt)
        or die "Cannot open $blocks_txt: $!\n";
    open (my $out, '>', "UnicodeBlocks.h")
        or die "Cannot create UnicodeBlocks.h: $!\n";

    print "processing $blocks_txt...";

    print $out "/* UnicodeBlocks.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef __UNICODE_BLOCKS_H__\n";
    print $out "#define __UNICODE_BLOCKS_H__\n\n";

    print $out "#include <QtCore/qglobal.h>\n";
    print $out "#include <klocalizedstring.h>\n\n";

    print $out "struct TUnicodeBlock\n";
    print $out "{\n";
    print $out " quint32 start,\n";
    print $out " end;\n";
    print $out " const char *blockName;\n";
    print $out "};\n\n";
    print $out "static const struct TUnicodeBlock constUnicodeBlocks[] =\n";
    print $out "{\n";
    while (my $line = <$blocks>)
    {
        # The name is matched lazily and trailing whitespace is left out,
        # so a stray CR from a CRLF copy of the file cannot end up inside
        # the translatable string.
        $line =~ /^([0-9A-F]+)\.\.([0-9A-F]+); (.+?)\s*$/ or next;
        my ($first, $last, $block_name) = ($1, $2, $3);

        print $out qq/ { 0x$first, 0x$last, I18N_NOOP("$block_name") },\n/;
    }
    print $out " { 0x0, 0x0, NULL }\n";
    print $out "};\n\n";

    print $out "#endif\n\n";

    close ($blocks);
    close ($out) or die "Cannot write UnicodeBlocks.h: $!\n";

    print " done.\n";
}
763 #------------------------#
# Generates UnicodeScripts.h in the current directory.
#
# Parameter: path of the Scripts.txt file to read.
#
# Lines of the form "XXXX..YYYY ; Script_Name" or "XXXX ; Script_Name" are
# used; everything else is skipped.  Script names are rewritten from
# "Old_Italic" style to "Old Italic" style.  The header contains the
# alphabetically sorted list of script names and a table of
# { start, end, index into that list } rows sorted by start.
sub process_scripts_txt ($)
{
    my ($scripts_txt) = @_;

    my %script_hash;   # range start => { 'end' => ..., 'script' => ... }
    my %scripts;       # script name => 1, later its index in the name list

    open (my $in, '<', $scripts_txt)
        or die "Cannot open $scripts_txt: $!\n";
    open (my $out, '>', "UnicodeScripts.h")
        or die "Cannot create UnicodeScripts.h: $!\n";

    print "processing $scripts_txt...";

    while (my $line = <$in>)
    {
        # "..YYYY" is optional: a single code point is a range of one.
        $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\S+)/ or next;

        my $start = hex ($1);
        my $end = defined $2 ? hex ($2) : $start;
        my $script = $3;

        # Old_Italic / OLD_ITALIC -> Old Italic
        $script =~ tr/_/ /;
        $script =~ s/(\w+)/\u\L$1/g;

        $script_hash{$start} = { 'end' => $end, 'script' => $script };
        $scripts{$script} = 1;
    }

    close ($in);

    # Adds Common to make sure works with UCD <= 4.0.0
    $scripts{"Common"} = 1;

    print $out "/* UnicodeScripts.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef __UNICODE_SCRIPTS_H__\n";
    print $out "#define __UNICODE_SCRIPTS_H__\n\n";

    print $out "#include <QtCore/qglobal.h>\n";
    print $out "#include <klocalizedstring.h>\n\n";

    print $out "static const char *constUnicodeScriptList[] =\n";
    print $out "{\n";
    my $i = 0;
    for my $script (sort keys %scripts)
    {
        # Remember the position of each name for the range table below.
        $scripts{$script} = $i;
        print $out qq/ I18N_NOOP("$script"),\n/;
        $i++;
    }
    print $out " NULL\n";
    print $out "};\n\n";

    print $out "struct TUnicodeScript\n";
    print $out "{\n";
    print $out " quint32 start,\n";
    print $out " end;\n";
    print $out " int scriptIndex; /* index into constUnicodeScriptList */\n";
    print $out "};\n\n";
    print $out "static const TUnicodeScript constUnicodeScripts[] =\n";
    print $out "{\n";
    for my $start (sort { $a <=> $b } keys %script_hash)
    {
        my $range = $script_hash{$start};
        printf {$out} qq/ { 0x%04X, 0x%04X, %2d },\n/,
            $start, $range->{'end'}, $scripts{$range->{'script'}};
    }
    printf $out " { 0x0, 0x0, -1 }\n";
    print $out "};\n\n";

    print $out "#endif\n\n";

    close ($out) or die "Cannot write UnicodeScripts.h: $!\n";
    print " done.\n";
}