3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 # This tool is used to prepare lookup tables of Unicode character properties
8 # needed by gfx code to support text shaping operations. The properties are
9 # read from the Unicode Character Database and compiled into multi-level arrays
10 # for efficient lookup.
12 # To regenerate the tables in nsUnicodePropertyData.cpp:
14 # (1) Download the current Unicode data files from
16 # http://www.unicode.org/Public/UNIDATA/
18 # NB: not all the files are actually needed; currently, we require
21 # - EastAsianWidth.txt
23 # - HangulSyllableType.txt
24 # - ReadMe.txt (to record version/date of the UCD)
25 # - Unihan_Variants.txt (from Unihan.zip)
26 # though this may change if we find a need for additional properties.
28 # The Unicode data files listed above should be together in one directory.
30 # We also require the file
31 # http://www.unicode.org/Public/security/latest/xidmodifications.txt
32 # This file should be in a sub-directory "security" immediately below the
33 # directory containing the other Unicode data files.
35 # We also require the latest data file for UTR50, currently revision-13:
36 # http://www.unicode.org/Public/vertical/revision-13/VerticalOrientation-13.txt
37 # This file should be in a sub-directory "vertical" immediately below the
38 # directory containing the other Unicode data files.
41 # (2) Run this tool using a command line of the form
43 # perl genUnicodePropertyData.pl \
44 # /path/to/harfbuzz/src \
45 # /path/to/UCD-directory
47 # This will generate (or overwrite!) the files
49 # nsUnicodePropertyData.cpp
50 # nsUnicodeScriptCodes.h
52 # in the current directory.
55 use List
::Util
qw(first);
59 # Run this tool using a command line of the form
61 # perl genUnicodePropertyData.pl \\
62 # /path/to/harfbuzz/src \\
63 # /path/to/UCD-directory
65 # where harfbuzz/src is the directory containing harfbuzz .cc and .hh files,
66 # and UCD-directory is a directory containing the current Unicode Character
67 # Database files (UnicodeData.txt, etc), available from
68 # http://www.unicode.org/Public/UNIDATA/, with additional resources as
69 # detailed in the source comments.
71 # This will generate (or overwrite!) the files
73 # nsUnicodePropertyData.cpp
74 # nsUnicodeScriptCodes.h
76 # in the current directory.
81 # load HB_Script and HB_Category constants
83 # NOTE that HB_SCRIPT_* constants are now "tag" values, NOT sequentially-allocated
84 # script codes as used by Glib/Pango/etc.
85 # We therefore define a set of MOZ_SCRIPT_* constants that are script _codes_
86 # compatible with those libraries, and map these to HB_SCRIPT_* _tags_ as needed.
88 # CHECK that this matches Pango source (as found for example at
89 # http://git.gnome.org/browse/pango/tree/pango/pango-script.h)
90 # for as many codes as that defines (currently up through Unicode 5.1)
91 # and the GLib enumeration
92 # http://developer.gnome.org/glib/2.30/glib-Unicode-Manipulation.html#GUnicodeScript
93 # (currently defined up through Unicode 6.0).
94 # Constants beyond these may be regarded as unstable for now, but we don't actually
95 # depend on the specific values.
138 CANADIAN_ABORIGINAL
=> 40,
144 # unicode 4.0 additions
153 # unicode 4.1 additions
161 # unicode 5.0 additions
168 # unicode 5.1 additions
180 # unicode 5.2 additions
183 EGYPTIAN_HIEROGLYPHS
=> 80,
184 IMPERIAL_ARAMAIC
=> 81,
185 INSCRIPTIONAL_PAHLAVI
=> 82,
186 INSCRIPTIONAL_PARTHIAN
=> 83,
191 OLD_SOUTH_ARABIAN
=> 88,
196 # unicode 6.0 additions
200 # unicode 6.1 additions
202 MEROITIC_CURSIVE
=> 97,
203 MEROITIC_HIEROGLYPHS
=> 98,
208 # unicode 7.0 additions
210 CAUCASIAN_ALBANIAN
=> 104,
219 MENDE_KIKAKUI
=> 113,
223 OLD_NORTH_ARABIAN
=> 117,
228 PSALTER_PAHLAVI
=> 122,
233 # additional "script" code, not from Unicode (but matches ISO 15924's Zmth tag)
234 MATHEMATICAL_NOTATION
=> 126,
241 my @scriptCodeToName;
243 sub readHarfBuzzHeader
246 open FH
, "< $ARGV[0]/$file" or die "can't open harfbuzz header $ARGV[0]/$file\n";
248 s/CANADIAN_SYLLABICS/CANADIAN_ABORIGINAL/; # harfbuzz and unicode disagree on this name :(
249 if (m/HB_SCRIPT_([A-Z_]+)\s*=\s*HB_TAG\s*\(('.','.','.','.')\)\s*,/) {
250 unless (exists $scriptCode{$1}) {
251 warn "unknown script name $1 found in $file\n";
254 $sc = $scriptCode{$1};
255 $scriptCodeToTag[$sc] = $2;
256 $scriptCodeToName[$sc] = $1;
258 if (m/HB_UNICODE_GENERAL_CATEGORY_([A-Z_]+)/) {
# Feed both HarfBuzz headers through readHarfBuzzHeader.
foreach my $header ("hb-common.h", "hb-unicode.h") {
    readHarfBuzzHeader($header);
}

# Abort if either counter is still -1 after the headers have been read; the
# messages below say which set of codes was not found.
if ($sc == -1) {
    die "didn't find HarfBuzz script codes\n";
}
if ($cc == -1) {
    die "didn't find HarfBuzz category codes\n";
}

# Additional code not present in HarfBuzz headers: register the tag and name
# for MATHEMATICAL_NOTATION by hand, using its slot from %scriptCode.
$sc = $scriptCode{"MATHEMATICAL_NOTATION"};
($scriptCodeToTag[$sc], $scriptCodeToName[$sc]) =
    ("'Z','m','t','h'", "MATHEMATICAL_NOTATION");
280 'default-ignorable' => 2,
# Code assigned to each bidi category abbreviation. The code is the position
# of the abbreviation in the list below, kept as a string:
#
#    0 L    Left-to-Right               10 ON   Other Neutrals
#    1 R    Right-to-Left               11 LRE  Left-to-Right Embedding
#    2 EN   European Number             12 LRO  Left-to-Right Override
#    3 ES   European Number Separator   13 AL   Right-to-Left Arabic
#    4 ET   European Number Terminator  14 RLE  Right-to-Left Embedding
#    5 AN   Arabic Number               15 RLO  Right-to-Left Override
#    6 CS   Common Number Separator     16 PDF  Pop Directional Format
#    7 B    Paragraph Separator         17 NSM  Non-Spacing Mark
#    8 S    Segment Separator           18 BN   Boundary Neutral
#    9 WS   Whitespace
my %bidicategoryCode;
{
    my @bidiAbbreviations = qw(
        L   R   EN  ES  ET  AN  CS  B   S   WS
        ON  LRE LRO AL  RLE RLO PDF NSM BN
    );
    @bidicategoryCode{@bidiAbbreviations} = map { "$_" } 0 .. $#bidiAbbreviations;
}
# Code assigned to each vertical-orientation value:
#   0  U   Upright, the same orientation as in the code charts
#   1  R   Rotated 90 degrees clockwise compared to the code charts
#   2  Tu  Transformed typographically, with fallback to Upright
#   3  Tr  Transformed typographically, with fallback to Rotated
my %verticalOrientationCode;
@verticalOrientationCode{qw(U R Tu Tr)} = (0 .. 3);
319 # initialize default properties
332 my @verticalOrientation;
333 for (my $i = 0; $i < 0x110000; ++$i) {
334 $script[$i] = $scriptCode{"UNKNOWN"};
335 $category[$i] = $catCode{"UNASSIGNED"};
338 $xidmod[$i] = $xidmodCode{"not-chars"};
339 $numericvalue[$i] = -1;
341 $bidicategory[$i] = $bidicategoryCode{"L"};
343 $verticalOrientation[$i] = 1; # default for unlisted codepoints is 'R'
# blocks where the default for bidi category is not L
#
# Each row names a bidi category and the inclusive code point ranges that
# receive it. Rows are applied in order, overwriting whatever @bidicategory
# held for those code points.
{
    my @nonLeftToRightDefaults = (
        [ "AL",
          [ 0x0600,  0x07BF  ],
          [ 0x08A0,  0x08FF  ],
          [ 0xFB50,  0xFDCF  ],
          [ 0xFDF0,  0xFDFF  ],
          [ 0xFE70,  0xFEFF  ],
          [ 0x1EE00, 0x1EEFF ],
        ],
        [ "R",
          [ 0x0590,  0x05FF  ],
          [ 0x07C0,  0x089F  ],
          [ 0xFB1D,  0xFB4F  ],
          [ 0x10800, 0x10FFF ],
          [ 0x1E800, 0x1EDFF ],
          [ 0x1EF00, 0x1EFFF ],
        ],
        [ "ET",
          [ 0x20A0,  0x20CF  ],
        ],
    );

    foreach my $row (@nonLeftToRightDefaults) {
        my ($category, @ranges) = @$row;
        foreach my $range (@ranges) {
            my ($low, $high) = @$range;
            @bidicategory[$low .. $high] =
                ($bidicategoryCode{$category}) x ($high - $low + 1);
        }
    }
}
360 'Cn' => 'UNASSIGNED',
361 'Co' => 'PRIVATE_USE',
363 'Ll' => 'LOWERCASE_LETTER',
364 'Lm' => 'MODIFIER_LETTER',
365 'Lo' => 'OTHER_LETTER',
366 'Lt' => 'TITLECASE_LETTER',
367 'Lu' => 'UPPERCASE_LETTER',
368 'Mc' => 'SPACING_MARK',
369 'Me' => 'ENCLOSING_MARK',
370 'Mn' => 'NON_SPACING_MARK',
371 'Nd' => 'DECIMAL_NUMBER',
372 'Nl' => 'LETTER_NUMBER',
373 'No' => 'OTHER_NUMBER',
374 'Pc' => 'CONNECT_PUNCTUATION',
375 'Pd' => 'DASH_PUNCTUATION',
376 'Pe' => 'CLOSE_PUNCTUATION',
377 'Pf' => 'FINAL_PUNCTUATION',
378 'Pi' => 'INITIAL_PUNCTUATION',
379 'Po' => 'OTHER_PUNCTUATION',
380 'Ps' => 'OPEN_PUNCTUATION',
381 'Sc' => 'CURRENCY_SYMBOL',
382 'Sk' => 'MODIFIER_SYMBOL',
383 'Sm' => 'MATH_SYMBOL',
384 'So' => 'OTHER_SYMBOL',
385 'Zl' => 'LINE_SEPARATOR',
386 'Zp' => 'PARAGRAPH_SEPARATOR',
387 'Zs' => 'SPACE_SEPARATOR'
392 open FH
, "< $ARGV[1]/ReadMe.txt" or die "can't open Unicode ReadMe.txt file\n";
395 push @versionInfo, $_;
# Flag bits OR-ed into a @casemap entry to record which kind of case mapping
# the entry holds (bits 31 down to 28, one per kind).
my ($kTitleToUpper, $kUpperToLower, $kLowerToTitle, $kLowerToUpper) = (
    0x80000000,
    0x40000000,
    0x20000000,
    0x10000000,
);

# Mask covering the low 21 bits of a @casemap entry.
my $kCaseMapCharMask = 0x001fffff;
405 # read UnicodeData.txt
406 open FH
, "< $ARGV[1]/UnicodeData.txt" or die "can't open UCD file UnicodeData.txt\n";
409 my @fields = split /;/;
410 if ($fields[1] =~ /First/) {
411 my $first = hex "0x$fields[0]";
414 if ($fields[1] =~ /Last/) {
415 my $last = hex "0x$fields[0]";
# Copy the properties of a First/Last range record onto code point $first.
# Field 2 is translated through %ucd2hb and %catCode, field 3 is stored as
# the combining class, and field 4 is translated through %bidicategoryCode.
$category[$first] = $catCode{$ucd2hb{$fields[2]}};
$combining[$first] = $fields[3];
$bidicategory[$first] = $bidicategoryCode{$fields[4]};

# Only overwrite the default numeric value when field 7 is non-empty.
if (length $fields[7]) {
    $numericvalue[$first] = $fields[7];
}

# Entries whose name field mentions CJK start out with Han-variant value 3.
# This is a plain element store; the original wrote it as the one-element
# slice @hanVariant[$first], which Perl warns about.
if ($fields[1] =~ /CJK/) {
    $hanVariant[$first] = 3;
}
427 } while ($first <= $last);
429 die "didn't find Last code for range!\n";
432 my $usv = hex "0x$fields[0]";
433 $category[$usv] = $catCode{$ucd2hb{$fields[2]}};
434 $combining[$usv] = $fields[3];
435 my $upper = hex $fields[12];
436 my $lower = hex $fields[13];
437 my $title = hex $fields[14];
438 # we only store one mapping for each character,
439 # but also record what kind of mapping it is
440 if ($upper && $lower) {
441 $casemap[$usv] |= $kTitleToUpper;
442 $casemap[$usv] |= ($usv ^ $upper);
445 $casemap[$usv] |= $kUpperToLower;
446 $casemap[$usv] |= ($usv ^ $lower);
448 elsif ($title && ($title != $upper)) {
449 $casemap[$usv] |= $kLowerToTitle;
450 $casemap[$usv] |= ($usv ^ $title);
453 $casemap[$usv] |= $kLowerToUpper;
454 $casemap[$usv] |= ($usv ^ $upper);
# Remaining per-character properties for code point $usv.
$bidicategory[$usv] = $bidicategoryCode{$fields[4]};

# Only overwrite the default numeric value when field 7 is non-empty.
if (length $fields[7]) {
    $numericvalue[$usv] = $fields[7];
}

# Entries whose name field mentions CJK start out with Han-variant value 3.
# This is a plain element store; the original wrote it as the one-element
# slice @hanVariant[$usv], which Perl warns about.
if ($fields[1] =~ /CJK/) {
    $hanVariant[$usv] = 3;
}

# Field 5 may start with a <narrow> or <wide> tag followed by a hex code
# point. Either way @fullWidth ends up indexed by the narrow character and
# holding the wide one. Both must be below 0x10000.
if ($fields[5] =~ /^<narrow>/) {
    # Skip the 8-character tag plus one separator character.
    my $wideChar = hex(substr($fields[5], 9));
    die "didn't expect supplementary-plane values here" if $usv > 0xffff || $wideChar > 0xffff;
    $fullWidth[$usv] = $wideChar;
}
elsif ($fields[5] =~ /^<wide>/) {
    # Skip the 6-character tag plus one separator character.
    my $narrowChar = hex(substr($fields[5], 7));
    die "didn't expect supplementary-plane values here" if $usv > 0xffff || $narrowChar > 0xffff;
    $fullWidth[$narrowChar] = $usv;
}
478 open FH
, "< $ARGV[1]/Scripts.txt" or die "can't open UCD file Scripts.txt\n";
479 push @versionInfo, "";
482 push @versionInfo, $_;
486 if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^ ]+)/) {
488 warn "unknown script $script" unless exists $scriptCode{$script};
489 $script = $scriptCode{$script};
490 my $start = hex "0x$1";
491 my $end = (defined $2) ?
hex "0x$2" : $start;
492 for (my $i = $start; $i <= $end; ++$i) {
493 $script[$i] = $script;
499 # read EastAsianWidth.txt
501 'A' => 0, # ; Ambiguous
502 'F' => 1, # ; Fullwidth
503 'H' => 2, # ; Halfwidth
504 'N' => 3, # ; Neutral
508 open FH
, "< $ARGV[1]/EastAsianWidth.txt" or die "can't open UCD file EastAsianWidth.txt\n";
509 push @versionInfo, "";
512 push @versionInfo, $_;
517 if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) {
519 warn "unknown EAW code $eaw" unless exists $eawCode{$eaw};
520 $eaw = $eawCode{$eaw};
521 my $start = hex "0x$1";
522 my $end = (defined $2) ?
hex "0x$2" : $start;
523 for (my $i = $start; $i <= $end; ++$i) {
530 # read BidiMirroring.txt
534 open FH
, "< $ARGV[1]/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n";
535 push @versionInfo, "";
538 push @versionInfo, $_;
# A BidiMirroring.txt data line has the form "<code>; <mirrored code>".
# Rather than storing the mirrored code point itself, store an index into
# @offsets, a table of the distinct (mirror - code) differences seen so far.
if (m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6})/) {
    my $mirrorOffset = hex("0x$2") - hex("0x$1");

    # first() returns undef when no existing entry matches. Test that with
    # defined(): the original "== undef" comparison is numeric, so a match
    # at index 0 also looked like "not found" and was pushed a second time.
    my $offsetIndex = first { $offsets[$_] eq $mirrorOffset } 0..$#offsets;
    unless (defined $offsetIndex) {
        # The table is capped at 31 entries (presumably so that the index
        # fits the bit-field it is later written into -- confirm).
        die "too many offset codes\n" if scalar @offsets == 31;
        push @offsets, $mirrorOffset;
        $offsetIndex = $#offsets;
    }
    $mirror[hex "0x$1"] = $offsetIndex;
}
556 # read HangulSyllableType.txt
564 open FH
, "< $ARGV[1]/HangulSyllableType.txt" or die "can't open UCD file HangulSyllableType.txt\n";
565 push @versionInfo, "";
568 push @versionInfo, $_;
573 if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) {
575 warn "unknown Hangul syllable type" unless exists $hangulType{$hangul};
576 $hangul = $hangulType{$hangul};
577 my $start = hex "0x$1";
578 my $end = (defined $2) ?
hex "0x$2" : $start;
579 for (my $i = $start; $i <= $end; ++$i) {
580 $hangul[$i] = $hangul;
586 # read xidmodifications.txt
587 open FH
, "< $ARGV[1]/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
588 push @versionInfo, "";
591 unless (/\xef\xbb\xbf/) {
592 push @versionInfo, $_;
594 last if /Generated:/;
597 if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+[^ ]+\s+;\s+([^ ]+)/) {
599 warn "unknown Identifier Modification $xidmod" unless exists $xidmodCode{$xidmod};
600 $xidmod = $xidmodCode{$xidmod};
601 my $start = hex "0x$1";
602 my $end = (defined $2) ?
hex "0x$2" : $start;
603 for (my $i = $start; $i <= $end; ++$i) {
604 $xidmod[$i] = $xidmod;
609 # special case U+30FB KATAKANA MIDDLE DOT -- see bug 857490
612 open FH
, "< $ARGV[1]/Unihan_Variants.txt" or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";
613 push @versionInfo, "";
616 push @versionInfo, $_;
624 if (m/U\+([0-9A-F]{4,6})\s+k([^ ]+)Variant/) {
625 my $usv = hex "0x$1";
626 if ($usv != $savedusv) {
627 unless ($savedusv == 0) {
628 if ($hasTC && !$hasSC) {
629 $hanVariant[$savedusv] = 1;
630 } elsif (!$hasTC && $hasSC) {
631 $hanVariant[$savedusv] = 2;
638 if ($2 eq "Traditional") {
641 if ($2 eq "Simplified") {
648 # read VerticalOrientation-13.txt
649 open FH
, "< $ARGV[1]/vertical/VerticalOrientation-13.txt" or die "can't open UTR50 data file VerticalOrientation-13.txt\n";
650 push @versionInfo, "";
653 push @versionInfo, $_;
659 if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) {
661 warn "unknown Vertical_Orientation code $vo"
662 unless exists $verticalOrientationCode{$vo};
663 $vo = $verticalOrientationCode{$vo};
664 my $start = hex "0x$1";
665 my $end = (defined $2) ?
hex "0x$2" : $start;
666 for (my $i = $start; $i <= $end; ++$i) {
667 $verticalOrientation[$i] = $vo;
673 my $timestamp = gmtime();
675 open DATA_TABLES
, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";
677 my $licenseBlock = q
[
678 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
679 /* This Source Code Form is subject to the terms of the Mozilla Public
680 * License
, v
. 2.0. If a copy of the MPL was
not distributed with this
681 * file
, You can obtain one at http
://mozilla
.org
/MPL/2.0/. */
684 * Derived from the Unicode Character Database by genUnicodePropertyData
.pl
686 * For Unicode terms of
use, see http
://www
.unicode
.org
/terms_of_use
.html
690 my $versionInfo = join("\n", @versionInfo);
692 print DATA_TABLES
<<__END
;
695 * Created on
$timestamp from UCD data files with version info
:
701 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *
705 #include "harfbuzz/hb.h"
709 open HEADER
, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
711 print HEADER
<<__END
;
714 * Created on
$timestamp from UCD data files with version info
:
720 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *
723 #ifndef NS_UNICODE_SCRIPT_CODES
724 #define NS_UNICODE_SCRIPT_CODES
# Write the sScriptCodeToTag array: one HB_TAG(...) initializer per element
# of @scriptCodeToTag, separated by commas, with no comma after the last.
print DATA_TABLES "static const uint32_t sScriptCodeToTag[] = {\n";
foreach my $code (0 .. $#scriptCodeToTag) {
    my $lineEnd = ($code < $#scriptCodeToTag) ? ",\n" : "\n";
    printf DATA_TABLES " HB_TAG(%s)", $scriptCodeToTag[$code];
    print DATA_TABLES $lineEnd;
}
print DATA_TABLES "};\n\n";
# Write the sMirrorOffsets array: one element of @offsets per line, separated
# by commas, with no comma after the last.
print DATA_TABLES "static const int16_t sMirrorOffsets[] = {\n";
foreach my $i (0 .. $#offsets) {
    printf DATA_TABLES " $offsets[$i]";
    if ($i < $#offsets) {
        print DATA_TABLES ",\n";
    }
    else {
        print DATA_TABLES "\n";
    }
}
print DATA_TABLES "};\n\n";
744 print HEADER
"#pragma pack(1)\n\n";
749 return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]);
752 struct nsCharProps1
{
753 unsigned char mMirrorOffsetIndex
:5;
754 unsigned char mHangulType
:3;
755 unsigned char mCombiningClass
:8;
758 &genTables
("CharProp1", $type, "nsCharProps1", 11, 5, \
&sprintCharProps1
, 1, 2, 1);
763 return sprintf("{%d,%d,%d,%d,%d,%d,%d},",
764 $script[$usv], $eaw[$usv], $category[$usv],
765 $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv],
766 $verticalOrientation[$usv]);
769 struct nsCharProps2
{
770 unsigned char mScriptCode
:8;
771 unsigned char mEAW
:3;
772 unsigned char mCategory
:5;
773 unsigned char mBidiCategory
:5;
774 unsigned char mXidmod
:4;
775 signed char mNumericValue
:5;
776 unsigned char mVertOrient
:2;
779 &genTables
("CharProp2", $type, "nsCharProps2", 11, 5, \
&sprintCharProps2
, 16, 4, 1);
781 print HEADER
"#pragma pack()\n\n";
783 sub sprintHanVariants
788 while ($varShift < 8) {
789 $val |= $hanVariant[$baseUsv++] << $varShift;
792 return sprintf("0x%02x,", $val);
794 &genTables
("HanVariant", "", "uint8_t", 9, 7, \
&sprintHanVariants
, 2, 1, 4);
799 return sprintf("0x%04x,", $fullWidth[$usv]);
801 &genTables
("FullWidth", "", "uint16_t", 10, 6, \
&sprintFullWidth
, 0, 2, 1);
806 return sprintf("0x%08x,", $casemap[$usv]);
808 &genTables
("CaseMap", "", "uint32_t", 11, 5, \
&sprintCasemap
, 1, 4, 1);
810 print STDERR
"Total data = $totalData\n";
# Write the case-map flag bits and the character mask as C++ constants, each
# formatted as an 8-digit hex literal. The last one is followed by a blank line.
foreach my $constant (
    [ "const uint32_t kTitleToUpper = 0x%08x;\n",      $kTitleToUpper    ],
    [ "const uint32_t kUpperToLower = 0x%08x;\n",      $kUpperToLower    ],
    [ "const uint32_t kLowerToTitle = 0x%08x;\n",      $kLowerToTitle    ],
    [ "const uint32_t kLowerToUpper = 0x%08x;\n",      $kLowerToUpper    ],
    [ "const uint32_t kCaseMapCharMask = 0x%08x;\n\n", $kCaseMapCharMask ],
) {
    my ($format, $value) = @$constant;
    printf DATA_TABLES $format, $value;
}
820 my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
822 print DATA_TABLES
"#define k${prefix}MaxPlane $maxPlane\n";
823 print DATA_TABLES
"#define k${prefix}IndexBits $indexBits\n";
824 print DATA_TABLES
"#define k${prefix}CharBits $charBits\n";
826 my $indexLen = 1 << $indexBits;
827 my $charsPerPage = 1 << $charBits;
829 my %pageMapIndex = ();
833 my $planeMap = "\x00" x
$maxPlane;
834 foreach my $plane (0 .. $maxPlane) {
835 my $pageMap = "\x00" x
$indexLen * 2;
836 foreach my $page (0 .. $indexLen - 1) {
838 for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
839 my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
840 $charValues .= &$func($usv);
844 unless (exists $charIndex{$charValues}) {
845 $charIndex{$charValues} = scalar keys %charIndex;
846 $char[$charIndex{$charValues}] = $charValues;
848 substr($pageMap, $page * 2, 2) = pack('S', $charIndex{$charValues});
851 unless (exists $pageMapIndex{$pageMap}) {
852 $pageMapIndex{$pageMap} = scalar keys %pageMapIndex;
853 $pageMap[$pageMapIndex{$pageMap}] = $pageMap;
856 substr($planeMap, $plane - 1, 1) = pack('C', $pageMapIndex{$pageMap});
861 print DATA_TABLES
"static const uint8_t s${prefix}Planes[$maxPlane] = {";
862 print DATA_TABLES
join(',', map { sprintf("%d", $_) } unpack('C*', $planeMap));
863 print DATA_TABLES
"};\n\n";
866 my $chCount = scalar @char;
867 my $pmBits = $chCount > 255 ?
16 : 8;
868 my $pmCount = scalar @pageMap;
869 if ($maxPlane == 0) {
870 die "there should only be one pageMap entry!" if $pmCount > 1;
871 print DATA_TABLES
"static const uint${pmBits}_t s${prefix}Pages[$indexLen] = {\n";
873 print DATA_TABLES
"static const uint${pmBits}_t s${prefix}Pages[$pmCount][$indexLen] = {\n";
875 for (my $i = 0; $i < scalar @pageMap; ++$i) {
876 print DATA_TABLES
$maxPlane > 0 ?
" {" : " ";
877 print DATA_TABLES
join(',', map { sprintf("%d", $_) } unpack('S*', $pageMap[$i]));
878 print DATA_TABLES
$maxPlane > 0 ?
($i < $#pageMap ?
"},\n" : "}\n") : "\n";
880 print DATA_TABLES
"};\n\n";
882 print HEADER
"$typedef\n\n" if $typedef ne '';
884 my $pageLen = $charsPerPage / $charsPerEntry;
885 print DATA_TABLES
"static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
886 for (my $i = 0; $i < scalar @char; ++$i) {
887 print DATA_TABLES
" {";
888 print DATA_TABLES
$char[$i];
889 print DATA_TABLES
$i < $#char ?
"},\n" : "}\n";
891 print DATA_TABLES
"};\n\n";
893 my $dataSize = $pmCount * $indexLen * $pmBits/8 +
894 $chCount * $pageLen * $bytesPerEntry +
896 $totalData += $dataSize;
898 print STDERR
"Data for $prefix = $dataSize\n";
901 print DATA_TABLES
<<__END
;
903 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *
# Write the script-code enum to the header: one MOZ_SCRIPT_<name> = <index>
# line per element of @scriptCodeToName, then the element count, then the
# -1 "invalid" value.
print HEADER "enum {\n";
foreach my $code (0 .. $#scriptCodeToName) {
    print HEADER " MOZ_SCRIPT_", $scriptCodeToName[$code], " = ", $code, ",\n";
}
print HEADER "\n MOZ_NUM_SCRIPT_CODES = ", scalar(@scriptCodeToName), ",\n",
             "\n MOZ_SCRIPT_INVALID = -1\n",
             "};\n\n";
917 print HEADER
<<__END
;
920 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *