3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 # This tool is used to prepare lookup tables of Unicode character properties.
8 # The properties are read from the Unicode Character Database and compiled into
9 # multi-level arrays for efficient lookup.
11 # To regenerate the tables in VerticalOrientationData.cxx:
# (1) Download the latest data file for UTR50, currently revision-17, from
#
#         http://www.unicode.org/Public/vertical/revision-17/VerticalOrientation-17.txt
19 # (2) Run this tool using a command line of the form
21 # perl genVerticalOrientationData.pl \
22 # /path/to/VerticalOrientation-17.txt
#     This will generate (or overwrite!) the file
#
#         VerticalOrientationData.cxx
#
#     in the current directory.
31 use List
::Util
qw(first);
# Path of the UTR50 data file, taken from the first command-line argument.
my $DATA_FILE = $ARGV[0];

# Numeric code stored in the generated table for each Vertical_Orientation
# property value.
my %verticalOrientationCode = (
    'U'  => 0,  # U  - Upright, the same orientation as in the code charts
    'R'  => 1,  # R  - Rotated 90 degrees clockwise compared to the code charts
    'Tu' => 2,  # Tu - Transformed typographically, with fallback to Upright
    'Tr' => 3,  # Tr - Transformed typographically, with fallback to Rotated
);

# One slot per code point U+0000..U+10FFFF.  The default for unlisted
# codepoints is 'R'; records read from the data file overwrite it later.
my @verticalOrientation = ($verticalOrientationCode{'R'}) x 0x110000;
# read VerticalOrientation-17.txt
#
# Pass 1 copies the leading header lines of the data file into @versionInfo so
# that they can be quoted in the generated file.  Pass 2 parses the property
# records, which look like
#
#     XXXX ; value          or          XXXX..YYYY ; value
#
# and stores the numeric code of each value in @verticalOrientation.
die "usage: perl genVerticalOrientationData.pl /path/to/VerticalOrientation-17.txt\n"
    unless defined $DATA_FILE && length $DATA_FILE;

# Three-argument open with a lexical handle: the path is never interpreted as
# an open mode, and the handle cannot clash with another bareword.
open(my $dataFh, '<', $DATA_FILE)
    or die "can't open UTR50 data file $DATA_FILE: $!\n";

my @versionInfo;
push @versionInfo, "";

# NOTE(review): the stop condition of this header loop was not visible in the
# text under review; stopping after the "Date:" line is an assumption - confirm.
while (my $headerLine = <$dataFh>) {
    chomp $headerLine;
    push @versionInfo, $headerLine;
    last if $headerLine =~ /Date:/;
}

RECORD:
while (my $record = <$dataFh>) {
    chomp $record;
    $record =~ s/#.*//;     # trailing comments carry no property data

    # Optional ".." range end ('?', not '*'); \S+ also stops at tabs and CR.
    next RECORD
        unless $record =~ m/\A\s*([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*(\S+)/;
    my ($firstHex, $lastHex, $vo) = ($1, $2, $3);

    # Keep the default for a value we do not know, rather than storing undef
    # (which would later be printed as 0, i.e. silently turned into 'U').
    unless (exists $verticalOrientationCode{$vo}) {
        warn "unknown Vertical_Orientation code $vo";
        next RECORD;
    }

    my $start = hex $firstHex;
    my $end   = defined $lastHex ? hex $lastHex : $start;
    if ($start > $end || $end > 0x10FFFF) {
        warn "ignoring invalid code point range in: $record";
        next RECORD;
    }

    @verticalOrientation[$start .. $end] =
        ($verticalOrientationCode{$vo}) x ($end - $start + 1);
}

close($dataFh);
# gmtime() in scalar context yields a human-readable UTC date string; it is
# interpolated into the header comment of the generated file.
my $timestamp = gmtime();

# DATA_TABLES stays a bareword handle because the rest of this script prints
# to it by name.  The three-argument form keeps mode and file name separate,
# and the failure message now reports the system error.
open(DATA_TABLES, '>', 'VerticalOrientationData.cxx')
    or die "unable to open VerticalOrientationData.cxx for output: $!";
79 * This file is part of the LibreOffice project
.
81 * This Source Code Form is subject to the terms of the Mozilla Public
82 * License
, v
. 2.0. If a copy of the MPL was
not distributed with this
83 * file
, You can obtain one at http
://mozilla
.org
/MPL/2.0/.
87 * Derived from the Unicode Character Database by genVerticalOrientationData
.pl
89 * For Unicode terms of
use, see http
://www
.unicode
.org
/terms_of_use
.html
# Collapse the captured header lines into one newline-separated string for
# interpolation into the generated file's header comment.
my $versionInfo = join "\n", @versionInfo;
95 print DATA_TABLES
<<__END
;
98 * Created on
$timestamp from UCD data files with version info
:
104 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *
# Format the table entry for a single code point.
#
# Parameter: a Unicode scalar value, used as an index into
#            @verticalOrientation.
# Returns:   the stored orientation code as a decimal number followed by a
#            comma, e.g. "1,".
sub sprintVerticalOrientation
{
    my ($usv) = @_;
    return sprintf("%d,", $verticalOrientation[$usv]);
}
# Emit the VerticalOrientation tables: uint8_t entries, 9 index bits and
# 7 character bits, formatted by sprintVerticalOrientation, planes 0..16,
# one byte and one code point per entry.
genTables(
    "VerticalOrientation", "uint8_t",
    9, 7,
    \&sprintVerticalOrientation,
    16, 1, 1
);
# Write a multi-level lookup table for one per-code-point property to the
# DATA_TABLES handle.
#
# Every page of (1 << $charBits) code points is formatted once; identical
# pages are stored a single time in s<prefix>Values.  For each plane, a page
# map lists the Values row used by each of its (1 << $indexBits) pages;
# identical page maps are shared in s<prefix>Pages, and s<prefix>Planes
# records which page map each plane above plane 0 uses.
#
# Parameters:
#   $prefix         name fragment used in the generated identifiers
#   $type           C type of the s<prefix>Values elements
#   $indexBits      log2 of the number of pages per plane
#   $charBits       log2 of the number of code points per page
#   $func           code ref; called with a code point, returns the text of
#                   one table entry terminated by a comma
#   $maxPlane       highest plane to cover (planes 0 .. $maxPlane)
#   $bytesPerEntry  size of one Values entry; only used for the size report
#   $charsPerEntry  number of code points covered by one Values entry
#
# Side effects: prints the tables to DATA_TABLES, adds the estimated table
# size to $totalData and reports it on STDERR.  Dies when a table index does
# not fit the integer width it is packed into.
sub genTables
{
    my ($prefix, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;

    print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
    print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
    print DATA_TABLES "#define k${prefix}CharBits $charBits\n";

    my $indexLen     = 1 << $indexBits;
    my $charsPerPage = 1 << $charBits;

    my %charIndex    = ();  # formatted page text -> row number in @char
    my %pageMapIndex = ();  # packed page map     -> row number in @pageMap
    my @pageMap      = ();  # distinct packed page maps, in order of first use
    my @char         = ();  # distinct formatted pages, in order of first use

    # One byte per plane above plane 0; plane 0 has no slot in this map.
    my $planeMap = "\x00" x $maxPlane;

    foreach my $plane (0 .. $maxPlane) {
        # Two bytes per page.  The parentheses matter: 'x' and '*' share the
        # same precedence, so without them the string would be repeated first
        # and then multiplied numerically, giving 0.
        my $pageMap = "\x00" x ($indexLen * 2);

        foreach my $page (0 .. $indexLen - 1) {
            my $charValues = "";
            for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
                my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
                $charValues .= $func->($usv);
            }
            # Each entry ends in a comma; drop the final one so that a row is
            # printed as {a,b,c}.
            $charValues =~ s/,\z//;

            unless (exists $charIndex{$charValues}) {
                $charIndex{$charValues} = scalar keys %charIndex;
                $char[$charIndex{$charValues}] = $charValues;
            }
            die "too many distinct pages for a 16-bit page index"
                if $charIndex{$charValues} > 0xFFFF;
            substr($pageMap, $page * 2, 2) = pack('S', $charIndex{$charValues});
        }

        unless (exists $pageMapIndex{$pageMap}) {
            $pageMapIndex{$pageMap} = scalar keys %pageMapIndex;
            $pageMap[$pageMapIndex{$pageMap}] = $pageMap;
        }

        # Skipping plane 0 keeps the offset $plane - 1 inside $planeMap, which
        # is empty when $maxPlane is 0.
        if ($plane > 0) {
            die "too many distinct page maps for an 8-bit plane index"
                if $pageMapIndex{$pageMap} > 0xFF;
            substr($planeMap, $plane - 1, 1) = pack('C', $pageMapIndex{$pageMap});
        }
    }

    # A zero-length C array is not valid, so the plane table is only written
    # when there is more than one plane.
    if ($maxPlane) {
        print DATA_TABLES "static const uint8_t s${prefix}Planes[$maxPlane] = {";
        print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('C*', $planeMap));
        print DATA_TABLES "};\n\n";
    }

    my $chCount = scalar @char;
    my $pmBits  = $chCount > 255 ? 16 : 8;  # element width of the Pages table
    my $pmCount = scalar @pageMap;

    if ($maxPlane == 0) {
        die "there should only be one pageMap entry!" if $pmCount > 1;
        print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$indexLen] = {\n";
    }
    else {
        print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$pmCount][$indexLen] = {\n";
    }
    for my $i (0 .. $#pageMap) {
        print DATA_TABLES $maxPlane > 0 ? " {" : " ";
        print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('S*', $pageMap[$i]));
        print DATA_TABLES $maxPlane > 0 ? ($i < $#pageMap ? "},\n" : "}\n") : "\n";
    }
    print DATA_TABLES "};\n\n";

    my $pageLen = $charsPerPage / $charsPerEntry;
    print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
    for my $i (0 .. $#char) {
        print DATA_TABLES " {";
        print DATA_TABLES $char[$i];
        print DATA_TABLES $i < $#char ? "},\n" : "}\n";
    }
    print DATA_TABLES "};\n";

    # Size in bytes of the three tables: Pages + Values + Planes.
    my $dataSize = $pmCount * $indexLen * $pmBits/8 +
                   $chCount * $pageLen * $bytesPerEntry +
                   $maxPlane;
    $totalData += $dataSize;

    print STDERR "Data for $prefix = $dataSize\n";
}
200 print DATA_TABLES
<<__END
;
202 * * * * * This file contains MACHINE
-GENERATED DATA
, do not edit
! * * * * *