Version 6.4.0.0.beta1, tag libreoffice-6.4.0.0.beta1
[LibreOffice.git] / vcl / source / gdi / genVerticalOrientationData.pl
blob328727b269b339a88a7f494a1f7e593021f5f711
1 #!/usr/bin/env perl
3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 # This tool is used to prepare lookup tables of Unicode character properties.
8 # The properties are read from the Unicode Character Database and compiled into
9 # multi-level arrays for efficient lookup.
11 # To regenerate the tables in VerticalOrientationData.cxx:
# (1) Download the current Unicode data file from unicode.org.
#
#     We require the latest data file for UTR50, currently revision-17:
#       http://www.unicode.org/Public/vertical/revision-17/VerticalOrientation-17.txt
19 # (2) Run this tool using a command line of the form
21 # perl genVerticalOrientationData.pl \
22 # /path/to/VerticalOrientation-17.txt
#     This will generate (or overwrite!) the file
#
#         VerticalOrientationData.cxx
#
#     in the current directory.
30 use strict;
31 use List::Util qw(first);
# Path of the UTR50 data file, taken from the first command-line argument.
my $DATA_FILE = $ARGV[0];

# Numeric code stored in the generated table for each Vertical_Orientation
# property value that can appear in the data file.
my %verticalOrientationCode = (
    'U'  => 0,    # U  - Upright, the same orientation as in the code charts
    'R'  => 1,    # R  - Rotated 90 degrees clockwise compared to the code charts
    'Tu' => 2,    # Tu - Transformed typographically, with fallback to Upright
    'Tr' => 3,    # Tr - Transformed typographically, with fallback to Rotated
);

# One slot per code point, U+0000 through U+10FFFF.  Code points that the
# data file does not list keep the default of 1 ('R').
my @verticalOrientation = (1) x 0x110000;
# Read the UTR50 data file named on the command line.
#
# The leading lines of the file, up to and including the first one that
# contains "Date:", are kept verbatim in @versionInfo so that they can be
# quoted in the generated file.  Every later line of the form
#     XXXX ; code        or        XXXX..YYYY ; code
# sets the orientation code of that code point (range) in
# @verticalOrientation.
my @versionInfo;
defined $DATA_FILE
    or die "usage: perl genVerticalOrientationData.pl /path/to/VerticalOrientation-17.txt\n";

# Three-argument open with a lexical handle: the path is never parsed for a
# mode prefix, and the failure message reports the real path and reason.
open my $dataFh, '<', $DATA_FILE
    or die "can't open UTR50 data file $DATA_FILE: $!\n";

# The joined version text starts with an empty line.
push @versionInfo, "";
while (my $line = <$dataFh>) {
    chomp $line;
    push @versionInfo, $line;
    last if $line =~ /Date:/;
}

while (my $line = <$dataFh>) {
    chomp $line;
    $line =~ s/#.*//;    # strip trailing comments
    next unless $line =~ m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*(\S+)/;
    my ($first, $last, $vo) = ($1, $2, $3);

    # An unrecognised code is reported and the record skipped, so that the
    # affected code points keep their default instead of being set to undef.
    unless (exists $verticalOrientationCode{$vo}) {
        warn "unknown Vertical_Orientation code $vo";
        next;
    }

    my $code  = $verticalOrientationCode{$vo};
    my $start = hex $first;
    my $end   = defined $last ? hex $last : $start;
    for my $usv ($start .. $end) {
        $verticalOrientation[$usv] = $code;
    }
}
close $dataFh;
# Human-readable UTC time of this run, quoted in the generated file's header.
my $timestamp = gmtime();

# Create (or overwrite) the output file in the current directory.  The
# DATA_TABLES handle is the one genTables() prints to.  Three-argument open
# is used and the failure message includes the system error.
open DATA_TABLES, '>', 'VerticalOrientationData.cxx'
    or die "unable to open VerticalOrientationData.cxx for output: $!";
77 my $licenseBlock = q[
79 * This file is part of the LibreOffice project.
81 * This Source Code Form is subject to the terms of the Mozilla Public
82 * License, v. 2.0. If a copy of the MPL was not distributed with this
83 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
87 * Derived from the Unicode Character Database by genVerticalOrientationData.pl
89 * For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
93 my $versionInfo = join("\n", @versionInfo);
95 print DATA_TABLES <<__END;
96 $licenseBlock
98 * Created on $timestamp from UCD data files with version info:
101 $versionInfo
104 * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
107 __END
# Running total of the table sizes computed by genTables().
our $totalData = 0;

# Format the table entry for a single code point.
#
# Takes a Unicode scalar value and returns its orientation code from
# @verticalOrientation as a decimal number followed by a comma.
sub sprintVerticalOrientation
{
    my ($codepoint) = @_;
    my $code = $verticalOrientation[$codepoint];
    return sprintf("%d,", $code);
}

# Emit the VerticalOrientation tables: uint8_t values, 9 index bits and
# 7 character bits per plane, planes 0..16, one byte and one code point
# per entry.
genTables("VerticalOrientation", "uint8_t", 9, 7, \&sprintVerticalOrientation, 16, 1, 1);
# Write a multi-level lookup table for one character property to DATA_TABLES.
#
# Parameters:
#   $prefix        - name fragment used in the emitted identifiers
#                    (k<prefix>MaxPlane, s<prefix>Planes, s<prefix>Pages,
#                    s<prefix>Values)
#   $type          - C type of the entries of the values table
#   $indexBits     - log2 of the number of pages per plane
#   $charBits      - log2 of the number of code points per page
#   $func          - code ref called with a code point; returns the text of
#                    one table entry, terminated by a single character that
#                    is chopped off the end of each page (a comma here)
#   $maxPlane      - highest plane to cover; planes 0 .. $maxPlane are emitted
#   $bytesPerEntry - size of one values entry, used only for the size report
#   $charsPerEntry - number of code points represented by one values entry
#
# Identical pages, and identical per-plane page maps, are stored only once.
# The computed data size is added to $totalData and reported on STDERR.
sub genTables
{
    my ($prefix, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;

    print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
    print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
    print DATA_TABLES "#define k${prefix}CharBits $charBits\n";

    my $indexLen     = 1 << $indexBits;
    my $charsPerPage = 1 << $charBits;
    my %charIndex    = ();    # page text         -> index into @char
    my %pageMapIndex = ();    # packed page map   -> index into @pageMap
    my @pageMap      = ();    # distinct packed page maps, in first-seen order
    my @char         = ();    # distinct page texts, in first-seen order

    # One byte per plane above plane 0: index of that plane's page map.
    my $planeMap = "\x00" x $maxPlane;
    foreach my $plane (0 .. $maxPlane) {
        # Two bytes (one packed 'S') per page.  The parentheses matter:
        # "x" and "*" have equal precedence, so without them the string
        # would be repeated first and then multiplied as a number.
        my $pageMap = "\x00" x ($indexLen * 2);
        foreach my $page (0 .. $indexLen - 1) {
            my $charValues = "";
            for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
                my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
                $charValues .= $func->($usv);
            }
            chop $charValues;    # drop the terminator of the last entry

            unless (exists $charIndex{$charValues}) {
                my $newIndex = scalar keys %charIndex;
                $charIndex{$charValues} = $newIndex;
                $char[$newIndex] = $charValues;
            }
            substr($pageMap, $page * 2, 2) = pack('S', $charIndex{$charValues});
        }

        unless (exists $pageMapIndex{$pageMap}) {
            my $newIndex = scalar keys %pageMapIndex;
            $pageMapIndex{$pageMap} = $newIndex;
            $pageMap[$newIndex] = $pageMap;
        }
        if ($plane > 0) {
            substr($planeMap, $plane - 1, 1) = pack('C', $pageMapIndex{$pageMap});
        }
    }

    if ($maxPlane) {
        print DATA_TABLES "static const uint8_t s${prefix}Planes[$maxPlane] = {";
        print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('C*', $planeMap));
        print DATA_TABLES "};\n\n";
    }

    my $chCount = scalar @char;
    # Page indices need 16 bits once there are more than 255 distinct pages.
    my $pmBits  = $chCount > 255 ? 16 : 8;
    my $pmCount = scalar @pageMap;
    if ($maxPlane == 0) {
        die "there should only be one pageMap entry!" if $pmCount > 1;
        print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$indexLen] = {\n";
    } else {
        print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$pmCount][$indexLen] = {\n";
    }
    for (my $i = 0; $i < scalar @pageMap; ++$i) {
        print DATA_TABLES $maxPlane > 0 ? " {" : " ";
        print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('S*', $pageMap[$i]));
        print DATA_TABLES $maxPlane > 0 ? ($i < $#pageMap ? "},\n" : "}\n") : "\n";
    }
    print DATA_TABLES "};\n\n";

    my $pageLen = $charsPerPage / $charsPerEntry;
    print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
    for (my $i = 0; $i < scalar @char; ++$i) {
        print DATA_TABLES " {";
        print DATA_TABLES $char[$i];
        print DATA_TABLES $i < $#char ? "},\n" : "}\n";
    }
    print DATA_TABLES "};\n";

    # Size in bytes of the three emitted arrays.
    my $dataSize = $pmCount * $indexLen * $pmBits/8 +
                   $chCount * $pageLen * $bytesPerEntry +
                   $maxPlane;
    $totalData += $dataSize;

    print STDERR "Data for $prefix = $dataSize\n";
}
200 print DATA_TABLES <<__END;
202 * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
204 __END
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close means the generated file is incomplete.
close DATA_TABLES
    or die "error writing VerticalOrientationData.cxx: $!";