Version 4.2.0.1, tag libreoffice-4.2.0.1
[LibreOffice.git] / sal / textenc / generate / big5hkscs2001.pl
blob1ba83a692dbae6997fc91d1f6334f430ef79d87d
1 #!/usr/bin/perl
3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 # The following files must be available in a ./input subdir:
22 # <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
24 # <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
25 # "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994"
26 # Only used to track Unicode characters that are mapped from both Big5 and
27 # HKSCS.
29 # <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
30 # "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000"
31 # Only used to track Unicode characters that are mapped from both CP950 and
32 # HKSCS.
# Set to 1 to allow mappings to Unicode beyond Plane 0.  When it is 0 the
# 2000 column of big5-iso.txt is used as the primary mapping instead of the
# 2001 column.
$surrogates = 0;

# Base name of the generated table: lower-cased it names the .tab output
# file, and verbatim it is embedded in the names of the generated C arrays.
$id = "Big5Hkscs2001";
# Return true if the argument is a code point this script accepts as a
# mapping target: within 0..0x10FFFF, not a surrogate (D800..DFFF), not in
# FDD0..FDEF, and not ending in FFFE/FFFF in any plane.
sub isValidUtf32
{
    my ($code_point) = @_;

    return "" unless $code_point >= 0 && $code_point <= 0x10FFFF;
    return "" if $code_point >= 0xD800 && $code_point <= 0xDFFF;
    return "" if $code_point >= 0xFDD0 && $code_point <= 0xFDEF;

    # The last two code points of every plane are rejected as well.
    return ($code_point & 0xFFFF) < 0xFFFE;
}
# Format a code point as "U+" followed by at least four upper-case hex
# digits.
sub printUtf32
{
    my ($code_point) = @_;
    my $hex_digits = sprintf("%04X", $code_point);
    return "U+" . $hex_digits;
}
# Return true if the argument is a two-byte code whose high byte is in
# 0x81..0xFE and whose low byte is in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($code) = @_;
    my $lead_byte = $code >> 8;
    my $trail_byte = $code & 0xFF;

    return "" if $lead_byte < 0x81 || $lead_byte > 0xFE;
    return 1 if $trail_byte >= 0x40 && $trail_byte <= 0x7E;
    return $trail_byte >= 0xA1 && $trail_byte <= 0xFE;
}
# Format a Big5 code as at least four upper-case hex digits (no prefix).
sub printBig5
{
    my ($code) = @_;
    my $hex_digits = sprintf("%04X", $code);
    return $hex_digits;
}
# Format a usage statistic as "<used>/<space> bytes (<percent>%)", with the
# percentage shown to one decimal place.  $space must be non-zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percentage = $used * 100 / $space;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
}
# Return the padding needed so that column number $end lines up on an output
# line that holds $columns_per_line columns of $column_width characters each:
# one run of $column_width spaces for every column position that precedes
# $end on its line.  Arguments are expected to be integers.
#
# Only lexical variables are used, so the caller's package variables
# ($output, $i, $j) are no longer overwritten as a side effect.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # Number of column positions between the start of $end's line and $end.
    my $skipped_columns
        = $end - int($end / $columns_per_line) * $columns_per_line;

    return "" if $skipped_columns <= 0 || $column_width <= 0;
    return " " x ($skipped_columns * $column_width);
}
# Record a mapping from Unicode code point $utf32 to Big5-HKSCS code $big5 in
# the three-level table @uni_map (plane, page, index), marking the plane and
# page as used.
#
# $comp is -1 for a regular mapping; otherwise it is an index into @compat
# (0 = 1993, 1 = 2000, 2 = 2001) whose counter is incremented when the
# compatibility mapping is actually stored.
#
# If $utf32 already has a mapping, the existing one is kept and a warning is
# printed instead.  The warning labels the rejected code "compat" exactly
# when $comp is not -1 (the previous truthiness test mislabelled -1 and 0).
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    # These stay package variables, as elsewhere in this script, so that the
    # file-level state left behind by a call is unchanged.
    $uni_plane = $utf32 >> 16;
    $uni_page = ($utf32 >> 8) & 0xFF;
    $uni_index = $utf32 & 0xFF;

    if (!defined($uni_plane_used[$uni_plane])
        || !defined($uni_page_used[$uni_plane][$uni_page])
        || !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
        $uni_plane_used[$uni_plane] = 1;
        $uni_page_used[$uni_plane][$uni_page] = 1;
        if ($comp != -1)
        {
            ++$compat[$comp];
        }
    }
    else
    {
        my $existing_big5 = $uni_map[$uni_plane][$uni_page][$uni_index];
        print "WARNING! Mapping ", printUtf32($utf32), " to ",
              printBig5($existing_big5), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
    }
}
# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS.
#
# First table: @underlying_big5, indexed by Unicode code point, holds the
# Big5 code that input/BIG5.TXT maps to that code point.  Entries mapping to
# U+FFFD are skipped; a second Big5 code for an already seen code point only
# produces a warning.

$filename = "BIG5.TXT";
open IN, ("input/" . $filename) or die "Cannot read " . $filename;
while (<IN>)
{
    next unless /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;

    $big5 = oct($1);
    $utf32 = oct($2);
    die "Bad Big5 char " . printBig5($big5) unless isValidBig5($big5);
    die "Bad UTF32 char " . printUtf32($utf32) unless isValidUtf32($utf32);

    next if $utf32 == 0xFFFD;

    if (!defined($underlying_big5[$utf32]))
    {
        $underlying_big5[$utf32] = $big5;
        next;
    }

    print "WARNING! In ", $filename, ", both ",
          printBig5($underlying_big5[$utf32]), " and ",
          printBig5($big5), " map to ", printUtf32($utf32),
          "\n";
}
close IN;
# Second table: @underlying_cp950, indexed by Unicode code point, holds the
# code that input/CP950.TXT maps to that code point.  A second code for an
# already seen code point only produces a warning.
$filename = "CP950.TXT";
open IN, ("input/" . $filename) or die "Cannot read " . $filename;
while (<IN>)
{
    next unless /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;

    $big5 = oct($1);
    $utf32 = oct($2);
    die "Bad Big5 char " . printBig5($big5) unless isValidBig5($big5);
    die "Bad UTF32 char " . printUtf32($utf32) unless isValidUtf32($utf32);

    if (!defined($underlying_cp950[$utf32]))
    {
        $underlying_cp950[$utf32] = $big5;
        next;
    }

    print "WARNING! In ", $filename, ", both ",
          printBig5($underlying_cp950[$utf32]), " and ",
          printBig5($big5), " map to ", printUtf32($utf32), "\n";
}
close IN;
# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points.  (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
#
# Each pair is (Big5 code, PUA code point); it is entered into @big5_map and
# registered as a regular (non-compatibility) mapping in the reverse table.
for my $pair ([0xC6CF, 0xF6E0],
              [0xC6D3, 0xF6E4],
              [0xC6D5, 0xF6E6],
              [0xC6D7, 0xF6E8],
              [0xC6DE, 0xF6EF],
              [0xC6DF, 0xF6F0])
{
    my ($code, $pua_point) = @$pair;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points.  (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
#
# Each entry is (Big5 row byte, Big5 column byte, Unicode code point).  Only
# @big5_map is filled in; no reverse mapping is registered here.
for my $entry (
    [0x8E, 0x69, 0x7BB8],
    [0x8E, 0x6F, 0x7C06],
    [0x8E, 0x7E, 0x7CCE],
    [0x8E, 0xAB, 0x7DD2],
    [0x8E, 0xB4, 0x7E1D],
    [0x8E, 0xCD, 0x8005],
    [0x8E, 0xD0, 0x8028],
    [0x8F, 0x57, 0x83C1],
    [0x8F, 0x69, 0x84A8],
    [0x8F, 0x6E, 0x840F],
    [0x8F, 0xCB, 0x89A6],
    [0x8F, 0xCC, 0x89A9],
    [0x8F, 0xFE, 0x8D77],
    [0x90, 0x6D, 0x90FD],
    [0x90, 0x7A, 0x92B9],
    [0x90, 0xDC, 0x975C],
    [0x90, 0xF1, 0x97FF],
    [0x91, 0xBF, 0x9F16],
    [0x92, 0x44, 0x8503],
    [0x92, 0xAF, 0x5159],
    [0x92, 0xB0, 0x515B],
    [0x92, 0xB1, 0x515D],
    [0x92, 0xB2, 0x515E],
    [0x92, 0xC8, 0x936E],
    [0x92, 0xD1, 0x7479],
    [0x94, 0x47, 0x6D67],
    [0x94, 0xCA, 0x799B],
    [0x95, 0xD9, 0x9097],
    [0x96, 0x44, 0x975D],
    [0x96, 0xED, 0x701E],
    [0x96, 0xFC, 0x5B28],
    [0x9B, 0x76, 0x7201],
    [0x9B, 0x78, 0x77D7],
    [0x9B, 0x7B, 0x7E87],
    [0x9B, 0xC6, 0x99D6],
    [0x9B, 0xDE, 0x91D4],
    [0x9B, 0xEC, 0x60DE],
    [0x9B, 0xF6, 0x6FB6],
    [0x9C, 0x42, 0x8F36],
    [0x9C, 0x53, 0x4FBB],
    [0x9C, 0x62, 0x71DF],
    [0x9C, 0x68, 0x9104],
    [0x9C, 0x6B, 0x9DF0],
    [0x9C, 0x77, 0x83CF],
    [0x9C, 0xBC, 0x5C10],
    [0x9C, 0xBD, 0x79E3],
    [0x9C, 0xD0, 0x5A67],
    [0x9D, 0x57, 0x8F0B],
    [0x9D, 0x5A, 0x7B51],
    [0x9D, 0xC4, 0x62D0],
    [0x9E, 0xA9, 0x6062],
    [0x9E, 0xEF, 0x75F9],
    [0x9E, 0xFD, 0x6C4A],
    [0x9F, 0x60, 0x9B2E],
    [0x9F, 0x66, 0x9F17],
    [0x9F, 0xCB, 0x50ED],
    [0x9F, 0xD8, 0x5F0C],
    [0xA0, 0x63, 0x880F],
    [0xA0, 0x77, 0x62CE],
    [0xA0, 0xD5, 0x7468],
    [0xA0, 0xDF, 0x7162],
    [0xA0, 0xE4, 0x7250],
    [0xFA, 0x5F, 0x5029],
    [0xFA, 0x66, 0x507D],
    [0xFA, 0xBD, 0x5305],
    [0xFA, 0xC5, 0x5344],
    [0xFA, 0xD5, 0x537F],
    [0xFB, 0x48, 0x5605],
    [0xFB, 0xB8, 0x5A77],
    [0xFB, 0xF3, 0x5E75],
    [0xFB, 0xF9, 0x5ED0],
    [0xFC, 0x4F, 0x5F58],
    [0xFC, 0x6C, 0x60A4],
    [0xFC, 0xB9, 0x6490],
    [0xFC, 0xE2, 0x6674],
    [0xFC, 0xF1, 0x675E],
    [0xFD, 0xB7, 0x6C9C],
    [0xFD, 0xB8, 0x6E1D],
    [0xFD, 0xBB, 0x6E2F],
    [0xFD, 0xF1, 0x716E],
    [0xFE, 0x52, 0x732A],
    [0xFE, 0x6F, 0x745C],
    [0xFE, 0xAA, 0x74E9],
    [0xFE, 0xDD, 0x7809],
)
{
    my ($row, $column, $code_point) = @$entry;
    $big5_map[$row][$column] = $code_point;
}
# Read input/big5-iso.txt, whose data lines hold four hex fields: a Big5
# code and its Unicode mapping in the 1993, 2000 and 2001 columns.
#
# For every line the primary mapping ($utf32_2001 if $surrogates is set,
# otherwise $utf32_2000) is stored in @big5_map and registered with
# addMapping; the remaining distinct columns are registered as compatibility
# mappings.  $pua counts primary mappings in U+E000..U+F8FF, and @compat
# counts stored compatibility mappings per column.
$pua = 0;
@compat[0 .. 2] = (0, 0, 0); # 1993, 2000, 2001

$filename = "big5-iso.txt";
open IN, ("input/" . $filename) or die "Cannot read " . $filename;
while (<IN>)
{
    next unless /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;

    ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
        = map { oct("0x" . $_) } ($1, $2, $3, $4);

    die "Bad Big5 char " . printBig5($big5) unless isValidBig5($big5);
    for my $candidate ($utf32_1993, $utf32_2000, $utf32_2001)
    {
        die "Bad UTF32 char " . printUtf32($candidate)
            unless isValidUtf32($candidate);
    }

    $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

    my $via_big5 = $underlying_big5[$utf32];
    my $via_cp950 = $underlying_cp950[$utf32];
    if (defined($via_big5) || defined($via_cp950))
    {
        # The HKSCS code agrees with the underlying tables when every table
        # that knows this code point maps it from the very same code.
        my $agrees_with_underlying
            = (!defined($via_big5) || $via_big5 == $big5)
              && (!defined($via_cp950) || $via_cp950 == $big5);

        # Agreement is silently ignored.  Depending on real underlying
        # mapping (cf. ../convertbig5hkscs.tab), it would be possible to save
        # some table space by dropping those HKSCS code points that are
        # already covered by the underlying mapping.
        if (!$agrees_with_underlying)
        {
            print "XXX mapping underlying";
            if (defined($via_big5) && defined($via_cp950)
                && $via_big5 == $via_cp950)
            {
                print " Big5/CP950 ", printBig5($via_big5);
            }
            else
            {
                print " Big5 ", printBig5($via_big5) if defined($via_big5);
                print " CP950 ", printBig5($via_cp950) if defined($via_cp950);
            }
            print " and HKSCS ", printBig5($big5), " to ",
                  printUtf32($utf32), "\n";
        }
    }

    ++$pua if $utf32 >= 0xE000 && $utf32 <= 0xF8FF;

    $big5_row = $big5 >> 8;
    $big5_column = $big5 & 0xFF;
    die "Bad Big5 mapping " . printBig5($big5)
        if defined($big5_map[$big5_row][$big5_column]);
    $big5_map[$big5_row][$big5_column] = $utf32;

    addMapping($utf32, $big5, -1);

    # Register the other columns as compatibility mappings, newest first,
    # skipping any value that was already handled above.
    addMapping($utf32_2001, $big5, 2)
        if $utf32_2001 != $utf32;
    addMapping($utf32_2000, $big5, 1)
        if $utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001;
    addMapping($utf32_1993, $big5, 0)
        if $utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
           && $utf32_1993 != $utf32_2001;
}
close IN;
# Report how many primary mappings went to the PUA and, for each column that
# produced any, how many compatibility mappings were stored.
print $pua, " mappings to PUA\n";
my @compat_labels = (" 1993 compatibility mappings\n",
                     " 2000 compatibility mappings\n",
                     " 2001 compatibility mappings\n");
for my $column (0 .. 2)
{
    next if $compat[$column] == 0;
    print $compat[$column], $compat_labels[$column];
}
# Sanity check: abort if any code point in U+0000..U+007F has received a
# mapping.  The lookup is indexed by the loop variable $utf32, so each of
# the 128 code points is really examined.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            $big5 = $uni_map[0][0][$utf32];
            die "Mapping " . printUtf32($utf32) . " to " . printBig5($big5);
        }
    }
}
# Open the generated table file (lc($id) . ".tab") for writing on OUT.
$filename = lc($id) . ".tab";
open OUT, ("> " . $filename) or die "Cannot write " . $filename;

# Turn the leading '#' comment lines of lc($id) . ".pl" into a C comment at
# the top of the output:
#   - a "#!" line is skipped;
#   - the first "#*..." line becomes "/*...", any later one becomes
#     " *.../" (its last character replaced by "/");
#   - any other "#" line becomes " *" followed by its text;
#   - the first line that does not start with "#" ends the copy.
# The input handle is closed once the header has been copied.
$filename = lc($id). ".pl";
open IN, $filename or die "Cannot read ". $filename;
$first = 1;
while (<IN>)
{
    next if /^\#!.*$/;

    if (/^\#(\*.*)$/)
    {
        if ($first == 1)
        {
            print OUT "/", $1, "\n";
            $first = 0;
        }
        else
        {
            print OUT " ", substr($1, 0, length($1) - 1), "/\n";
        }
    }
    elsif (/^\# (.*)$/)
    {
        print OUT " *", $1, "\n";
    }
    elsif (/^\#(.*)$/)
    {
        print OUT " *", $1, "\n";
    }
    else
    {
        last;
    }
}
close IN;
# Emit a blank line, the guarded include of sal/types.h, and another blank
# line.
print OUT join("\n",
               "",
               "#ifndef _SAL_TYPES_H_",
               "#include \"sal/types.h\"",
               "#endif",
               "",
               "");
# Emit the Big5 -> Unicode data array.
#
# For every Big5 row (0..255) that has at least one mapped column the array
# receives:
#   - one word "first | (last << 8)" with the first and last mapped column;
#   - one word per column first..last: the code point, 0xffff for an
#     unmapped column, or the high surrogate for a code point above U+FFFF;
#   - only if the row has code points above U+FFFF: one word with the first
#     such column, then one word per column up to the last such column
#     holding the low surrogate (0 where there is none).
# Rows without mappings only produce a comment.
#
# Side effects used later in the script: @big5_data_offsets (word offset of
# each row, or -1), @big5_data_space and @big5_data_used (bytes emitted and
# bytes actually carrying data per row), $big5_data_index, $big5_rows and
# $big5_chars.
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$big5_data_index = 0;
$big5_rows = 0;
$big5_chars = 0;
for my $row (0 .. 255)
{
    my @mapped_columns = grep { defined($big5_map[$row][$_]) } 0 .. 255;
    if (!@mapped_columns)
    {
        print OUT " /* row ", $row, ": --- */\n";
        $big5_data_offsets[$row] = -1;
        next;
    }
    my ($first_column, $last_column) = @mapped_columns[0, -1];

    $big5_data_offsets[$row] = $big5_data_index;
    ++$big5_rows;
    print OUT " /* row ", $row, " */\n";

    print OUT " ", $first_column, " | (", $last_column,
              " << 8), /* first, last */\n";
    ++$big5_data_index;

    my $row_chars = 0;
    my @astral_columns = (); # columns mapped to code points above U+FFFF

    print OUT " ", printSpaces(7, 10, $first_column);
    my $at_line_start = 0;
    for my $column ($first_column .. $last_column)
    {
        if ($at_line_start)
        {
            print OUT " ";
            $at_line_start = 0;
        }

        my $code_point = $big5_map[$row][$column];
        if (!defined($code_point))
        {
            print OUT "0xffff,";
        }
        elsif ($code_point <= 0xFFFF)
        {
            ++$row_chars;
            printf OUT "0x%04X,", $code_point;
        }
        else
        {
            ++$row_chars;
            push @astral_columns, $column;
            printf OUT "0x%04X,",
                       (0xD800 | (($code_point - 0x10000) >> 10));
        }

        ++$big5_data_index;
        # Ten cells per output line.
        if ($column % 10 == 9)
        {
            print OUT "\n";
            $at_line_start = 1;
        }
    }
    print OUT "\n" unless $at_line_start;

    if (@astral_columns)
    {
        my ($first_astral, $last_astral) = @astral_columns[0, -1];

        print OUT " ", $first_astral, ", /* first low-surrogate */\n";
        ++$big5_data_index;

        print OUT " ", printSpaces(7, 10, $first_astral);
        $at_line_start = 0;
        for my $column ($first_astral .. $last_astral)
        {
            if ($at_line_start)
            {
                print OUT " ";
                $at_line_start = 0;
            }

            my $code_point = $big5_map[$row][$column];
            if (defined($code_point) && $code_point > 0xFFFF)
            {
                printf OUT "0x%04X,",
                           (0xDC00 | (($code_point - 0x10000) & 0x3FF));
            }
            else
            {
                print OUT " 0,";
            }

            ++$big5_data_index;
            if ($column % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;
    }

    my $astral_count = scalar(@astral_columns);
    $big5_chars += $row_chars;
    $big5_data_space[$row]
        = ($big5_data_index - $big5_data_offsets[$row]) * 2;
    $big5_data_used[$row]
        = (1 + $row_chars + ($astral_count == 0 ? 0 : 1 + $astral_count))
          * 2;
}
print OUT "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
# Emit the per-row offset array for the Big5 -> Unicode data: for each of
# the 256 rows either -1 (row unused) or the row's word offset into the data
# array, annotated with its usage statistics.  $big5_rowoffsets_used
# accumulates 4 bytes for every used row.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
for my $row (0 .. 255)
{
    my $row_offset = $big5_data_offsets[$row];
    if ($row_offset == -1)
    {
        print OUT " -1, /* row ", $row, " */\n";
        next;
    }

    my $row_stats = printStats($big5_data_used[$row],
                               $big5_data_space[$row]);
    print OUT " ", $row_offset, ", /* row ", $row, "; ", $row_stats,
              " */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
# Emit the Unicode -> Big5 data array.
#
# For every used page of every used plane the array receives one word
# "first | (last << 8)" with the first and last mapped index of the page,
# followed by one word per index first..last holding the Big5 code (0 where
# the index is unmapped).  Unused planes and pages only produce a comment.
#
# Side effects used later in the script: @uni_data_offsets (word offset of
# each page, or -1), @uni_data_used and @uni_data_space (bytes carrying data
# and bytes emitted per page), and $uni_data_index.
print OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        if (!defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page,
                      ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page,
                  " */\n";

        # A page is only marked used together with a stored mapping, so at
        # least one index is expected to be found here.
        my @mapped_indexes
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        my ($first_index, $last_index) = @mapped_indexes[0, -1];

        print OUT " ", $first_index, " | (", $last_index,
                  " << 8), /* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$plane][$page] = 2; # the first/last word itself

        print OUT " ", printSpaces(7, 10, $first_index);
        my $at_line_start = 0;
        for my $index ($first_index .. $last_index)
        {
            if ($at_line_start)
            {
                print OUT " ";
                $at_line_start = 0;
            }

            my $code = $uni_map[$plane][$page][$index];
            if (defined($code))
            {
                printf OUT "0x%04X,", $code;
                $uni_data_used[$plane][$page] += 2;
            }
            else
            {
                print OUT " 0,";
            }

            ++$uni_data_index;
            # Ten cells per output line.
            if ($index % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        $uni_data_space[$plane][$page]
            = ($uni_data_index - $uni_data_offsets[$plane][$page]) * 2;
    }
}
print OUT "};\n\n";
# Emit the page offset array for the Unicode -> Big5 data: for each used
# plane, 256 entries holding either -1 (page unused) or the page's word
# offset into the data array, annotated with its usage statistics.  Unused
# planes only produce a comment.
#
# Per-plane totals are accumulated in @uni_pageoffsets_used (4 bytes per
# used page), @uni_data_used_sum and @uni_data_space_sum.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1)
        {
            print OUT " -1, /* plane ", $plane, ", page ", $page, " */\n";
            next;
        }

        my $bytes_used = $uni_data_used[$plane][$page];
        my $bytes_space = $uni_data_space[$plane][$page];
        print OUT " ", $page_offset, ", /* plane ", $plane, ", page ",
                  $page, "; ", printStats($bytes_used, $bytes_space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $bytes_used;
        $uni_data_space_sum[$plane] += $bytes_space;
    }
}
print OUT "};\n\n";
# Emit the plane offset array for the Unicode -> Big5 data: for each of the
# 17 planes either -1 (plane unused) or "<n> * 256", where <n> counts the
# used planes seen so far, annotated with the plane's page-offset and data
# usage statistics.  A closing comment gives the overall totals.
#
# The output file is then closed; a failing close is fatal, because buffered
# write errors are only reported at that point.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
for ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane)
{
    if (defined ($uni_plane_used[$uni_plane]))
    {
        print OUT " ",
                  $uni_page_offset++,
                  " * 256, /* plane ",
                  $uni_plane,
                  "; ",
                  printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
                  ", ",
                  printStats($uni_data_used_sum[$uni_plane],
                             $uni_data_space_sum[$uni_plane]),
                  " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else
    {
        print OUT " -1, /* plane ", $uni_plane, " */\n";
    }
}
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";

# $filename no longer names the output file at this point, so the name is
# rebuilt from $id for the error message.
close OUT or die "Cannot close " . lc($id) . ".tab: " . $!;