Update ooo320-m1
[ooovba.git] / sal / textenc / generate / big5hkscs2001.pl
blobfea5a558715c32dfc9f42d771c6964b6ed86cffa
1 #!/usr/bin/perl
2 #*************************************************************************
4 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 #
6 # Copyright 2008 by Sun Microsystems, Inc.
8 # OpenOffice.org - a multi-platform office productivity suite
10 # $RCSfile: big5hkscs2001.pl,v $
12 # $Revision: 1.5 $
14 # This file is part of OpenOffice.org.
16 # OpenOffice.org is free software: you can redistribute it and/or modify
17 # it under the terms of the GNU Lesser General Public License version 3
18 # only, as published by the Free Software Foundation.
20 # OpenOffice.org is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU Lesser General Public License version 3 for more details
24 # (a copy is included in the LICENSE file that accompanied this code).
26 # You should have received a copy of the GNU Lesser General Public License
27 # version 3 along with OpenOffice.org. If not, see
28 # <http://www.openoffice.org/license.html>
29 # for a copy of the LGPLv3 License.
31 #*************************************************************************
33 # The following files must be available in a ./input subdir:
35 # <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
37 # <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
38 # "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994"
39 # Only used to track Unicode characters that are mapped from both Big5 and
40 # HKSCS.
42 # <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
43 # "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000"
44 # Only used to track Unicode characters that are mapped from both CP950 and
45 # HKSCS.
# Identifier embedded in the names of the generated C tables; its lower-cased
# form is also used to derive the output (.tab) and script (.pl) file names.
$id = "Big5Hkscs2001";

# Set to 1 to allow mappings to Unicode beyond Plane 0 (the 2001 column of
# big5-iso.txt is then used instead of the 2000 column).
$surrogates = 0;
# Returns true when the argument is usable as a Unicode code point here:
# inside 0..0x10FFFF, outside the surrogate range 0xD800..0xDFFF, outside
# 0xFDD0..0xFDEF, and with a low 16-bit word below 0xFFFE.
sub isValidUtf32 {
    my ($code_point) = @_;

    my $in_range      = $code_point >= 0 && $code_point <= 0x10FFFF;
    my $is_surrogate  = $code_point >= 0xD800 && $code_point <= 0xDFFF;
    my $in_fdd0_block = $code_point >= 0xFDD0 && $code_point <= 0xFDEF;

    return $in_range
        && !$is_surrogate
        && !$in_fdd0_block
        && ($code_point & 0xFFFF) < 0xFFFE;
}
# Formats a code point as "U+" followed by at least four upper-case hex
# digits.
sub printUtf32 {
    my ($code_point) = @_;
    return 'U+' . sprintf('%04X', $code_point);
}
# Returns true when the argument is a two-byte code whose lead byte is in
# 0x81..0xFE and whose trail byte is in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5 {
    my ($code) = @_;
    my $lead   = $code >> 8;
    my $trail  = $code & 0xFF;

    my $lead_ok  = $lead >= 0x81 && $lead <= 0xFE;
    my $trail_ok = ($trail >= 0x40 && $trail <= 0x7E)
                || ($trail >= 0xA1 && $trail <= 0xFE);

    return $lead_ok && $trail_ok;
}
# Formats a Big5 code as at least four upper-case hex digits, zero padded.
sub printBig5 {
    my ($code) = @_;
    return uc(sprintf('%04x', $code));
}
# Formats a usage summary "USED/SPACE bytes (P%)", where P is USED as a
# percentage of SPACE with one decimal place.  SPACE must be non-zero.
sub printStats {
    my ($used, $space) = @_;
    my $percent = $used * 100 / $space;
    return sprintf('%d/%d bytes (%.1f%%)', $used, $space, $percent);
}
# Returns the blank padding needed to start a table row at column $end of a
# line that holds $columns_per_line cells of $column_width characters each:
# one space per character of every cell between the start of the line
# containing $end and $end itself.
#
#   $column_width      width of one cell in characters
#   $columns_per_line  number of cells per output line (must be non-zero)
#   $end               index of the first cell that will actually be printed
#
# All state is lexical, so the package globals $output, $i and $j are no
# longer overwritten as a side effect of calling this function.
sub printSpaces {
    my ($column_width, $columns_per_line, $end) = @_;

    # Index of the first cell on the line that contains $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped    = $end - $line_start;

    return '' if $skipped <= 0 || $column_width <= 0;
    return ' ' x ($column_width * $skipped);
}
# Records a Unicode -> Big5 mapping in the plane/page/index tables.
#
#   $utf32  Unicode code point to map from
#   $big5   Big5-HKSCS code to map to
#   $comp   -1 for a regular mapping, otherwise an index into @compat (the
#           slots are labelled 1993, 2000 and 2001 where @compat is
#           initialised) whose counter is incremented when the mapping is
#           actually stored
#
# The first mapping registered for a code point wins.  A later attempt to map
# the same code point only prints a warning naming both Big5 codes; the
# rejected code is labelled "compat " exactly when $comp is not -1.  (The
# label used to be derived from the truth value of $comp, which mislabelled
# both the regular case -1 and the compat slot 0.)
#
# $uni_plane, $uni_page and $uni_index are deliberately left as package
# globals, as before, because code outside this sub refers to them.
sub addMapping {
    my ($utf32, $big5, $comp) = @_;

    $uni_plane = $utf32 >> 16;
    $uni_page  = ($utf32 >> 8) & 0xFF;
    $uni_index = $utf32 & 0xFF;

    if (   defined($uni_plane_used[$uni_plane])
        && defined($uni_page_used[$uni_plane][$uni_page])
        && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        # Already mapped: keep the existing entry and report the conflict.
        my $existing = $uni_map[$uni_plane][$uni_page][$uni_index];
        print "WARNING! Mapping ", printUtf32($utf32), " to ",
              printBig5($existing), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
        return;
    }

    $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
    $uni_plane_used[$uni_plane] = 1;
    $uni_page_used[$uni_plane][$uni_page] = 1;
    ++$compat[$comp] if $comp != -1;
    return;
}
# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS.
#
# First pass: for every Unicode code point listed in BIG5.TXT (except the
# replacement character U+FFFD) remember the Big5 code that maps to it.  The
# first code seen for a code point is kept; later ones only cause a warning.
$filename = "BIG5.TXT";
open(my $big5_txt, "<", "input/" . $filename)
    or die "Cannot read " . $filename;
while (my $line = <$big5_txt>) {
    next
        unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
    my ($code, $code_point) = (oct($1), oct($2));

    die "Bad Big5 char " . printBig5($code)
        unless isValidBig5($code);
    die "Bad UTF32 char " . printUtf32($code_point)
        unless isValidUtf32($code_point);
    next if $code_point == 0xFFFD;

    my $earlier = $underlying_big5[$code_point];
    if (defined($earlier)) {
        print "WARNING! In ", $filename, ", both ",
              printBig5($earlier), " and ",
              printBig5($code), " map to ", printUtf32($code_point),
              "\n";
    }
    else {
        $underlying_big5[$code_point] = $code;
    }
}
close($big5_txt);
# Second pass: for every Unicode code point listed in CP950.TXT remember the
# CP950 code that maps to it.  The first code seen for a code point is kept;
# later ones only cause a warning.
$filename = "CP950.TXT";
open(my $cp950_txt, "<", "input/" . $filename)
    or die "Cannot read " . $filename;
while (my $line = <$cp950_txt>) {
    next
        unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
    my ($code, $code_point) = (oct($1), oct($2));

    die "Bad Big5 char " . printBig5($code)
        unless isValidBig5($code);
    die "Bad UTF32 char " . printUtf32($code_point)
        unless isValidUtf32($code_point);

    my $earlier = $underlying_cp950[$code_point];
    if (defined($earlier)) {
        print "WARNING! In ", $filename, ", both ",
              printBig5($earlier), " and ",
              printBig5($code), " map to ", printUtf32($code_point), "\n";
    }
    else {
        $underlying_cp950[$code_point] = $code;
    }
}
close($cp950_txt);
# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points.  (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
#
# Each pair is (Big5 code, PUA code point); the pair is entered into both the
# Big5 -> Unicode table and, via addMapping, the Unicode -> Big5 table.
foreach my $override (
    [0xC6CF, 0xF6E0],
    [0xC6D3, 0xF6E4],
    [0xC6D5, 0xF6E6],
    [0xC6D7, 0xF6E8],
    [0xC6DE, 0xF6EF],
    [0xC6DF, 0xF6F0],
) {
    my ($code, $pua_point) = @$override;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points.  (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
#
# The list alternates Big5 code and Unicode code point.  Only the Big5 ->
# Unicode table is filled in here; addMapping is deliberately not called.
my @hkscs_compat_points = (
    0x8E69 => 0x7BB8,  0x8E6F => 0x7C06,  0x8E7E => 0x7CCE,
    0x8EAB => 0x7DD2,  0x8EB4 => 0x7E1D,  0x8ECD => 0x8005,
    0x8ED0 => 0x8028,
    0x8F57 => 0x83C1,  0x8F69 => 0x84A8,  0x8F6E => 0x840F,
    0x8FCB => 0x89A6,  0x8FCC => 0x89A9,  0x8FFE => 0x8D77,
    0x906D => 0x90FD,  0x907A => 0x92B9,  0x90DC => 0x975C,
    0x90F1 => 0x97FF,
    0x91BF => 0x9F16,
    0x9244 => 0x8503,  0x92AF => 0x5159,  0x92B0 => 0x515B,
    0x92B1 => 0x515D,  0x92B2 => 0x515E,  0x92C8 => 0x936E,
    0x92D1 => 0x7479,
    0x9447 => 0x6D67,  0x94CA => 0x799B,
    0x95D9 => 0x9097,
    0x9644 => 0x975D,  0x96ED => 0x701E,  0x96FC => 0x5B28,
    0x9B76 => 0x7201,  0x9B78 => 0x77D7,  0x9B7B => 0x7E87,
    0x9BC6 => 0x99D6,  0x9BDE => 0x91D4,  0x9BEC => 0x60DE,
    0x9BF6 => 0x6FB6,
    0x9C42 => 0x8F36,  0x9C53 => 0x4FBB,  0x9C62 => 0x71DF,
    0x9C68 => 0x9104,  0x9C6B => 0x9DF0,  0x9C77 => 0x83CF,
    0x9CBC => 0x5C10,  0x9CBD => 0x79E3,  0x9CD0 => 0x5A67,
    0x9D57 => 0x8F0B,  0x9D5A => 0x7B51,  0x9DC4 => 0x62D0,
    0x9EA9 => 0x6062,  0x9EEF => 0x75F9,  0x9EFD => 0x6C4A,
    0x9F60 => 0x9B2E,  0x9F66 => 0x9F17,  0x9FCB => 0x50ED,
    0x9FD8 => 0x5F0C,
    0xA063 => 0x880F,  0xA077 => 0x62CE,  0xA0D5 => 0x7468,
    0xA0DF => 0x7162,  0xA0E4 => 0x7250,
    0xFA5F => 0x5029,  0xFA66 => 0x507D,  0xFABD => 0x5305,
    0xFAC5 => 0x5344,  0xFAD5 => 0x537F,
    0xFB48 => 0x5605,  0xFBB8 => 0x5A77,  0xFBF3 => 0x5E75,
    0xFBF9 => 0x5ED0,
    0xFC4F => 0x5F58,  0xFC6C => 0x60A4,  0xFCB9 => 0x6490,
    0xFCE2 => 0x6674,  0xFCF1 => 0x675E,
    0xFDB7 => 0x6C9C,  0xFDB8 => 0x6E1D,  0xFDBB => 0x6E2F,
    0xFDF1 => 0x716E,
    0xFE52 => 0x732A,  0xFE6F => 0x745C,  0xFEAA => 0x74E9,
    0xFEDD => 0x7809,
);
for (my $k = 0; $k < @hkscs_compat_points; $k += 2) {
    my ($code, $code_point) = @hkscs_compat_points[$k, $k + 1];
    $big5_map[$code >> 8][$code & 0xFF] = $code_point;
}
# Counters reported once the table has been read: mappings that land in the
# BMP Private Use Area, and compatibility mappings per HKSCS edition.
$pua = 0;
@compat[0, 1, 2] = (0, 0, 0);    # 1993, 2000, 2001

# Read big5-iso.txt.  Each data line holds four hex fields: the Big5 code
# followed by the Unicode code points of the 1993, 2000 and 2001 columns.
$filename = "big5-iso.txt";
open(my $hkscs_in, "<", "input/" . $filename)
    or die "Cannot read " . $filename;
while (my $line = <$hkscs_in>) {
    next
        unless $line =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;
    my ($code, $ucs_1993, $ucs_2000, $ucs_2001)
        = map { oct("0x" . $_) } ($1, $2, $3, $4);

    die "Bad Big5 char " . printBig5($code)
        unless isValidBig5($code);
    foreach my $candidate ($ucs_1993, $ucs_2000, $ucs_2001) {
        die "Bad UTF32 char " . printUtf32($candidate)
            unless isValidUtf32($candidate);
    }

    # The primary mapping comes from the 2001 column only when mappings
    # beyond Plane 0 are enabled; otherwise from the 2000 column.
    my $code_point = $surrogates ? $ucs_2001 : $ucs_2000;

    my $via_big5  = $underlying_big5[$code_point];
    my $via_cp950 = $underlying_cp950[$code_point];
    if (defined($via_big5) || defined($via_cp950)) {
        # Every underlying table that knows this code point must map it from
        # the very same Big5 code; otherwise report the disagreement.
        my $big5_agrees  = !defined($via_big5)  || $via_big5 == $code;
        my $cp950_agrees = !defined($via_cp950) || $via_cp950 == $code;
        if ($big5_agrees && $cp950_agrees) {
            # ignore

            # Depending on real underlying mapping (cf.
            # ../convertbig5hkscs.tab), it would be possible to save some
            # table space by dropping those HKSCS code points that are
            # already covered by the underlying mapping.
        }
        else {
            print "XXX mapping underlying";
            if (   defined($via_big5)
                && defined($via_cp950)
                && $via_big5 == $via_cp950)
            {
                print " Big5/CP950 ", printBig5($via_big5);
            }
            else {
                print " Big5 ", printBig5($via_big5)
                    if defined($via_big5);
                print " CP950 ", printBig5($via_cp950)
                    if defined($via_cp950);
            }
            print " and HKSCS ", printBig5($code), " to ",
                  printUtf32($code_point), "\n";
        }
    }

    ++$pua if $code_point >= 0xE000 && $code_point <= 0xF8FF;

    # Each Big5 code may be given a Unicode mapping only once.
    my $row    = $code >> 8;
    my $column = $code & 0xFF;
    die "Bad Big5 mapping " . printBig5($code)
        if defined($big5_map[$row][$column]);
    $big5_map[$row][$column] = $code_point;

    addMapping($code_point, $code, -1);

    # Code points from the other columns that differ from the primary one
    # (and from each other) become compatibility mappings back to this code.
    addMapping($ucs_2001, $code, 2)
        if $ucs_2001 != $code_point;
    addMapping($ucs_2000, $code, 1)
        if $ucs_2000 != $code_point && $ucs_2000 != $ucs_2001;
    addMapping($ucs_1993, $code, 0)
        if $ucs_1993 != $code_point
        && $ucs_1993 != $ucs_2000
        && $ucs_1993 != $ucs_2001;
}
close($hkscs_in);

print $pua, " mappings to PUA\n";
my @compat_labels = (
    " 1993 compatibility mappings\n",
    " 2000 compatibility mappings\n",
    " 2001 compatibility mappings\n",
);
foreach my $slot (0 .. 2) {
    print $compat[$slot], $compat_labels[$slot] if $compat[$slot] != 0;
}
# Abort if any code point in U+0000..U+007F has been given a Big5 mapping.
#
# The lookup must be indexed by the loop variable.  It used to be indexed by
# $uni_index, a package global that merely holds the low byte of whatever
# code point addMapping handled last, so the ASCII range was never actually
# inspected.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0])) {
    foreach my $ascii (0 .. 0x7F) {
        my $mapped = $uni_map[0][0][$ascii];
        next unless defined($mapped);
        die "Mapping " . printUtf32($ascii) . " to " . printBig5($mapped);
    }
}
# Open the generated table file.  OUT stays a bareword handle because the
# sections that follow all print to it.
$filename = lc($id) . ".tab";
open(OUT, ">", $filename) or die "Cannot write " . $filename;

# Copy the leading comment block of this script into the output, converted
# to a C comment.  Reading stops at the first line that is not a comment.
{
    $filename = lc($id) . ".pl";
    open(my $script_in, "<", $filename) or die "Cannot read " . $filename;
    my $first = 1;
    while (my $line = <$script_in>) {
        if ($line =~ /^\#!.*$/) {
            # The interpreter line is not copied.
        }
        elsif ($line =~ /^\#(\*.*)$/) {
            if ($first == 1) {
                # First row of asterisks opens the C comment.
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else {
                # A later row of asterisks closes it; its last character is
                # replaced by "/" and its leading "#" by a space.
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/) {
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/) {
            print OUT " *", $1, "\n";
        }
        else {
            last;
        }
    }
    # The handle was previously left open for the rest of the run.
    close($script_in);
}

print OUT "\n",
          "#ifndef _SAL_TYPES_H_\n",
          "#include \"sal/types.h\"\n",
          "#endif\n",
          "\n";
# Emit the Big5 -> Unicode data array.  For every lead byte ("row") that has
# at least one mapping the array holds:
#   - one word packing the first and last mapped trail byte,
#   - one word per trail byte in that range (0xffff where unmapped; the high
#     surrogate where the code point lies beyond 0xFFFF),
#   - and, only if the row has code points beyond 0xFFFF, one word giving the
#     first such trail byte followed by one low-surrogate word (or 0) per
#     trail byte up to the last such one.
# @big5_data_offsets, @big5_data_space and @big5_data_used are filled in per
# row for the row-offset table that is written afterwards.
#
# NOTE(review): the padding literals below (" " and " 0,") are narrower than
# the 7-character cells that printSpaces(7, 10, ...) pads for; confirm that
# their whitespace was not collapsed when this file was extracted.
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$big5_data_index = 0;
$big5_rows = 0;
$big5_chars = 0;
foreach my $row (0 .. 255) {
    my @mapped_columns = grep { defined($big5_map[$row][$_]) } 0 .. 255;

    if (!@mapped_columns) {
        print OUT " /* row ", $row, ": --- */\n";
        $big5_data_offsets[$row] = -1;
        next;
    }

    my ($first_column, $last_column) = @mapped_columns[0, -1];

    $big5_data_offsets[$row] = $big5_data_index;
    ++$big5_rows;
    print OUT " /* row ", $row, " */\n";

    my $surrogate_first = -1;
    my $surrogate_last  = -1;
    my $row_chars       = 0;
    my $row_surrogates  = 0;

    print OUT " ", $first_column, " | (", $last_column,
              " << 8), /* first, last */\n";
    ++$big5_data_index;

    # Main cells: BMP code point, high surrogate, or 0xffff filler.
    print OUT " ", printSpaces(7, 10, $first_column);
    my $at_line_start = 0;
    foreach my $column ($first_column .. $last_column) {
        if ($at_line_start) {
            print OUT " ";
            $at_line_start = 0;
        }
        my $code_point = $big5_map[$row][$column];
        if (!defined($code_point)) {
            printf OUT "0xffff,";
        }
        elsif ($code_point <= 0xFFFF) {
            ++$row_chars;
            printf OUT "0x%04X,", $code_point;
        }
        else {
            ++$row_chars;
            ++$row_surrogates;
            printf OUT "0x%04X,",
                   (0xD800 | (($code_point - 0x10000) >> 10));
            $surrogate_first = $column if $surrogate_first == -1;
            $surrogate_last  = $column;
        }
        ++$big5_data_index;
        if ($column % 10 == 9) {
            print OUT "\n";
            $at_line_start = 1;
        }
    }
    print OUT "\n" unless $at_line_start;

    # Low-surrogate cells, present only when the row needs them.
    if ($surrogate_first != -1) {
        print OUT " ", $surrogate_first,
                  ", /* first low-surrogate */\n";
        ++$big5_data_index;

        print OUT " ", printSpaces(7, 10, $surrogate_first);
        $at_line_start = 0;
        foreach my $column ($surrogate_first .. $surrogate_last) {
            if ($at_line_start) {
                print OUT " ";
                $at_line_start = 0;
            }
            my $code_point = $big5_map[$row][$column];
            $code_point = 0 unless defined($code_point);
            if ($code_point <= 0xFFFF) {
                printf OUT " 0,";
            }
            else {
                printf OUT "0x%04X,",
                       (0xDC00 | (($code_point - 0x10000) & 0x3FF));
            }
            ++$big5_data_index;
            if ($column % 10 == 9) {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;
    }

    # Statistics: bytes occupied by the row versus bytes carrying mappings.
    $big5_chars += $row_chars;
    $big5_data_space[$row]
        = ($big5_data_index - $big5_data_offsets[$row]) * 2;
    my $surrogate_words = $row_surrogates == 0 ? 0 : 1 + $row_surrogates;
    $big5_data_used[$row] = (1 + $row_chars + $surrogate_words) * 2;
}
print OUT "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
# Emit, for each of the 256 possible lead bytes, the offset of its row in the
# ToUnicodeData array, or -1 when the row has no mappings.  Rows that are
# present are annotated with their used/occupied byte statistics.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
foreach my $row (0 .. 255) {
    my $row_offset = $big5_data_offsets[$row];
    if ($row_offset == -1) {
        print OUT " -1, /* row ", $row, " */\n";
        next;
    }
    my $stats = printStats($big5_data_used[$row], $big5_data_space[$row]);
    print OUT " ", $row_offset, ", /* row ", $row, "; ", $stats, " */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
# Emit the Unicode -> Big5 data array.  For every used page of every used
# plane the array holds one word packing the first and last mapped index in
# the page, followed by one word per index in that range (the Big5 code, or
# 0 where unmapped).  @uni_data_offsets, @uni_data_used and @uni_data_space
# are filled in per page for the offset tables that are written afterwards.
print OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
foreach my $plane (0 .. 16) {
    if (!defined($uni_plane_used[$plane])) {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    foreach my $page (0 .. 255) {
        if (!defined($uni_page_used[$plane][$page])) {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page,
                      ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page,
                  " */\n";

        # A page is only marked used together with one of its entries, so
        # the list of mapped indices is never empty here.
        my @mapped_indices
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        my ($first_index, $last_index) = @mapped_indices[0, -1];

        print OUT " ", $first_index, " | (", $last_index,
                  " << 8), /* first, last */\n";
        ++$uni_data_index;
        my $used_bytes = 2;    # the first/last word itself

        print OUT " ", printSpaces(7, 10, $first_index);
        my $at_line_start = 0;
        foreach my $index ($first_index .. $last_index) {
            if ($at_line_start) {
                print OUT " ";
                $at_line_start = 0;
            }
            my $code = $uni_map[$plane][$page][$index];
            if (defined($code)) {
                printf OUT "0x%04X,", $code;
                $used_bytes += 2;
            }
            else {
                print OUT " 0,";
            }
            ++$uni_data_index;
            if ($index % 10 == 9) {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        $uni_data_used[$plane][$page] = $used_bytes;
        $uni_data_space[$plane][$page]
            = ($uni_data_index - $uni_data_offsets[$plane][$page]) * 2;
    }
}
print OUT "};\n\n";
# Emit, for every used plane, 256 entries giving the offset of each page in
# the UnicodeTo...Data array, or -1 for pages without mappings.  Unused
# planes contribute only a comment.  Per-plane totals of offset-table bytes
# and of used/occupied data bytes are accumulated for the plane-offset table
# that is written afterwards.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
foreach my $plane (0 .. 16) {
    if (!defined($uni_plane_used[$plane])) {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane]    = 0;
    $uni_data_space_sum[$plane]   = 0;
    foreach my $page (0 .. 255) {
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1) {
            print OUT " -1, /* plane ", $plane, ", page ", $page, " */\n";
            next;
        }

        my $used  = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT " ", $page_offset,
                  ", /* plane ", $plane,
                  ", page ", $page,
                  "; ", printStats($used, $space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane]    += $used;
        $uni_data_space_sum[$plane]   += $space;
    }
}
print OUT "};\n\n";
# Emit, for each of the 17 planes, the offset of its block of 256 entries in
# the page-offset table (written as "<n> * 256", where <n> counts the used
# planes so far), or -1 for planes without mappings.  A closing comment
# summarises the used/occupied bytes of the plane-offset table, of the
# page-offset table and of the data array.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
foreach my $plane (0 .. 16) {
    if (!defined($uni_plane_used[$plane])) {
        print OUT " -1, /* plane ", $plane, " */\n";
        next;
    }

    print OUT " ", $uni_page_offset,
              " * 256, /* plane ", $plane,
              "; ", printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ", printStats($uni_data_used_sum[$plane],
                               $uni_data_space_sum[$plane]),
              " */\n";
    ++$uni_page_offset;
    $uni_planeoffsets_used     += 4;
    $uni_pageoffsets_used_sum  += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2        += $uni_data_used_sum[$plane];
    $uni_data_space_sum2       += $uni_data_space_sum[$plane];
}
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so a failing close must not go unnoticed.  The name is
# rebuilt from $id because $filename no longer refers to the output file.
close(OUT) or die "Cannot write " . lc($id) . ".tab: $!";