3 # Copyright (c) 2003-2004, Artem B. Bityuckiy, SoftMine Corporation.
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions
8 # 1. Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # 2. Redistributions in binary form must reproduce the above copyright
11 # notice, this list of conditions and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution.
14 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 # ##############################################################################
34 # FUNCTION PROTOTYPES AND GLOBAL DATA DECLARATION SECTION
36 # ##############################################################################
39 # SUPPLEMENTARY FUNCTIONS FORWARD DECLARATIONS
42 sub Generate8bitToUCS
();
44 sub Generate16bitSize
($);
45 sub Output8bitToUCS
(;$);
46 sub Output8bitFromUCS
(;$);
50 # VARIABLES RELATING TO COMMAND-LINE OPTIONS
51 my $Verbose; # Be verbose if true
52 my $Source; # Output C source code instead of binary .cct file if true
53 my $Plane; # Use this plane if defined
54 my $InFile; # Use this file for input
55 my $OutFile; # Use this file for output
56 my $CCSName; # Use this CCS name
57 my $NoSpeed; # Don't generate speed-optimized tables (binary files only)
58 my $NoSize; # Don't generate size-optimized tables (binary files only)
59 my $NoBE; # Don't generate big-endian tables (binary files only)
60 my $NoLE; # Don't generate big-endian tables (binary files only)
61 my $NoTo; # Don't generate "to_ucs" table (binary files only)
62 my $NoFrom; # Don't generate "from_ucs" table (binary files only)
63 my $CCSCol; # CCS column number in source file
64 my $UCSCol; # UCS column number in source file
67 # DATA STRUCTURES WITH "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES
68 my (@FromSpeedTbl, @ToSpeedTbl, @FromSizeTbl, @ToSizeTbl);
69 # "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES SIZE IN BYTES
70 my ($FromSpeedBytes, $ToSpeedBytes, $FromSizeBytes, $ToSizeBytes) =
73 my (%CCSUCS, %UCSCCS); # CCS->UCS and UCS->CCS mappings
74 my $Bits = 8; # Table bits (8 or 16);
76 # SPECIAL MARKER CODES
77 my $InvCode = 0xFFFF; # FFFF indicates 18 bit invalid codes
78 my $InvBlock = 0xFFFF; # FFFF also mark empty blocks in speed-optimized tables
79 my $LostCode = 0x3F; # ASCII '?' marks codes lost during CCS->UCS mapping
80 # To mark invalid codes in 8bit encodings 0xFF is used CCS's 0xFF mapping is saved
81 # separately. $FFMap variable stores real 0xFF mapping if defined.
82 my $InvCode8bit = 0xFF;
85 # 8 Bit "From UCS" table header size (bytes)
86 my $Hdr8bitFromUCS = 2;
87 # Binary table header size (bytes)
90 # At first all lost CCS codes are marked by $TmpLost to distinguish between
91 # code which is equivalent to $LostCode and lost codes. This is done in order to
92 # output $MacroLostCode instead of $LostCode in source file.
93 my $TmpLost = 0x1FFFF;
95 # VARIABLES RELATING TO C SOURCE CODE
96 my $MacroInvCode = 'INVALC';
97 my $MacroInvBlock = 'INVBLK';
98 my $MacroLostCode = 'LOST_C';
99 my $MacroCCSName = 'ICONV_CCS_%s';
100 my $GuardSize = 'defined (TABLE_USE_SIZE_OPTIMIZATION)';
101 my $GuardToUCS = "ICONV_TO_UCS_CCS_%s";
102 my $GuardFromUCS = "ICONV_FROM_UCS_CCS_%s";
103 my $MacroSpeedTable = 'TABLE_SPEED_OPTIMIZED';
104 my $MacroSizeTable = 'TABLE_SIZE_OPTIMIZED';
105 my $Macro8bitTable = 'TABLE_8BIT';
106 my $Macro16bitTable = 'TABLE_16BIT';
107 my $MacroVer1Table = 'TABLE_VERSION_1';
108 my $TypeBICCS = 'iconv_ccs_t';
109 my $VarToUCSSize = "to_ucs_size_%s";
110 my $VarToUCSSpeed = "to_ucs_speed_%s";
111 my $VarFromUCSSize = "from_ucs_size_%s";
112 my $VarFromUCSSpeed = "from_ucs_speed_%s";
113 my $VarBICCS = "_iconv_ccs_%s";
114 # Text block that visually separates tables.
115 my $Separator = '=' x
70;
117 # ##############################################################################
121 # ##############################################################################
124 # Parse command-line options, check them and set correspondent global variables
127 # Initialize global variables tat depend on CCS name.
128 $_ = sprintf $_, $CCSName foreach +($VarToUCSSpeed,
134 $_ = sprintf $_, "\U$CCSName" foreach +($GuardToUCS,
138 # Open input and output files
139 Err
"Can't open \"$InFile\" file for reading: $!.\n", 1
140 unless open(INFILE
, '<', $InFile);
141 Err
"Can't open \"$OutFile\" file for writing: $!.\n", 1
142 unless open(OUTFILE
, '>', $OutFile);
144 # ==============================================================================
145 # EXTRACT CODES MAP FROM INPUT FILE
146 # ==============================================================================
148 for (my $ln = 1; my $l = <INFILE
>; $ln += 1)
150 # Skip comment and empty lines, remove ending CR symbol
151 next if $l =~ /^#.*$/ or $l =~ /^\s*$/;
152 $l =~ s/^(.*)\n$/$1/, $l =~ s/^(.*)\r$/$1/;
154 # Remove comment and extra spaces
155 $l =~ s/(.*)\s*#.*/$1/;
157 $l =~ s/(.*)\s*$/$1/;
159 # Split line into individual codes
160 my @codes = split / /, $l;
162 # Skip line if there is no needed columns
163 unless (defined $codes[$CCSCol])
165 print("Warning (line $ln): no CCS column, skip.\n") if $Verbose;
168 unless (defined $codes[$UCSCol])
170 print("Warning (line $ln): no UCS column, skip.\n") if $Verbose;
174 # Extract codes strings from needed columns
175 my ($ccs, $ucs) = ($codes[$CCSCol], $codes[$UCSCol]);
176 my $patt = qr/(0[xX])?[0-9a-fA-F]{1,8}/; # HEX digit regexp pattern.
178 # Check that CCS and UCS code strings has right format.
179 unless ($ccs =~ m/^$patt$/)
181 print("Warning (line $ln): $ccs CCS code isn't recognized, skip.\n")
185 unless ($ucs =~ m/^($patt(,|\+))*$patt$/)
187 print("Warning (line $ln): $ucs UCS code isn't recognized, skip.\n")
192 # Convert code to numeric format (assume hex).
195 if ($ucs =~ m/,/ or $ucs =~ m/\+/)
197 # Mark CCS codes with "one to many" mappings as lost
198 printf "Warning (line $ln): only one to one mapping is supported, "
199 . "mark 0x%.4X CCS code as lost.\n", hex $ccs if $Verbose;
204 # Convert code to numeric format
207 # Check that UCS code isn't longer than 16 bits.
210 printf("Warning (line $ln): UCS code should fit 16 bits, "
211 . "mark 0x%.4X CCS code as lost.\n", hex $ccs) if $Verbose;
216 # If CCS value > 0xFFFF user should specify plane number.
217 if ($ccs > 0xFFFF && !defined $Plane)
219 print("Warning (line $ln): $ccs is > 16 bit, plane number should be specified,"
220 . " skip this mapping.\n") if $Verbose;
226 next if (($ccs & 0xFFFF0000) >> 16) != hex $Plane; # Skip alien plane.
230 # Check that reserved codes aren't used.
231 if ($ccs == $InvCode or $ucs == $InvCode)
233 print("Warning (line $ln): $InvCode is reserved to mark invalid codes and "
234 . "shouldn't be used in mappings, skip.\n") if $Verbose;
238 # Save mapping in UCSCCS and CCSUCS hash arrays.
239 $UCSCCS{$ucs} = $ccs if $ucs != $TmpLost && !defined $UCSCCS{$ucs};
240 $CCSUCS{$ccs} = $ucs if !defined $CCSUCS{$ccs};
242 $Bits = 16 if $ccs > 0xFF;
247 Err
"Error: there is no plane $Plane in \"$0\".\n" if defined $Plane;
248 Err
"Error: mapping wasn't found.\n";
252 # ==============================================================================
253 # GENERATE TABLE DATA
254 # ==============================================================================
258 $FFMap = $CCSUCS{0xFF};
259 $FFMap = $InvCode if !defined $FFMap;
264 Generate8bitToUCS
() unless $NoTo;
268 GenerateSpeed
("to_ucs") unless $NoTo || $NoSpeed;
269 Generate16bitSize
("to_ucs") unless $NoTo || $NoSize;
272 GenerateSpeed
("from_ucs") unless $NoFrom || $NoSpeed;
273 Generate16bitSize
("from_ucs") unless $NoFrom || $NoSize;
275 # ==============================================================================
277 # ==============================================================================
284 * This file was generated automatically - don't edit it.
285 * File contains iconv CCS tables for $CCSName encoding.
290 #if defined ($GuardToUCS) \\
291 || defined ($GuardFromUCS)
294 #include <sys/types.h>
295 #include <sys/param.h>
297 #include \"ccsnames.h\"
304 "#if (BYTE_ORDER == LITTLE_ENDIAN)
305 # define W(word) (word) & 0xFF, (word) >> 8
306 #elif (BYTE_ORDER == BIG_ENDIAN)
307 # define W(word) (word) >> 8, (word) & 0xFF
309 # error \"Unknown byte order.\"
323 OutputSpeed
("to_ucs") unless $NoSpeed;
324 OutputSize
("to_ucs") unless $NoSize;
335 OutputSpeed
("from_ucs") unless $NoSpeed;
336 OutputSize
("from_ucs") unless $NoSize;
340 # OUTPUT TABLE DESCRIPTION STRUCTURE
343 * $CCSName CCS description table.
349 \t$MacroVer1Table, /* Table version */
350 \t$MacroCCSName, /* CCS name */
355 "\t$Macro8bitTable, /* Table bits */
357 #if defined ($GuardFromUCS)
358 \t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table */
360 \t(__uint16_t *)NULL,
363 #if defined ($GuardToUCS)
364 \t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table */
366 \t(__uint16_t *)NULL,
373 "\t$Macro16bitTable, /* Table bits */
374 #if defined ($GuardFromUCS) \\
377 \t(__uint16_t *)&$VarFromUCSSize, /* UCS -> $CCSName table size-optimized table */
378 #elif defined ($GuardFromUCS) \\
381 \t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table speed-optimized table */
384 \t(__uint16_t *)NULL,
386 #if defined ($GuardToUCS) \\
389 \t(__uint16_t *)&$VarToUCSSize /* $CCSName -> UCS table speed-optimized table */
390 #elif defined ($GuardToUCS) \\
393 \t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table speed-optimized table */
396 \t(__uint16_t *)NULL,
400 print OUTFILE
"\n#endif /* $GuardToUCS) || ... */\n\n";
404 # OUTPUT BINARY TABLES DESCRIPTION STRUCTURE (ALWAYS BIG ENDIAN)
405 print OUTFILE
pack "n", 1;
406 print OUTFILE
pack "n", $Bits;
407 my $len = length $CCSName;
408 print OUTFILE
pack "N", $len;
409 print OUTFILE
pack "a$len", $CCSName;
411 my $pos = $HdrBinary + $len;
414 my $l = 4 - ($pos & 3);
415 print OUTFILE
pack "a$l", 'XXX';
422 for (my $i = 0; $i < 16; $i++)
427 $tables[0] = $pos, $tables[1] = $FromSpeedBytes, $pos += $FromSpeedBytes
428 unless $NoFrom || $NoSpeed || $NoBE;
429 $tables[2] = $pos, $tables[3] = $FromSpeedBytes, $pos += $FromSpeedBytes
430 unless $NoFrom || $NoSpeed || $NoLE;
433 $tables[4] = $pos, $tables[5] = $FromSizeBytes, $pos += $FromSizeBytes
434 unless $NoFrom || $NoSize || $NoBE;
435 $tables[6] = $pos, $tables[7] = $FromSizeBytes, $pos += $FromSizeBytes
436 unless $NoFrom || $NoSize || $NoLE;
438 $tables[8] = $pos, $tables[9] = $ToSpeedBytes, $pos += $ToSpeedBytes
439 unless $NoTo || $NoSpeed || $NoBE;
440 $tables[10] = $pos, $tables[11] = $ToSpeedBytes, $pos += $ToSpeedBytes
441 unless $NoTo || $NoSpeed || $NoLE;
444 $tables[12] = $pos, $tables[13] = $ToSizeBytes, $pos += $ToSizeBytes
445 unless $NoTo || $NoSize || $NoBE;
446 $tables[14] = $pos, $tables[15] = $ToSizeBytes, $pos += $ToSizeBytes
447 unless $NoTo || $NoSize || $NoLE;
450 print OUTFILE
pack("N", $_) foreach @tables;
452 print "Total bytes for output: $pos.\n" if $Verbose;
454 # OUTPUT BINARY TABLES
459 Output8bitFromUCS
("n") unless $NoBE;
460 Output8bitFromUCS
("v") unless $NoLE;
466 OutputSpeed
("from_ucs", "n") unless $NoBE;
467 OutputSpeed
("from_ucs", "v") unless $NoLE;
471 OutputSize
("from_ucs", "n") unless $NoBE;
472 OutputSize
("from_ucs", "v") unless $NoLE;
480 Output8bitToUCS
("n") unless $NoBE;
481 Output8bitToUCS
("v") unless $NoLE;
487 OutputSpeed
("to_ucs", "n") unless $NoBE;
488 OutputSpeed
("to_ucs", "v") unless $NoLE;
492 OutputSize
("to_ucs", "n") unless $NoBE;
493 OutputSize
("to_ucs", "v") unless $NoLE;
504 # ##############################################################################
506 # SUPPLEMENTARY FUNCTIONS
508 # ##############################################################################
511 # =============================================================================
513 # Generate 8bit "to_ucs" table. Store table data in %ToSpeedTbl hash.
514 # Store table size in $ToSpeedBytes scalar.
516 # =============================================================================
517 sub Generate8bitToUCS
()
519 for (my $i = 0; $i <= 255; $i++)
521 $ToSpeedTbl[$i] = defined $CCSUCS{$i} ?
$CCSUCS{$i} : $InvCode;
523 $ToSpeedBytes = 256*2;
527 # =============================================================================
529 # Generate speed-optimized table.
532 # "to_ucs" - generate "to_ucs" table, store table data in @ToSpeedTbl
533 # array, store table size in $ToSpeedBytes scalar.
534 # "from_ucs" - generate "from_ucs" table, store table data in @FromSpeedTbl
535 # array, store table size in $FromSpeedBytes scalar.
537 # Data is written to @ToSpeedTbl or @FromSpeedTbl (@map) table and has the
539 # $table[0] - 256-element array (control block);
540 # $table[1 .. $#table] - 256-element arrays (data blocks).
542 # =============================================================================
549 if ($_[0] eq "to_ucs")
553 $bytes = \
$ToSpeedBytes;
555 elsif ($_[0] eq "from_ucs")
558 $tbl = \
@FromSpeedTbl;
559 $bytes = \
$FromSpeedBytes;
563 Err
"Internal script error in GenerateSpeed()\n";
566 # Identify unused blocks
568 $busy_blocks[$_ >> 8] = 1 foreach (keys %$map);
570 # GENERATE FIRST 256-ELEMENT CONTROL BLOCK
572 my $idx = $Bits == 16 ?
0 : 256 + $Hdr8bitFromUCS;
575 $tbl->[0]->[$i] = $busy_blocks[$i] ?
$idx += 256 : undef;
578 # GENERATE DATA BLOCKS
580 for (my $i = 0; $i <= 0xFF; $i++)
582 next unless $busy_blocks[$i];
584 for (my $j = 0; $j <= 0xFF; $j++)
586 $tbl->[$i+1]->[$j] = $map->{($i << 8) | $j};
589 $$bytes *= 2 if $Bits == 16;
590 $$bytes += $Hdr8bitFromUCS if $Bits == 8;
595 # =============================================================================
597 # Generate 16bit size-optimized table.
600 # "to_ucs" - generate "to_ucs" table, store table data in @ToSizeTbl
601 # array, store table size in $ToSizeBytes scalar.
602 # "from_ucs" - generate "from_ucs" table, store table data in @FromSizeTbl
603 # array, store table size in $FromSizeBytes scalar.
605 # Data is written to @ToSizeTbl or @FromSizeTbl (@map) table and has the
607 # $table[0] - number of ranges;
608 # $table[1] - number of unranged codes;
609 # $table[2] - unranged codes index in resulting array;
610 # $table[3]->[0 .. $table[0]] - array of arrays of ranges:
611 # $table[3]->[x]->[0] - first code;
612 # $table[3]->[x]->[1] - last code;
613 # $table[3]->[x]->[2] - range index in resulting array;
614 # $table[4]->[0 .. $table[0]] - array of arrays of ranges content;
615 # $table[5]->[0 .. $table[1]] - array of arrays of unranged codes;
616 # $table[5]->[x]->[0] - CCS code;
617 # $table[5]->[x]->[0] - UCS code;
619 # =============================================================================
620 sub Generate16bitSize
($)
626 if ($_[0] eq "to_ucs")
630 $bytes = \
$ToSizeBytes;
632 elsif ($_[0] eq "from_ucs")
635 $tbl = \
@FromSizeTbl;
636 $bytes = \
$FromSizeBytes;
640 Err
"Internal script error Generate16bitSize()\n";
643 # CREATE LIST OF RANGES.
644 my @codes = sort {$a <=> $b} keys %$map;
645 my @ranges; # Code ranges
646 my @range; # Current working range
649 if (not @range or $_ - 1 == $range[$#range])
668 # OPTIMIZE LIST OF RANGES.
669 my $r = 0; # Working range number
672 last if ($r == $#ranges);
674 my @r1 = @
{$ranges[$r]};
675 my @r2 = @
{$ranges[$r + 1]};
677 # Calculate how many array entries two ranges need
685 { $s1 = $#r1 + 1 + 3; }
692 { $s2 = $#r2 + 1 + 3; }
696 # Calculate how many array entries will be needed if we join them
697 my $one = $r2[$#r2] - $r1[0] + 1 + 3;
699 $r += 1, next if ($one > $two);
703 push @r, $_ foreach (@r1);
704 for (my $i = $r1[$#r1]+1; $i < $r2[0]; $i++)
708 push @r, $_ foreach (@r2);
710 splice @ranges, $r+1, 1;
713 # SEPARATE RANGED AND UNRANGED CODES. SPLIT 2-CODES RANGES ON 2 UNRANGED.
719 push @unranged, $$_[0];
724 push @unranged, $$_[0];
725 push @unranged, $$_[1];
730 # DELETE UNUSED ELEMENTS
731 for (my $i = 0; $i <= $#ranges; $i++)
733 splice @ranges, $i--, 1 unless defined $ranges[$i];
736 # CALCULATE UNRANGED CODES ARRAY INDEX
737 my $idx = 3 + ($#ranges + 1)*3;
738 $idx += $#$_ + 1 foreach @ranges;
741 $tbl->[0] = $#ranges + 1; # Number of ranges
742 $tbl->[1] = $#unranged + 1; # Number of unranged codes
743 $tbl->[2] = $idx; # Array index of unranged codes
745 # Generate ranges list
746 $idx = 3 + ($#ranges + 1)*3; # First range data index
751 $tbl->[3]->[$num]->[0] = $_->[0];
752 $tbl->[3]->[$num]->[1] = $_->[$#$_];
753 $tbl->[3]->[$num]->[2] = $idx;
758 # Generate ranges content
762 for (my $i = 0; $i <= $#$_; $i++)
764 $tbl->[4]->[$num]->[$i] = defined $_->[$i] ?
$map->{$_->[$i]} : undef;
767 $$bytes += ($#$_ + 1)*2;
770 # Generate unranged codes list
774 $tbl->[5]->[$num]->[0] = $_;
775 $tbl->[5]->[$num]->[1] = $map->{$_};
779 $$bytes += ($#unranged + 1)*4;
783 # =============================================================================
785 # Output 8bit "to UCS" table. Output table's source code if $Source
786 # and table's binary data if !$Source.
788 # Parameter 1: Not used when sources are output. Output BE binary if 'n' and
791 # =============================================================================
792 sub Output8bitToUCS
(;$)
797 printf "Output%s 8-bit UCS -> $CCSName table ($ToSpeedBytes bytes).\n",
798 defined $endian ?
($endian eq 'n' ?
799 " Big Endian" : " Little Endian") : "" if $Verbose;
802 # Output heading information
805 * 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).
808 #if defined ($GuardToUCS)
810 static const __uint16_t
811 ${VarToUCSSpeed}\[] =
817 foreach (@ToSpeedTbl)
824 printf OUTFILE
"0x%.4X,", $_;
828 print OUTFILE
"$MacroLostCode,";
833 print OUTFILE
"$MacroInvCode,";
835 print(OUTFILE
"\n\t"), $br = 0 unless $br % 8;
837 print OUTFILE
"\n};\n\n#endif /* $GuardToUCS */\n\n";
841 foreach (@ToSpeedTbl)
843 print OUTFILE
pack($endian, $_ == $TmpLost ?
$LostCode : $_);
849 # =============================================================================
851 # Output 8bit "from UCS" table. Output table's source code if $Source
852 # and table's binary data if !$Source.
854 # Parameter 1: Not used when sources are output. Output BE binary if 'n' and
857 # =============================================================================
858 sub Output8bitFromUCS
(;$)
862 printf "Output%s 8-bit $CCSName -> UCS table ($FromSpeedBytes bytes).\n",
863 defined $endian ?
($endian eq 'n' ?
864 " Big Endian" : " Little Endian") : "" if $Verbose;
869 * 8-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
873 #if defined ($GuardFromUCS)
875 static const unsigned char
876 ${VarFromUCSSpeed}\[] =
884 printf OUTFILE
"\tW(0x%.4X), /* Real 0xFF mapping. 0xFF is used "
885 . "to mark invalid codes */\n", $FFMap;
889 print OUTFILE
pack($endian, $FFMap);
892 # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
896 print OUTFILE
"\t/* Heading Block */";
897 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 4)
899 print OUTFILE
"\n\t" unless $br;
900 if (defined $FromSpeedTbl[0]->[$i])
902 printf OUTFILE
"W(0x%.4X),", $FromSpeedTbl[0]->[$i];
906 print OUTFILE
"W($MacroInvBlock),";
912 print OUTFILE
pack($endian, defined $_ ?
$_ : $InvBlock)
913 foreach @
{$FromSpeedTbl[0]};
918 my $index = 512 + $Hdr8bitFromUCS;
919 for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
921 next unless defined $FromSpeedTbl[$blk];
922 printf OUTFILE
"\n\t/* Block $blk, Array index 0x%.4X */", $index;
924 for (my $i = 0, my $br = 0; $i < 256; $i++, $br = $i % 8)
926 print OUTFILE
"\n\t" unless $br;
927 my $code = $FromSpeedTbl[$blk]->[$i];
930 printf OUTFILE
"0x%.2X,", $InvCode8bit;
934 printf OUTFILE
"0x%.2X,", $code == $TmpLost ?
$LostCode : $code;
938 print OUTFILE
"\n};\n\n#endif /* $GuardFromUCS */\n\n";
942 for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
944 next unless defined $FromSpeedTbl[$blk];
945 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
947 my $code = $FromSpeedTbl[$blk]->[$i];
950 printf OUTFILE
pack 'C', $InvCode8bit;
954 print OUTFILE
$code == $TmpLost ?
pack('C', $LostCode)
963 # =============================================================================
965 # Output 16bit Speed-optimized table. Output table's source code if $Source
966 # and table's binary data if !$Source.
969 # "to_ucs" - Output "to_ucs" table.
970 # "from_ucs" - Output "from_ucs" table.
971 # Parameter 2: Not used when sources are output. Output BE binary if 'n' and
974 # =============================================================================
979 my ($direction, $optimiz, $e, $bytes);
980 $optimiz = $Bits == 16 ?
" speed-optimized" : "";
981 $e = $endian ?
($endian eq 'n' ?
" Big Endian" : " Little Endian") : "";
982 if ($_[0] eq "to_ucs")
985 $direction = " $CCSName -> UCS";
986 $bytes = $ToSpeedBytes;
992 * 16-bit $CCSName -> UCS speed-optimized table ($ToSpeedBytes bytes).
995 #if defined ($GuardToUCS) \\
998 static const __uint16_t
999 ${VarToUCSSpeed}\[] =
1004 elsif ($_[0] eq "from_ucs")
1006 $tbl = \
@FromSpeedTbl;
1007 $direction = " UCS -> $CCSName";
1008 $bytes = $FromSpeedBytes;
1014 * 16-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
1018 #if defined ($GuardFromUCS) \\
1021 static const __uint16_t
1022 ${VarFromUCSSpeed}\[] =
1029 Err
"Internal script error Output16bitSpeed()\n";
1032 printf "Output%s 16-bit%s%s table (%d bytes).\n",
1033 $e, $direction, $optimiz, $bytes if $Verbose;
1035 # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
1039 print OUTFILE
"\t/* Heading Block */";
1040 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1042 print OUTFILE
"\n\t" unless $br;
1043 if (defined $tbl->[0]->[$i])
1045 printf OUTFILE
"0x%.4X,", $tbl->[0]->[$i];
1049 print OUTFILE
"$MacroInvBlock,";
1055 print OUTFILE
pack($endian, defined $_ ?
$_ : $InvBlock)
1056 foreach @
{$tbl->[0]};
1059 # OUTPUT OTHER BLOCKS
1063 for (my $blk = 1; $blk <= $#$tbl; $blk++)
1065 next unless defined $tbl->[$blk];
1066 printf OUTFILE
"\n\t/* Block $blk, Array index 0x%.4X */", $index;
1068 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1070 print OUTFILE
"\n\t" unless $br;
1071 my $code = $tbl->[$blk]->[$i];
1072 print OUTFILE
defined $code ?
1073 ($code == $TmpLost ?
$MacroLostCode : sprintf "0x%.4X", $code)
1074 : $MacroInvCode, ",";
1080 for (my $blk = 1; $blk <= $#$tbl; $blk++)
1082 next unless defined $tbl->[$blk];
1083 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1085 my $code = $tbl->[$blk]->[$i];
1086 print OUTFILE
pack($endian,
1087 defined $code ?
($code == $TmpLost ?
$LostCode : $code) : $InvCode);
1094 if ($_[0] eq "to_ucs")
1100 #endif /* $GuardToUCS && !$GuardSize */
1110 #endif /* $GuardFromUCS && !$GuardSize */
1117 # =============================================================================
1119 # Output 16bit Size-optimized table. Output table's source code if $Source
1120 # and table's binary data if !$Source.
1123 # "to_ucs" - Output "to_ucs" table.
1124 # "from_ucs" - Output "from_ucs" table.
1125 # Parameter 2: Not used when sources are output. Output BE binary if 'n' and
1128 # =============================================================================
1133 my ($direction, $optimiz, $e, $bytes);
1134 $optimiz = $Bits == 16 ?
" size-optimized" : "";
1135 $e = $endian ?
($endian eq 'n' ?
" Big Endian" : " Little Endian") : "";
1136 if ($_[0] eq "to_ucs")
1139 $direction = " $CCSName -> UCS";
1140 $bytes = $ToSizeBytes;
1146 * 16-bit $CCSName -> UCS size-optimized table ($ToSizeBytes bytes).
1149 #if defined ($GuardToUCS) \\
1152 static const __uint16_t
1153 ${VarToUCSSize}\[] =
1158 elsif ($_[0] eq "from_ucs")
1160 $tbl = \
@FromSizeTbl;
1161 $direction = " UCS -> $CCSName";
1162 $bytes = $FromSizeBytes;
1167 * 16-bit UCS -> $CCSName size-optimized table ($FromSizeBytes bytes).
1171 #if defined ($GuardFromUCS) \\
1174 static const __uint16_t
1175 ${VarFromUCSSize}\[] =
1182 Err
"Internal script error Output16bitSize()\n";
1185 printf "Output%s 16-bit%s%s table (%d bytes).\n",
1186 $e, $direction, $optimiz, $bytes if $Verbose;
1188 # OUTPUT FIRST 3 ELEMENTS
1191 printf OUTFILE
"\t0x%.4X, /* Ranges number */\n", $tbl->[0];
1192 printf OUTFILE
"\t0x%.4X, /* Unranged codes number */\n", $tbl->[1];
1193 printf OUTFILE
"\t0x%.4X, /* First unranged code index */\n", $tbl->[2];
1197 printf OUTFILE
pack $endian, $tbl->[0];
1198 printf OUTFILE
pack $endian, $tbl->[1];
1199 printf OUTFILE
pack $endian, $tbl->[2];
1206 print OUTFILE
"\t/* Ranges list: first code, last Code, array index. */\n";
1207 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1209 printf OUTFILE
"\t/* Array index: 0x%.4X */ 0x%.4X, 0x%.4X, 0x%.4X,\n",
1211 $tbl->[3]->[$range]->[0],
1212 $tbl->[3]->[$range]->[1],
1213 $tbl->[3]->[$range]->[2];
1218 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1220 print OUTFILE
pack($endian, $tbl->[3]->[$range]->[0]),
1221 pack($endian, $tbl->[3]->[$range]->[1]),
1222 pack($endian, $tbl->[3]->[$range]->[2]);
1227 # OUTPUT RANGES CONTENT
1230 print OUTFILE
"\t/* Ranges content */";
1231 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1233 printf OUTFILE
"\n\t/* Range 0x%.4X - 0x%.4X, array index: 0x%.4X */",
1234 $tbl->[3]->[$range]->[0], $tbl->[3]->[$range]->[1], $idx;
1235 $idx += $tbl->[3]->[$range]->[1] - $tbl->[3]->[$range]->[0] + 1;
1236 for (my $elt = 0, my $br = 0;
1237 $elt <= $#{$tbl->[4]->[$range]};
1240 print OUTFILE
"\n\t" unless $br;
1241 if (defined $tbl->[4]->[$range]->[$elt])
1243 if ($tbl->[4]->[$range]->[$elt] != $TmpLost)
1245 printf OUTFILE
"0x%.4X,", $tbl->[4]->[$range]->[$elt];
1249 print OUTFILE
"$MacroLostCode,";
1254 print OUTFILE
"$MacroInvCode,";
1261 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1263 for (my $elt = 0; $elt <= $#{$tbl->[4]->[$range]}; $elt++)
1265 if (defined $tbl->[4]->[$range]->[$elt])
1267 if ($tbl->[4]->[$range]->[$elt] != $TmpLost)
1269 print OUTFILE
pack $endian, $tbl->[4]->[$range]->[$elt];
1273 print OUTFILE
pack $endian, $LostCode;
1278 print OUTFILE
pack $endian, $InvCode;
1284 # OUTPUT UNRANGED CODES
1287 printf OUTFILE
"\n\t/* Unranged codes (%d codes) */", $#{$tbl->[4]} + 1;
1288 for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
1290 printf OUTFILE
"\n\t/* Array index: 0x%.4X */ 0x%.4X,0x%.4X,",
1291 $idx, $tbl->[5]->[$i]->[0], $tbl->[5]->[$i]->[1];
1296 for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
1298 print OUTFILE
pack($endian, $tbl->[5]->[$i]->[0]),
1299 pack($endian, $tbl->[5]->[$i]->[1]);
1305 if ($_[0] eq "to_ucs")
1311 #endif /* $GuardToUCS && $GuardSize */
1321 #endif /* $GuardFromUCS && $GuardSize */
1329 # =============================================================================
1331 # Parse command line options
1333 # =============================================================================
1334 sub ProcessOptions
()
1336 my $help_opt = 'h'; # Print help option
1337 my $input_opt = 'i'; # Input file name option
1338 my $output_opt = 'o'; # Output file name option
1339 my $source_opt = 'S'; # Generate C source file option
1340 my $enc_opt = 'N'; # Encoding name
1341 my $plane_opt = 'p'; # Plane number
1342 my $verbose_opt = 'v'; # Verbose output
1343 my $ccscol_opt = 'x'; # Encoding's column number
1344 my $ucscol_opt = 'y'; # UCS column number
1345 my $nosize_opt = 'l'; # Don't generate size-optimized tables
1346 my $nospeed_opt = 'b'; # Don't generate speed-optimized tables
1347 my $nobe_opt = 'B'; # Don't generate big-endian tables
1348 my $nole_opt = 'L'; # Don't generate big-endian tables
1349 my $noto_opt = 't'; # Don't generate "to_ucs" table
1350 my $nofrom_opt = 'f'; # Don't generate "from_ucs" table
1352 my %args; # Command line arguments found by getopts()
1354 my $getopts_string =
1355 "$help_opt$source_opt$enc_opt:$verbose_opt$input_opt:$output_opt:$plane_opt:"
1356 . "$nosize_opt$nospeed_opt$nobe_opt$nole_opt$noto_opt$nofrom_opt$ccscol_opt:"
1359 getopts
($getopts_string, \
%args) || Err
"getopts() failed: $!.\n", 1;
1361 # Print usage rules and exit.
1362 if ($args{$help_opt})
1366 -$help_opt - this help message;
1367 -$input_opt - input file name (required);
1368 -$output_opt - output file name;
1369 -$enc_opt - CCS or encoding name;
1370 -$plane_opt - plane number (high 16 bits) to use (in hex);
1371 -$source_opt - generate C source file;
1372 -$nospeed_opt - don't generate speed-optimized tables (binary files only);
1373 -$nosize_opt - don't generate size-optimized tables (binary files only);
1374 -$nobe_opt - don't generate Big Endian tables (binary files only);
1375 -$nole_opt - don't generate Little Endian tables (binary files only);
1376 -$noto_opt - don't generate "to_ucs" table;
1377 -$nofrom_opt - don't generate "from_ucs" table;
1378 -$ccscol_opt - encoding's column number;
1379 -$ucscol_opt - UCS column number;
1380 -$verbose_opt - verbose output.
1382 If output file name isn't specified, <infile>.c (for sources) or
1383 <infile>.cct (for binaries) is assumed.
1384 If encoding name isn't specified <infile> is assumed.
1385 <infile> is normalized (small letters, "-" are substituted by "_") input file
1386 name base (no extension). For example, for Koi8-r.txt input file, <infile>
1393 $Verbose = $args{$verbose_opt};
1394 $Source = $args{$source_opt};
1395 $NoSpeed = $args{$nospeed_opt};
1396 $NoSize = $args{$nosize_opt};
1397 $NoBE = $args{$nobe_opt};
1398 $NoLE = $args{$nole_opt};
1399 $NoFrom = $args{$nofrom_opt};
1400 $NoTo = $args{$noto_opt};
1401 $CCSCol = $args{$ccscol_opt};
1402 $UCSCol = $args{$ucscol_opt};
1403 $Plane = $args{$plane_opt};
1404 $InFile = $args{$input_opt};
1405 $OutFile = $args{$output_opt};
1406 $CCSName = $args{$enc_opt};
1408 Err
"Error: input file isn't defined. Use -$help_opt for help.\n", 1
1413 # Construct output file name
1415 $OutFile =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L
$2/;
1416 $OutFile =~ tr/-/_/;
1419 $OutFile = "$OutFile.c";
1423 $OutFile = "$OutFile.cct"
1429 # Construct CCS name
1431 $CCSName =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L
$2/;
1432 $CCSName =~ tr/-/_/;
1435 Err
"-$nosize_opt option can't be used with -$nospeed_opt option "
1436 . "simultaniously.\n", 1 if $NoSpeed && $NoSize;
1438 Err
"-$nobe_opt option can't be used with -$nole_opt option "
1439 . "simultaniously.\n", 1 if $NoBE && $NoLE;
1441 Err
"-$noto_opt option can't be used with -$nofrom_opt option"
1442 . "simultaniously.\n", 1 if $NoTo && $NoFrom;
1444 Err
"-$nosize_opt, -$nospeed_opt, -$nobe_opt -$nole_opt "
1445 . "-$noto_opt and -$nofrom_opt "
1446 . "options can't be used with -$source_opt option.\n"
1447 . "Source code always contains both speed- and size-optimized "
1448 . "tables in System Endian. Use -$help_opt for help.\n", 1
1449 if $Source and $NoSpeed || $NoSize || $NoBE || $NoLE || $NoTo || $NoFrom;
1451 if (!$CCSCol && !$UCSCol)
1456 elsif ($CCSCol && $UCSCol)
1458 Err
"Column number should be >= 0\n", 1 if ($CCSCol <= 0 or $UCSCol <= 0);
1464 Err
"Please, define both CCS and UCS column numbers\n", 1;
1469 print "Use $InFile file for input.\n",
1470 "Use $OutFile file for output.\n",
1471 "Use $CCSName as CCS name.\n";
1472 print "Generate C source file.\n" if $Source;
1473 print "Generate binary file.\n" if !$Source;
1474 printf "Use plane N 0x%.4X.\n", hex $Plane if defined $Plane;
1475 printf "Use column N $CCSCol for $CCSName.\n";
1476 printf "Use column N $UCSCol for UCS.\n";
1477 print "Don't generate size-optimized tables.\n" if $NoSize;
1478 print "Don't generate speed-optimized tables.\n" if $NoSpeed;
1479 print "Don't generate big-endian tables.\n" if $NoBE;
1480 print "Don't generate little-endian tables.\n" if $NoLE;
1481 print "Don't generate \"to_ucs\" table.\n" if $NoTo;
1482 print "Don't generate \"from_ucs\" table.\n" if $NoFrom;
1489 # =============================================================================
1491 # Print error message, close all and exit
1493 # Parameter 1: error message
1494 # Parameter 2: don't delete output file if > 1
1496 # =============================================================================
1499 print STDERR
"$_[0]";
1502 unlink $OutFile unless $_[1];