Cygwin: access: Fix X_OK behaviour for backup operators and admins
[newlib-cygwin.git] / newlib / libc / iconv / ccs / mktbl.pl
blob633511e4d7f28d588897d2467f6c6f20236d9ec7
1 #!/usr/bin/perl -w
3 # Copyright (c) 2003-2004, Artem B. Bityuckiy, SoftMine Corporation.
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions
7 # are met:
8 # 1. Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # 2. Redistributions in binary form must reproduce the above copyright
11 # notice, this list of conditions and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution.
14 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 # SUCH DAMAGE.
26 use integer;
27 use Getopt::Std;
28 use IO::Seekable;
29 use strict;
32 # ##############################################################################
34 # FUNCTION PROTOTYPES AND GLOBAL DATA DECLARATION SECTION
36 # ##############################################################################
39 # SUPPLEMENTARY FUNCTIONS FORWARD DECLARATIONS
40 sub ProcessOptions();
41 sub Err($;$);
42 sub Generate8bitToUCS();
43 sub GenerateSpeed($);
44 sub Generate16bitSize($);
45 sub Output8bitToUCS(;$);
46 sub Output8bitFromUCS(;$);
47 sub OutputSpeed($;$);
48 sub OutputSize($;$);
50 # VARIABLES RELATING TO COMMAND-LINE OPTIONS
51 my $Verbose; # Be verbose if true
52 my $Source; # Output C source code instead of binary .cct file if true
53 my $Plane; # Use this plane if defined
54 my $InFile; # Use this file for input
55 my $OutFile; # Use this file for output
56 my $CCSName; # Use this CCS name
57 my $NoSpeed; # Don't generate speed-optimized tables (binary files only)
58 my $NoSize; # Don't generate size-optimized tables (binary files only)
59 my $NoBE; # Don't generate big-endian tables (binary files only)
60 my $NoLE; # Don't generate big-endian tables (binary files only)
61 my $NoTo; # Don't generate "to_ucs" table (binary files only)
62 my $NoFrom; # Don't generate "from_ucs" table (binary files only)
63 my $CCSCol; # CCS column number in source file
64 my $UCSCol; # UCS column number in source file
67 # DATA STRUCTURES WITH "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES
68 my (@FromSpeedTbl, @ToSpeedTbl, @FromSizeTbl, @ToSizeTbl);
69 # "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES SIZE IN BYTES
70 my ($FromSpeedBytes, $ToSpeedBytes, $FromSizeBytes, $ToSizeBytes) =
71 (0, 0, 0, 0);
73 my (%CCSUCS, %UCSCCS); # CCS->UCS and UCS->CCS mappings
74 my $Bits = 8; # Table bits (8 or 16);
76 # SPECIAL MARKER CODES
77 my $InvCode = 0xFFFF; # FFFF indicates 18 bit invalid codes
78 my $InvBlock = 0xFFFF; # FFFF also mark empty blocks in speed-optimized tables
79 my $LostCode = 0x3F; # ASCII '?' marks codes lost during CCS->UCS mapping
80 # To mark invalid codes in 8bit encodings 0xFF is used CCS's 0xFF mapping is saved
81 # separately. $FFMap variable stores real 0xFF mapping if defined.
82 my $InvCode8bit = 0xFF;
83 my $FFMap;
85 # 8 Bit "From UCS" table header size (bytes)
86 my $Hdr8bitFromUCS = 2;
87 # Binary table header size (bytes)
88 my $HdrBinary = 8;
90 # At first all lost CCS codes are marked by $TmpLost to distinguish between
91 # code which is equivalent to $LostCode and lost codes. This is done in order to
92 # output $MacroLostCode instead of $LostCode in source file.
93 my $TmpLost = 0x1FFFF;
95 # VARIABLES RELATING TO C SOURCE CODE
96 my $MacroInvCode = 'INVALC';
97 my $MacroInvBlock = 'INVBLK';
98 my $MacroLostCode = 'LOST_C';
99 my $MacroCCSName = 'ICONV_CCS_%s';
100 my $GuardSize = 'defined (TABLE_USE_SIZE_OPTIMIZATION)';
101 my $GuardToUCS = "ICONV_TO_UCS_CCS_%s";
102 my $GuardFromUCS = "ICONV_FROM_UCS_CCS_%s";
103 my $MacroSpeedTable = 'TABLE_SPEED_OPTIMIZED';
104 my $MacroSizeTable = 'TABLE_SIZE_OPTIMIZED';
105 my $Macro8bitTable = 'TABLE_8BIT';
106 my $Macro16bitTable = 'TABLE_16BIT';
107 my $MacroVer1Table = 'TABLE_VERSION_1';
108 my $TypeBICCS = 'iconv_ccs_t';
109 my $VarToUCSSize = "to_ucs_size_%s";
110 my $VarToUCSSpeed = "to_ucs_speed_%s";
111 my $VarFromUCSSize = "from_ucs_size_%s";
112 my $VarFromUCSSpeed = "from_ucs_speed_%s";
113 my $VarBICCS = "_iconv_ccs_%s";
114 # Text block that visually separates tables.
115 my $Separator = '=' x 70;
117 # ##############################################################################
119 # SCRIPT ENTRY POINT
121 # ##############################################################################
124 # Parse command-line options, check them and set correspondent global variables
125 ProcessOptions();
127 # Initialize global variables tat depend on CCS name.
128 $_ = sprintf $_, $CCSName foreach +($VarToUCSSpeed,
129 $VarToUCSSize,
130 $VarToUCSSpeed,
131 $VarFromUCSSpeed,
132 $VarFromUCSSize,
133 $VarBICCS);
134 $_ = sprintf $_, "\U$CCSName" foreach +($GuardToUCS,
135 $GuardFromUCS,
136 $MacroCCSName);
138 # Open input and output files
139 Err "Can't open \"$InFile\" file for reading: $!.\n", 1
140 unless open(INFILE, '<', $InFile);
141 Err "Can't open \"$OutFile\" file for writing: $!.\n", 1
142 unless open(OUTFILE, '>', $OutFile);
144 # ==============================================================================
145 # EXTRACT CODES MAP FROM INPUT FILE
146 # ==============================================================================
148 for (my $ln = 1; my $l = <INFILE>; $ln += 1)
150 # Skip comment and empty lines, remove ending CR symbol
151 next if $l =~ /^#.*$/ or $l =~ /^\s*$/;
152 $l =~ s/^(.*)\n$/$1/, $l =~ s/^(.*)\r$/$1/;
154 # Remove comment and extra spaces
155 $l =~ s/(.*)\s*#.*/$1/;
156 $l =~ s/\s+/ /g;
157 $l =~ s/(.*)\s*$/$1/;
159 # Split line into individual codes
160 my @codes = split / /, $l;
162 # Skip line if there is no needed columns
163 unless (defined $codes[$CCSCol])
165 print("Warning (line $ln): no CCS column, skip.\n") if $Verbose;
166 next;
168 unless (defined $codes[$UCSCol])
170 print("Warning (line $ln): no UCS column, skip.\n") if $Verbose;
171 next;
174 # Extract codes strings from needed columns
175 my ($ccs, $ucs) = ($codes[$CCSCol], $codes[$UCSCol]);
176 my $patt = qr/(0[xX])?[0-9a-fA-F]{1,8}/; # HEX digit regexp pattern.
178 # Check that CCS and UCS code strings has right format.
179 unless ($ccs =~ m/^$patt$/)
181 print("Warning (line $ln): $ccs CCS code isn't recognized, skip.\n")
182 if $Verbose;
183 next;
185 unless ($ucs =~ m/^($patt(,|\+))*$patt$/)
187 print("Warning (line $ln): $ucs UCS code isn't recognized, skip.\n")
188 if $Verbose;
189 next;
192 # Convert code to numeric format (assume hex).
193 $ccs = hex $ccs;
195 if ($ucs =~ m/,/ or $ucs =~ m/\+/)
197 # Mark CCS codes with "one to many" mappings as lost
198 printf "Warning (line $ln): only one to one mapping is supported, "
199 . "mark 0x%.4X CCS code as lost.\n", hex $ccs if $Verbose;
200 $ucs = $TmpLost;
202 else
204 # Convert code to numeric format
205 $ucs = hex $ucs;
207 # Check that UCS code isn't longer than 16 bits.
208 if ($ucs > 0xFFFF)
210 printf("Warning (line $ln): UCS code should fit 16 bits, "
211 . "mark 0x%.4X CCS code as lost.\n", hex $ccs) if $Verbose;
212 $ucs = $TmpLost;
216 # If CCS value > 0xFFFF user should specify plane number.
217 if ($ccs > 0xFFFF && !defined $Plane)
219 print("Warning (line $ln): $ccs is > 16 bit, plane number should be specified,"
220 . " skip this mapping.\n") if $Verbose;
221 next;
224 if (defined $Plane)
226 next if (($ccs & 0xFFFF0000) >> 16) != hex $Plane; # Skip alien plane.
227 $ccs &= 0xFFFF;
230 # Check that reserved codes aren't used.
231 if ($ccs == $InvCode or $ucs == $InvCode)
233 print("Warning (line $ln): $InvCode is reserved to mark invalid codes and "
234 . "shouldn't be used in mappings, skip.\n") if $Verbose;
235 next;
238 # Save mapping in UCSCCS and CCSUCS hash arrays.
239 $UCSCCS{$ucs} = $ccs if $ucs != $TmpLost && !defined $UCSCCS{$ucs};
240 $CCSUCS{$ccs} = $ucs if !defined $CCSUCS{$ccs};
242 $Bits = 16 if $ccs > 0xFF;
245 if (not %CCSUCS)
247 Err "Error: there is no plane $Plane in \"$0\".\n" if defined $Plane;
248 Err "Error: mapping wasn't found.\n";
252 # ==============================================================================
253 # GENERATE TABLE DATA
254 # ==============================================================================
256 if ($Bits == 8)
258 $FFMap = $CCSUCS{0xFF};
259 $FFMap = $InvCode if !defined $FFMap;
262 if ($Bits == 8)
264 Generate8bitToUCS() unless $NoTo;
266 else
268 GenerateSpeed("to_ucs") unless $NoTo || $NoSpeed;
269 Generate16bitSize("to_ucs") unless $NoTo || $NoSize;
272 GenerateSpeed("from_ucs") unless $NoFrom || $NoSpeed;
273 Generate16bitSize("from_ucs") unless $NoFrom || $NoSize;
275 # ==============================================================================
276 # OUTPUT ARRAYS
277 # ==============================================================================
279 if ($Source)
281 # OUTPUT SOURCE
282 print OUTFILE
284 * This file was generated automatically - don't edit it.
285 * File contains iconv CCS tables for $CCSName encoding.
288 #include \"ccsbi.h\"
290 #if defined ($GuardToUCS) \\
291 || defined ($GuardFromUCS)
293 #include <_ansi.h>
294 #include <sys/types.h>
295 #include <sys/param.h>
296 #include \"ccs.h\"
297 #include \"ccsnames.h\"
301 if ($Bits == 8)
303 print OUTFILE
304 "#if (BYTE_ORDER == LITTLE_ENDIAN)
305 # define W(word) (word) & 0xFF, (word) >> 8
306 #elif (BYTE_ORDER == BIG_ENDIAN)
307 # define W(word) (word) >> 8, (word) & 0xFF
308 #else
309 # error \"Unknown byte order.\"
310 #endif
315 unless ($NoTo)
317 if ($Bits == 8)
319 Output8bitToUCS();
321 else
323 OutputSpeed("to_ucs") unless $NoSpeed;
324 OutputSize("to_ucs") unless $NoSize;
327 unless ($NoFrom)
329 if ($Bits == 8)
331 Output8bitFromUCS();
333 else
335 OutputSpeed("from_ucs") unless $NoSpeed;
336 OutputSize("from_ucs") unless $NoSize;
340 # OUTPUT TABLE DESCRIPTION STRUCTURE
341 print OUTFILE
343 * $CCSName CCS description table.
344 * $Separator
346 const $TypeBICCS
347 $VarBICCS =
349 \t$MacroVer1Table, /* Table version */
350 \t$MacroCCSName, /* CCS name */
352 if ($Bits == 8)
354 print OUTFILE
355 "\t$Macro8bitTable, /* Table bits */
356 \t0, /* Not Used */
357 #if defined ($GuardFromUCS)
358 \t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table */
359 #else
360 \t(__uint16_t *)NULL,
361 #endif
362 \t0, /* Not Used */
363 #if defined ($GuardToUCS)
364 \t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table */
365 #else
366 \t(__uint16_t *)NULL,
367 #endif
368 };\n";
370 else
372 print OUTFILE
373 "\t$Macro16bitTable, /* Table bits */
374 #if defined ($GuardFromUCS) \\
375 && ($GuardSize)
376 \t$MacroSizeTable,
377 \t(__uint16_t *)&$VarFromUCSSize, /* UCS -> $CCSName table size-optimized table */
378 #elif defined ($GuardFromUCS) \\
379 && !($GuardSize)
380 \t$MacroSpeedTable,
381 \t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table speed-optimized table */
382 #else
383 \t$MacroSpeedTable,
384 \t(__uint16_t *)NULL,
385 #endif
386 #if defined ($GuardToUCS) \\
387 && ($GuardSize)
388 \t$MacroSizeTable,
389 \t(__uint16_t *)&$VarToUCSSize /* $CCSName -> UCS table speed-optimized table */
390 #elif defined ($GuardToUCS) \\
391 && !($GuardSize)
392 \t$MacroSpeedTable,
393 \t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table speed-optimized table */
394 #else
395 \t$MacroSpeedTable,
396 \t(__uint16_t *)NULL,
397 #endif
398 };\n";
400 print OUTFILE "\n#endif /* $GuardToUCS) || ... */\n\n";
402 else
404 # OUTPUT BINARY TABLES DESCRIPTION STRUCTURE (ALWAYS BIG ENDIAN)
405 print OUTFILE pack "n", 1;
406 print OUTFILE pack "n", $Bits;
407 my $len = length $CCSName;
408 print OUTFILE pack "N", $len;
409 print OUTFILE pack "a$len", $CCSName;
411 my $pos = $HdrBinary + $len;
412 if ($pos & 3)
414 my $l = 4 - ($pos & 3);
415 print OUTFILE pack "a$l", 'XXX';
416 $pos += $l;
419 $pos += 16*4;
421 my @tables;
422 for (my $i = 0; $i < 16; $i++)
424 $tables[$i] = 0;
427 $tables[0] = $pos, $tables[1] = $FromSpeedBytes, $pos += $FromSpeedBytes
428 unless $NoFrom || $NoSpeed || $NoBE;
429 $tables[2] = $pos, $tables[3] = $FromSpeedBytes, $pos += $FromSpeedBytes
430 unless $NoFrom || $NoSpeed || $NoLE;
431 if ($Bits == 16)
433 $tables[4] = $pos, $tables[5] = $FromSizeBytes, $pos += $FromSizeBytes
434 unless $NoFrom || $NoSize || $NoBE;
435 $tables[6] = $pos, $tables[7] = $FromSizeBytes, $pos += $FromSizeBytes
436 unless $NoFrom || $NoSize || $NoLE;
438 $tables[8] = $pos, $tables[9] = $ToSpeedBytes, $pos += $ToSpeedBytes
439 unless $NoTo || $NoSpeed || $NoBE;
440 $tables[10] = $pos, $tables[11] = $ToSpeedBytes, $pos += $ToSpeedBytes
441 unless $NoTo || $NoSpeed || $NoLE;
442 if ($Bits == 16)
444 $tables[12] = $pos, $tables[13] = $ToSizeBytes, $pos += $ToSizeBytes
445 unless $NoTo || $NoSize || $NoBE;
446 $tables[14] = $pos, $tables[15] = $ToSizeBytes, $pos += $ToSizeBytes
447 unless $NoTo || $NoSize || $NoLE;
450 print OUTFILE pack("N", $_) foreach @tables;
452 print "Total bytes for output: $pos.\n" if $Verbose;
454 # OUTPUT BINARY TABLES
455 unless ($NoFrom)
457 if ($Bits == 8)
459 Output8bitFromUCS("n") unless $NoBE;
460 Output8bitFromUCS("v") unless $NoLE;
462 else
464 unless ($NoSpeed)
466 OutputSpeed("from_ucs", "n") unless $NoBE;
467 OutputSpeed("from_ucs", "v") unless $NoLE;
469 unless ($NoSize)
471 OutputSize("from_ucs", "n") unless $NoBE;
472 OutputSize("from_ucs", "v") unless $NoLE;
476 unless ($NoTo)
478 if ($Bits == 8)
480 Output8bitToUCS("n") unless $NoBE;
481 Output8bitToUCS("v") unless $NoLE;
483 else
485 unless ($NoSpeed)
487 OutputSpeed("to_ucs", "n") unless $NoBE;
488 OutputSpeed("to_ucs", "v") unless $NoLE;
490 unless ($NoSize)
492 OutputSize("to_ucs", "n") unless $NoBE;
493 OutputSize("to_ucs", "v") unless $NoLE;
499 close INFILE;
500 close OUTFILE;
501 exit 0;
504 # ##############################################################################
506 # SUPPLEMENTARY FUNCTIONS
508 # ##############################################################################
511 # =============================================================================
513 # Generate 8bit "to_ucs" table. Store table data in %ToSpeedTbl hash.
514 # Store table size in $ToSpeedBytes scalar.
516 # =============================================================================
517 sub Generate8bitToUCS()
519 for (my $i = 0; $i <= 255; $i++)
521 $ToSpeedTbl[$i] = defined $CCSUCS{$i} ? $CCSUCS{$i} : $InvCode;
523 $ToSpeedBytes = 256*2;
527 # =============================================================================
529 # Generate speed-optimized table.
531 # Parameter 1:
532 # "to_ucs" - generate "to_ucs" table, store table data in @ToSpeedTbl
533 # array, store table size in $ToSpeedBytes scalar.
534 # "from_ucs" - generate "from_ucs" table, store table data in @FromSpeedTbl
535 # array, store table size in $FromSpeedBytes scalar.
537 # Data is written to @ToSpeedTbl or @FromSpeedTbl (@map) table and has the
538 # following format:
539 # $table[0] - 256-element array (control block);
540 # $table[1 .. $#table] - 256-element arrays (data blocks).
542 # =============================================================================
543 sub GenerateSpeed($)
545 my $map;
546 my $tbl;
547 my $bytes;
549 if ($_[0] eq "to_ucs")
551 $map = \%CCSUCS;
552 $tbl = \@ToSpeedTbl;
553 $bytes = \$ToSpeedBytes;
555 elsif ($_[0] eq "from_ucs")
557 $map = \%UCSCCS;
558 $tbl = \@FromSpeedTbl;
559 $bytes = \$FromSpeedBytes;
561 else
563 Err "Internal script error in GenerateSpeed()\n";
566 # Identify unused blocks
567 my @busy_blocks;
568 $busy_blocks[$_ >> 8] = 1 foreach (keys %$map);
570 # GENERATE FIRST 256-ELEMENT CONTROL BLOCK
571 for (my $i = 0,
572 my $idx = $Bits == 16 ? 0 : 256 + $Hdr8bitFromUCS;
573 $i <= 0xFF; $i++)
575 $tbl->[0]->[$i] = $busy_blocks[$i] ? $idx += 256 : undef;
578 # GENERATE DATA BLOCKS
579 $$bytes = 0;
580 for (my $i = 0; $i <= 0xFF; $i++)
582 next unless $busy_blocks[$i];
583 $$bytes += 256;
584 for (my $j = 0; $j <= 0xFF; $j++)
586 $tbl->[$i+1]->[$j] = $map->{($i << 8) | $j};
589 $$bytes *= 2 if $Bits == 16;
590 $$bytes += $Hdr8bitFromUCS if $Bits == 8;
591 $$bytes += 512;
595 # =============================================================================
597 # Generate 16bit size-optimized table.
599 # Parameter 1:
600 # "to_ucs" - generate "to_ucs" table, store table data in @ToSizeTbl
601 # array, store table size in $ToSizeBytes scalar.
602 # "from_ucs" - generate "from_ucs" table, store table data in @FromSizeTbl
603 # array, store table size in $FromSizeBytes scalar.
605 # Data is written to @ToSizeTbl or @FromSizeTbl (@map) table and has the
606 # following format:
607 # $table[0] - number of ranges;
608 # $table[1] - number of unranged codes;
609 # $table[2] - unranged codes index in resulting array;
610 # $table[3]->[0 .. $table[0]] - array of arrays of ranges:
611 # $table[3]->[x]->[0] - first code;
612 # $table[3]->[x]->[1] - last code;
613 # $table[3]->[x]->[2] - range index in resulting array;
614 # $table[4]->[0 .. $table[0]] - array of arrays of ranges content;
615 # $table[5]->[0 .. $table[1]] - array of arrays of unranged codes;
616 # $table[5]->[x]->[0] - CCS code;
617 # $table[5]->[x]->[0] - UCS code;
619 # =============================================================================
620 sub Generate16bitSize($)
622 my $map;
623 my $tbl;
624 my $bytes;
626 if ($_[0] eq "to_ucs")
628 $map = \%CCSUCS;
629 $tbl = \@ToSizeTbl;
630 $bytes = \$ToSizeBytes;
632 elsif ($_[0] eq "from_ucs")
634 $map = \%UCSCCS;
635 $tbl = \@FromSizeTbl;
636 $bytes = \$FromSizeBytes;
638 else
640 Err "Internal script error Generate16bitSize()\n";
643 # CREATE LIST OF RANGES.
644 my @codes = sort {$a <=> $b} keys %$map;
645 my @ranges; # Code ranges
646 my @range; # Current working range
647 foreach (@codes)
649 if (not @range or $_ - 1 == $range[$#range])
651 push @range, $_;
653 else
655 my @tmp = @range;
656 push @ranges, \@tmp;
657 undef @range;
658 redo;
661 # Add Last range too
662 if (@range)
664 my @tmp = @range;
665 push @ranges, \@tmp;
668 # OPTIMIZE LIST OF RANGES.
669 my $r = 0; # Working range number
670 while (1)
672 last if ($r == $#ranges);
674 my @r1 = @{$ranges[$r]};
675 my @r2 = @{$ranges[$r + 1]};
677 # Calculate how many array entries two ranges need
678 my ($s1, $s2);
680 if ($#r1 == 0)
681 { $s1 = 2; }
682 elsif ($#r1 == 1)
683 { $s1 = 4; }
684 else
685 { $s1 = $#r1 + 1 + 3; }
687 if ($#r2 == 0)
688 { $s2 = 2; }
689 elsif ($#r2 == 1)
690 { $s2 = 4; }
691 else
692 { $s2 = $#r2 + 1 + 3; }
694 my $two = $s1 + $s2;
696 # Calculate how many array entries will be needed if we join them
697 my $one = $r2[$#r2] - $r1[0] + 1 + 3;
699 $r += 1, next if ($one > $two);
701 # Join ranges
702 my @r; # New range.
703 push @r, $_ foreach (@r1);
704 for (my $i = $r1[$#r1]+1; $i < $r2[0]; $i++)
706 push @r, undef;
708 push @r, $_ foreach (@r2);
709 $ranges[$r] = \@r;
710 splice @ranges, $r+1, 1;
713 # SEPARATE RANGED AND UNRANGED CODES. SPLIT 2-CODES RANGES ON 2 UNRANGED.
714 my @unranged;
715 foreach (@ranges)
717 if ($#$_ == 0)
719 push @unranged, $$_[0];
720 undef $_;
722 elsif ($#$_ == 1)
724 push @unranged, $$_[0];
725 push @unranged, $$_[1];
726 undef $_;
730 # DELETE UNUSED ELEMENTS
731 for (my $i = 0; $i <= $#ranges; $i++)
733 splice @ranges, $i--, 1 unless defined $ranges[$i];
736 # CALCULATE UNRANGED CODES ARRAY INDEX
737 my $idx = 3 + ($#ranges + 1)*3;
738 $idx += $#$_ + 1 foreach @ranges;
740 # COMPOSE TABLE
741 $tbl->[0] = $#ranges + 1; # Number of ranges
742 $tbl->[1] = $#unranged + 1; # Number of unranged codes
743 $tbl->[2] = $idx; # Array index of unranged codes
745 # Generate ranges list
746 $idx = 3 + ($#ranges + 1)*3; # First range data index
747 $$bytes = $idx*2;
748 my $num = 0;
749 foreach (@ranges)
751 $tbl->[3]->[$num]->[0] = $_->[0];
752 $tbl->[3]->[$num]->[1] = $_->[$#$_];
753 $tbl->[3]->[$num]->[2] = $idx;
754 $idx += $#$_ + 1;
755 $num += 1;
758 # Generate ranges content
759 $num = 0;
760 foreach (@ranges)
762 for (my $i = 0; $i <= $#$_; $i++)
764 $tbl->[4]->[$num]->[$i] = defined $_->[$i] ? $map->{$_->[$i]} : undef;
766 $num += 1;
767 $$bytes += ($#$_ + 1)*2;
770 # Generate unranged codes list
771 $num = 0;
772 foreach (@unranged)
774 $tbl->[5]->[$num]->[0] = $_;
775 $tbl->[5]->[$num]->[1] = $map->{$_};
776 $num += 1;
779 $$bytes += ($#unranged + 1)*4;
783 # =============================================================================
785 # Output 8bit "to UCS" table. Output table's source code if $Source
786 # and table's binary data if !$Source.
788 # Parameter 1: Not used when sources are output. Output BE binary if 'n' and
789 # LE binary if 'v'.
791 # =============================================================================
792 sub Output8bitToUCS(;$)
794 my $endian = $_[0];
795 my $br = 0;
797 printf "Output%s 8-bit UCS -> $CCSName table ($ToSpeedBytes bytes).\n",
798 defined $endian ? ($endian eq 'n' ?
799 " Big Endian" : " Little Endian") : "" if $Verbose;
800 if ($Source)
802 # Output heading information
803 printf OUTFILE
805 * 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).
806 * $Separator
808 #if defined ($GuardToUCS)
810 static const __uint16_t
811 ${VarToUCSSpeed}\[] =
812 {\n\t";
815 if ($Source)
817 foreach (@ToSpeedTbl)
819 $br += 1;
820 if ($_ != $InvCode)
822 if ($_ != $TmpLost)
824 printf OUTFILE "0x%.4X,", $_;
826 else
828 print OUTFILE "$MacroLostCode,";
831 else
833 print OUTFILE "$MacroInvCode,";
835 print(OUTFILE "\n\t"), $br = 0 unless $br % 8;
837 print OUTFILE "\n};\n\n#endif /* $GuardToUCS */\n\n";
839 else
841 foreach (@ToSpeedTbl)
843 print OUTFILE pack($endian, $_ == $TmpLost ? $LostCode : $_);
849 # =============================================================================
851 # Output 8bit "from UCS" table. Output table's source code if $Source
852 # and table's binary data if !$Source.
854 # Parameter 1: Not used when sources are output. Output BE binary if 'n' and
855 # LE binary if 'v'.
857 # =============================================================================
858 sub Output8bitFromUCS(;$)
860 my $endian = $_[0];
862 printf "Output%s 8-bit $CCSName -> UCS table ($FromSpeedBytes bytes).\n",
863 defined $endian ? ($endian eq 'n' ?
864 " Big Endian" : " Little Endian") : "" if $Verbose;
865 if ($Source)
867 print OUTFILE
869 * 8-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
870 * $Separator
873 #if defined ($GuardFromUCS)
875 static const unsigned char
876 ${VarFromUCSSpeed}\[] =
881 # SAVE 0xFF MAPPING.
882 if ($Source)
884 printf OUTFILE "\tW(0x%.4X), /* Real 0xFF mapping. 0xFF is used "
885 . "to mark invalid codes */\n", $FFMap;
887 else
889 print OUTFILE pack($endian, $FFMap);
892 # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
893 if ($Source)
895 my $count = 0;
896 print OUTFILE "\t/* Heading Block */";
897 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 4)
899 print OUTFILE "\n\t" unless $br;
900 if (defined $FromSpeedTbl[0]->[$i])
902 printf OUTFILE "W(0x%.4X),", $FromSpeedTbl[0]->[$i];
904 else
906 print OUTFILE "W($MacroInvBlock),";
910 else
912 print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
913 foreach @{$FromSpeedTbl[0]};
916 if ($Source)
918 my $index = 512 + $Hdr8bitFromUCS;
919 for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
921 next unless defined $FromSpeedTbl[$blk];
922 printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
923 $index += 256;
924 for (my $i = 0, my $br = 0; $i < 256; $i++, $br = $i % 8)
926 print OUTFILE "\n\t" unless $br;
927 my $code = $FromSpeedTbl[$blk]->[$i];
928 if (!defined $code)
930 printf OUTFILE "0x%.2X,", $InvCode8bit;
932 else
934 printf OUTFILE "0x%.2X,", $code == $TmpLost ? $LostCode : $code;
938 print OUTFILE "\n};\n\n#endif /* $GuardFromUCS */\n\n";
940 else
942 for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
944 next unless defined $FromSpeedTbl[$blk];
945 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
947 my $code = $FromSpeedTbl[$blk]->[$i];
948 if (!defined $code)
950 printf OUTFILE pack 'C', $InvCode8bit;
952 else
954 print OUTFILE $code == $TmpLost ? pack('C', $LostCode)
955 : pack('C', $code);
963 # =============================================================================
965 # Output 16bit Speed-optimized table. Output table's source code if $Source
966 # and table's binary data if !$Source.
968 # Parameter 1:
969 # "to_ucs" - Output "to_ucs" table.
970 # "from_ucs" - Output "from_ucs" table.
971 # Parameter 2: Not used when sources are output. Output BE binary if 'n' and
972 # LE binary if 'v'.
974 # =============================================================================
975 sub OutputSpeed($;$)
977 my $endian = $_[1];
978 my $tbl;
979 my ($direction, $optimiz, $e, $bytes);
980 $optimiz = $Bits == 16 ? " speed-optimized" : "";
981 $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
982 if ($_[0] eq "to_ucs")
984 $tbl = \@ToSpeedTbl;
985 $direction = " $CCSName -> UCS";
986 $bytes = $ToSpeedBytes;
988 if ($Source)
990 print OUTFILE
992 * 16-bit $CCSName -> UCS speed-optimized table ($ToSpeedBytes bytes).
993 * $Separator
995 #if defined ($GuardToUCS) \\
996 && !($GuardSize)
998 static const __uint16_t
999 ${VarToUCSSpeed}\[] =
1004 elsif ($_[0] eq "from_ucs")
1006 $tbl = \@FromSpeedTbl;
1007 $direction = " UCS -> $CCSName";
1008 $bytes = $FromSpeedBytes;
1010 if ($Source)
1012 print OUTFILE
1014 * 16-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
1015 * $Separator
1018 #if defined ($GuardFromUCS) \\
1019 && !($GuardSize)
1021 static const __uint16_t
1022 ${VarFromUCSSpeed}\[] =
1027 else
1029 Err "Internal script error Output16bitSpeed()\n";
1032 printf "Output%s 16-bit%s%s table (%d bytes).\n",
1033 $e, $direction, $optimiz, $bytes if $Verbose;
1035 # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
1036 if ($Source)
1038 my $count = 0;
1039 print OUTFILE "\t/* Heading Block */";
1040 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1042 print OUTFILE "\n\t" unless $br;
1043 if (defined $tbl->[0]->[$i])
1045 printf OUTFILE "0x%.4X,", $tbl->[0]->[$i];
1047 else
1049 print OUTFILE "$MacroInvBlock,";
1053 else
1055 print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
1056 foreach @{$tbl->[0]};
1059 # OUTPUT OTHER BLOCKS
1060 if ($Source)
1062 my $index = 256;
1063 for (my $blk = 1; $blk <= $#$tbl; $blk++)
1065 next unless defined $tbl->[$blk];
1066 printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
1067 $index += 256;
1068 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1070 print OUTFILE "\n\t" unless $br;
1071 my $code = $tbl->[$blk]->[$i];
1072 print OUTFILE defined $code ?
1073 ($code == $TmpLost ? $MacroLostCode : sprintf "0x%.4X", $code)
1074 : $MacroInvCode, ",";
1078 else
1080 for (my $blk = 1; $blk <= $#$tbl; $blk++)
1082 next unless defined $tbl->[$blk];
1083 for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8)
1085 my $code = $tbl->[$blk]->[$i];
1086 print OUTFILE pack($endian,
1087 defined $code ? ($code == $TmpLost ? $LostCode : $code) : $InvCode);
1092 if ($Source)
1094 if ($_[0] eq "to_ucs")
1096 print OUTFILE
1100 #endif /* $GuardToUCS && !$GuardSize */
1104 else
1106 print OUTFILE
1110 #endif /* $GuardFromUCS && !$GuardSize */
1117 # =============================================================================
1119 # Output 16bit Size-optimized table. Output table's source code if $Source
1120 # and table's binary data if !$Source.
1122 # Parameter 1:
1123 # "to_ucs" - Output "to_ucs" table.
1124 # "from_ucs" - Output "from_ucs" table.
1125 # Parameter 2: Not used when sources are output. Output BE binary if 'n' and
1126 # LE binary if 'v'.
1128 # =============================================================================
1129 sub OutputSize($;$)
1131 my $endian = $_[1];
1132 my $tbl;
1133 my ($direction, $optimiz, $e, $bytes);
1134 $optimiz = $Bits == 16 ? " size-optimized" : "";
1135 $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
1136 if ($_[0] eq "to_ucs")
1138 $tbl = \@ToSizeTbl;
1139 $direction = " $CCSName -> UCS";
1140 $bytes = $ToSizeBytes;
1142 if ($Source)
1144 print OUTFILE
1146 * 16-bit $CCSName -> UCS size-optimized table ($ToSizeBytes bytes).
1147 * $Separator
1149 #if defined ($GuardToUCS) \\
1150 && ($GuardSize)
1152 static const __uint16_t
1153 ${VarToUCSSize}\[] =
1158 elsif ($_[0] eq "from_ucs")
1160 $tbl = \@FromSizeTbl;
1161 $direction = " UCS -> $CCSName";
1162 $bytes = $FromSizeBytes;
1163 if ($Source)
1165 print OUTFILE
1167 * 16-bit UCS -> $CCSName size-optimized table ($FromSizeBytes bytes).
1168 * $Separator
1171 #if defined ($GuardFromUCS) \\
1172 && ($GuardSize)
1174 static const __uint16_t
1175 ${VarFromUCSSize}\[] =
1180 else
1182 Err "Internal script error Output16bitSize()\n";
1185 printf "Output%s 16-bit%s%s table (%d bytes).\n",
1186 $e, $direction, $optimiz, $bytes if $Verbose;
1188 # OUTPUT FIRST 3 ELEMENTS
1189 if ($Source)
1191 printf OUTFILE "\t0x%.4X, /* Ranges number */\n", $tbl->[0];
1192 printf OUTFILE "\t0x%.4X, /* Unranged codes number */\n", $tbl->[1];
1193 printf OUTFILE "\t0x%.4X, /* First unranged code index */\n", $tbl->[2];
1195 else
1197 printf OUTFILE pack $endian, $tbl->[0];
1198 printf OUTFILE pack $endian, $tbl->[1];
1199 printf OUTFILE pack $endian, $tbl->[2];
1202 my $idx = 0;
1203 # OUTPUT RANGES
1204 if ($Source)
1206 print OUTFILE "\t/* Ranges list: first code, last Code, array index. */\n";
1207 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1209 printf OUTFILE "\t/* Array index: 0x%.4X */ 0x%.4X, 0x%.4X, 0x%.4X,\n",
1210 $idx += 3,
1211 $tbl->[3]->[$range]->[0],
1212 $tbl->[3]->[$range]->[1],
1213 $tbl->[3]->[$range]->[2];
1216 else
1218 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1220 print OUTFILE pack($endian, $tbl->[3]->[$range]->[0]),
1221 pack($endian, $tbl->[3]->[$range]->[1]),
1222 pack($endian, $tbl->[3]->[$range]->[2]);
1225 $idx += 3;
1227 # OUTPUT RANGES CONTENT
1228 if ($Source)
1230 print OUTFILE "\t/* Ranges content */";
1231 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1233 printf OUTFILE "\n\t/* Range 0x%.4X - 0x%.4X, array index: 0x%.4X */",
1234 $tbl->[3]->[$range]->[0], $tbl->[3]->[$range]->[1], $idx;
1235 $idx += $tbl->[3]->[$range]->[1] - $tbl->[3]->[$range]->[0] + 1;
1236 for (my $elt = 0, my $br = 0;
1237 $elt <= $#{$tbl->[4]->[$range]};
1238 $br = ++$elt % 8)
1240 print OUTFILE "\n\t" unless $br;
1241 if (defined $tbl->[4]->[$range]->[$elt])
1243 if ($tbl->[4]->[$range]->[$elt] != $TmpLost)
1245 printf OUTFILE "0x%.4X,", $tbl->[4]->[$range]->[$elt];
1247 else
1249 print OUTFILE "$MacroLostCode,";
1252 else
1254 print OUTFILE "$MacroInvCode,";
1259 else
1261 for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
1263 for (my $elt = 0; $elt <= $#{$tbl->[4]->[$range]}; $elt++)
1265 if (defined $tbl->[4]->[$range]->[$elt])
1267 if ($tbl->[4]->[$range]->[$elt] != $TmpLost)
1269 print OUTFILE pack $endian, $tbl->[4]->[$range]->[$elt];
1271 else
1273 print OUTFILE pack $endian, $LostCode;
1276 else
1278 print OUTFILE pack $endian, $InvCode;
1284 # OUTPUT UNRANGED CODES
1285 if ($Source)
1287 printf OUTFILE "\n\t/* Unranged codes (%d codes) */", $#{$tbl->[4]} + 1;
1288 for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
1290 printf OUTFILE "\n\t/* Array index: 0x%.4X */ 0x%.4X,0x%.4X,",
1291 $idx, $tbl->[5]->[$i]->[0], $tbl->[5]->[$i]->[1];
1294 else
1296 for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
1298 print OUTFILE pack($endian, $tbl->[5]->[$i]->[0]),
1299 pack($endian, $tbl->[5]->[$i]->[1]);
1303 if ($Source)
1305 if ($_[0] eq "to_ucs")
1307 print OUTFILE
1311 #endif /* $GuardToUCS && $GuardSize */
1315 else
1317 print OUTFILE
1321 #endif /* $GuardFromUCS && $GuardSize */
1329 # =============================================================================
1331 # Parse command line options
1333 # =============================================================================
1334 sub ProcessOptions()
1336 my $help_opt = 'h'; # Print help option
1337 my $input_opt = 'i'; # Input file name option
1338 my $output_opt = 'o'; # Output file name option
1339 my $source_opt = 'S'; # Generate C source file option
1340 my $enc_opt = 'N'; # Encoding name
1341 my $plane_opt = 'p'; # Plane number
1342 my $verbose_opt = 'v'; # Verbose output
1343 my $ccscol_opt = 'x'; # Encoding's column number
1344 my $ucscol_opt = 'y'; # UCS column number
1345 my $nosize_opt = 'l'; # Don't generate size-optimized tables
1346 my $nospeed_opt = 'b'; # Don't generate speed-optimized tables
1347 my $nobe_opt = 'B'; # Don't generate big-endian tables
1348 my $nole_opt = 'L'; # Don't generate big-endian tables
1349 my $noto_opt = 't'; # Don't generate "to_ucs" table
1350 my $nofrom_opt = 'f'; # Don't generate "from_ucs" table
1352 my %args; # Command line arguments found by getopts()
1354 my $getopts_string =
1355 "$help_opt$source_opt$enc_opt:$verbose_opt$input_opt:$output_opt:$plane_opt:"
1356 . "$nosize_opt$nospeed_opt$nobe_opt$nole_opt$noto_opt$nofrom_opt$ccscol_opt:"
1357 . "$ucscol_opt:";
1359 getopts($getopts_string, \%args) || Err "getopts() failed: $!.\n", 1;
1361 # Print usage rules and exit.
1362 if ($args{$help_opt})
1364 print<<END
1365 Usage:
1366 -$help_opt - this help message;
1367 -$input_opt - input file name (required);
1368 -$output_opt - output file name;
1369 -$enc_opt - CCS or encoding name;
1370 -$plane_opt - plane number (high 16 bits) to use (in hex);
1371 -$source_opt - generate C source file;
1372 -$nospeed_opt - don't generate speed-optimized tables (binary files only);
1373 -$nosize_opt - don't generate size-optimized tables (binary files only);
1374 -$nobe_opt - don't generate Big Endian tables (binary files only);
1375 -$nole_opt - don't generate Little Endian tables (binary files only);
1376 -$noto_opt - don't generate "to_ucs" table;
1377 -$nofrom_opt - don't generate "from_ucs" table;
1378 -$ccscol_opt - encoding's column number;
1379 -$ucscol_opt - UCS column number;
1380 -$verbose_opt - verbose output.
1382 If output file name isn't specified, <infile>.c (for sources) or
1383 <infile>.cct (for binaries) is assumed.
1384 If encoding name isn't specified <infile> is assumed.
1385 <infile> is normalized (small letters, "-" are substituted by "_") input file
1386 name base (no extension). For example, for Koi8-r.txt input file, <infile>
1387 is koi8_r.
1390 exit 0;
1393 $Verbose = $args{$verbose_opt};
1394 $Source = $args{$source_opt};
1395 $NoSpeed = $args{$nospeed_opt};
1396 $NoSize = $args{$nosize_opt};
1397 $NoBE = $args{$nobe_opt};
1398 $NoLE = $args{$nole_opt};
1399 $NoFrom = $args{$nofrom_opt};
1400 $NoTo = $args{$noto_opt};
1401 $CCSCol = $args{$ccscol_opt};
1402 $UCSCol = $args{$ucscol_opt};
1403 $Plane = $args{$plane_opt};
1404 $InFile = $args{$input_opt};
1405 $OutFile = $args{$output_opt};
1406 $CCSName = $args{$enc_opt};
1408 Err "Error: input file isn't defined. Use -$help_opt for help.\n", 1
1409 unless $InFile;
1411 unless ($OutFile)
1413 # Construct output file name
1414 $OutFile = $InFile;
1415 $OutFile =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L$2/;
1416 $OutFile =~ tr/-/_/;
1417 if ($Source)
1419 $OutFile = "$OutFile.c";
1421 else
1423 $OutFile = "$OutFile.cct"
1427 unless ($CCSName)
1429 # Construct CCS name
1430 $CCSName = $InFile;
1431 $CCSName =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L$2/;
1432 $CCSName =~ tr/-/_/;
1435 Err "-$nosize_opt option can't be used with -$nospeed_opt option "
1436 . "simultaniously.\n", 1 if $NoSpeed && $NoSize;
1438 Err "-$nobe_opt option can't be used with -$nole_opt option "
1439 . "simultaniously.\n", 1 if $NoBE && $NoLE;
1441 Err "-$noto_opt option can't be used with -$nofrom_opt option"
1442 . "simultaniously.\n", 1 if $NoTo && $NoFrom;
1444 Err "-$nosize_opt, -$nospeed_opt, -$nobe_opt -$nole_opt "
1445 . "-$noto_opt and -$nofrom_opt "
1446 . "options can't be used with -$source_opt option.\n"
1447 . "Source code always contains both speed- and size-optimized "
1448 . "tables in System Endian. Use -$help_opt for help.\n", 1
1449 if $Source and $NoSpeed || $NoSize || $NoBE || $NoLE || $NoTo || $NoFrom;
1451 if (!$CCSCol && !$UCSCol)
1453 $CCSCol = 0;
1454 $UCSCol = 1;
1456 elsif ($CCSCol && $UCSCol)
1458 Err "Column number should be >= 0\n", 1 if ($CCSCol <= 0 or $UCSCol <= 0);
1459 $CCSCol -= 1;
1460 $UCSCol -= 1;
1462 else
1464 Err "Please, define both CCS and UCS column numbers\n", 1;
1467 if ($Verbose)
1469 print "Use $InFile file for input.\n",
1470 "Use $OutFile file for output.\n",
1471 "Use $CCSName as CCS name.\n";
1472 print "Generate C source file.\n" if $Source;
1473 print "Generate binary file.\n" if !$Source;
1474 printf "Use plane N 0x%.4X.\n", hex $Plane if defined $Plane;
1475 printf "Use column N $CCSCol for $CCSName.\n";
1476 printf "Use column N $UCSCol for UCS.\n";
1477 print "Don't generate size-optimized tables.\n" if $NoSize;
1478 print "Don't generate speed-optimized tables.\n" if $NoSpeed;
1479 print "Don't generate big-endian tables.\n" if $NoBE;
1480 print "Don't generate little-endian tables.\n" if $NoLE;
1481 print "Don't generate \"to_ucs\" table.\n" if $NoTo;
1482 print "Don't generate \"from_ucs\" table.\n" if $NoFrom;
1485 return;
1489 # =============================================================================
1491 # Print error message, close all and exit
1493 # Parameter 1: error message
1494 # Parameter 2: don't delete output file if > 1
1496 # =============================================================================
1497 sub Err($;$)
1499 print STDERR "$_[0]";
1500 close INFILE;
1501 close OUTFILE;
1502 unlink $OutFile unless $_[1];
1504 exit 1;