doc: TRE 0.8.0 required
[elinks/elinks-j605.git] / Unicode / gen-case
blob9ca69ddc24bafb635ed21d559626a3360a23915d
1 #! /usr/bin/perl
2 use strict;
3 use warnings;
5 my @trans;
7 print "\t/* -*- c -*- source code generated by ", join(" ", $0, @ARGV), " */\n";
8 while (<>) {
9 s/#.*$//;
10 next if /^\s*$/;
11 my($code, $status, $mapping) = /^([[:xdigit:]]+);\s*([CFST]);\s*([[:xdigit:]]+(?:\s+[[:xdigit:]]+)*);\s*$/
12 or warn("$ARGV:$.: weird line\n"), next;
13 next unless $status eq "C" or $status eq "S";
14 warn("$ARGV:$.: multi-char simple mapping\n"), next
15 if $mapping =~ /\s/;
16 $code = hex($code);
17 $mapping = hex($mapping);
18 $trans[$code] = $mapping;
19 } continue {
20 close ARGV if eof;
23 sub gobble {
24 my($begin, $step) = @_;
25 my $diff = $trans[$begin] - $begin;
26 my @codes;
27 my @holes;
28 my $probe = $begin;
29 my $hole;
30 while (1) {
31 my @beyond;
32 while (defined($trans[$probe]) && $trans[$probe] == $probe + $diff) {
33 push @beyond, $probe;
34 $probe += $step;
36 last unless @beyond >= 2;
37 push @holes, $hole if defined $hole;
38 push @codes, @beyond;
39 $hole = $probe;
40 $probe += $step;
42 return 0 unless @codes;
44 # The following formula was tuned for i486-linux-gnu-gcc-4.0 -O1.
45 if (@codes <= 2 + @holes) {
46 print "if (", join(" || ", map { sprintf("c == 0x%X", $_) } @codes), ")\n";
47 } else {
48 printf "if (c >= 0x%X && c <= 0x%X", $codes[0], $codes[-1];
49 printf " && c != 0x%X", $_ foreach @holes;
50 if ($step == 2) { printf " && (c & 1) == %d", $begin & 1 }
51 elsif ($step != 1) { printf " && c %% %d == %d", $step, $begin % $step }
52 print ")\n";
54 if ($diff != 0) {
55 if ($diff < 0) { printf "\t\tc -= "; $diff = -$diff }
56 else { printf "\t\tc += " }
57 if ($diff < 10) { printf "%d", $diff }
58 else { printf "0x%X", $diff }
60 print ";\n";
62 undef $trans[$_] foreach @codes;
63 return 1;
66 my $first = 1;
67 for (my $code = 0; $code <= $#trans; ++$code) {
68 next unless defined $trans[$code];
70 print $first ? "\t" : "\telse ";
71 gobble($code, 1) or gobble($code, 2) or gobble($code, 3) or gobble($code, 4)
72 or printf "if (c == 0x%X)\n\t\tc = 0x%X;\n", $code, $trans[$code];
73 $first = 0;
75 close STDOUT or die "$0: -: $!\n";
77 __END__
79 =head1 NAME
81 gen-case - Generate C source code for folding the case of a Unicode character.
83 =head1 SYNOPSIS
85 B<gen-case> CaseFolding.txt > ../src/intl/casefold.inc
87 =head1 DESCRIPTION
89 B<gen-case> reads F<CaseFolding.txt> of the Unicode Character Database
90 and generates C source code that implements the I<simple case folding>
91 as defined in that file.
93 The generated source code can then be used like this:
95 unicode_val_T
96 unicode_simple_case_fold(unicode_val_T c)
98 #include "casefold.inc"
99 return c;
102 =head1 BUGS
104 Does not support B<--help> nor B<--version>.
106 =head1 AUTHOR
108 Kalle Olavi Niemitalo <kon@iki.fi>
110 =head1 COPYRIGHT AND LICENSE
112 Copyright (c) 2006 Kalle Olavi Niemitalo.
114 Permission to use, copy, modify, and distribute this software for any
115 purpose with or without fee is hereby granted, provided that the above
116 copyright notice and this permission notice appear in all copies.
118 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
119 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
120 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
121 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
122 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
123 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
124 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.