Follow-on fix for bug 457825. Use sheet principal for agent and user sheets. r=dbaron...
[wine-gecko.git] / intl / unicharutil / tools / gencattable.pl
blobaa9857de32e179f92a4219f34456e8cf77cebaea
#!/usr/bin/perl
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1999
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Jungshik Shin <jshin@i18nl10n.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
######################################################################
#
# Initial global variables
#
######################################################################

# Number of characters seen, keyed both by full General Category
# (e.g. "Lu") and by its first letter (e.g. "L").
%gcount = ();

# Per plane: hash from pattern string ("0x%08X") to its index in that
# plane's pattern table.  The script indexes this as $pat[$plane]{...},
# so it is an array (of hash references), not a hash.
@pat = ();

# First letter of the General Category => 4-bit code stored in the
# generated tables.  0 is left for "no category".
%map = (
  "M" => "1",
  "N" => "2",
  "Z" => "3",
  "C" => "4",
  "L" => "5",
  "P" => "6",
  "S" => "7"
);

# Names of the "special" blocks (ranges that share one category), in the
# order they were encountered.  Filled with push, so it is an array.
@special = ();
######################################################################
#
# Open the unicode database file
#
######################################################################
# Three-argument open keeps the mode separate from the file name, and $!
# tells the user why the open failed (missing file, permissions, ...).
open(UNICODATA, "<", "UnicodeData-Latest.txt")
  or die "cannot find UnicodeData-Latest.txt: $!";

######################################################################
#
# Open the output file
#
######################################################################
open(OUT, ">", "../src/cattable.h")
  or die "cannot open output ../src/cattable.h file: $!";
######################################################################
#
# Generate license and header
#
######################################################################
# The heredoc is single-quoted so nothing in the license text can ever be
# interpolated.  The "DO NOT EDIT" notice must sit inside a C comment,
# otherwise the generated header does not compile.
$mpl = <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1999
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    mozilla/intl/unicharutil/tools/gencattable.pl
 */
END_OF_MPL

print OUT $mpl;

print OUT "#include \"nscore.h\" \n\n";
%category = ();   # code point (hex string as in the data file) => category letter
%sh = ();         # special block name => last code point (hex string)
%sl = ();         # special block name => first code point (hex string)
%sc = ();         # special block name => numeric category code from %map

$prevcjkcomp = 0; # code point of the previous CJK Compatibility Ideograph seen
$cjkcompidx = 0;  # running number used to name CJK Compatibility runs

######################################################################
#
# Process the file line by line
#
######################################################################
while (<UNICODATA>) {
  # chomp only strips a trailing newline; chop would eat a real character
  # from a final line that has no newline.
  chomp;

  ######################################################################
  #
  # Get value from fields
  #
  ######################################################################
  @f = split(/;/, $_);
  next if @f < 3;   # blank or truncated line: nothing to classify
  $c = $f[0];       # The unicode value
  $n = $f[1];       # The unicode name
  $g = $f[2];       # The General Category

  $cat = substr($g, 0, 1);

  if ($n =~ /^CJK COMPATIBILITY IDEOGRAPH/) {
    # All CJK Compatibility ideographs belong to Lo.  Consecutive code
    # points are merged into numbered runs that are emitted as special
    # blocks instead of going through the index tables.
    $catnum = $map{$cat};
    if ($cat ne "L") {
      print "WARNING !!!! " . "\n error in handling CJK Compatibility Ideograph block\n\n";
    }
    if (hex($prevcjkcomp) + 1 != hex($c)) {
      # This code point does not continue the previous run: close the
      # previous run (if there was one) and start a new one.
      if (hex($prevcjkcomp) != 0) {
        $sh{$cjkcompkey} = $prevcjkcomp;
      }
      $cjkcompkey = sprintf("CJK Compatibility #%d", ++$cjkcompidx);
      $sl{$cjkcompkey} = $c;
      $sc{$cjkcompkey} = $catnum;
      push @special, $cjkcompkey;
    }
    $prevcjkcomp = $c;
  }
  elsif ((substr($n, 0, 1) ne "<") || ($n eq "<control>")) {
    # Ordinary character (or a control character): record its category.
    # print $g;
    $gcount{$g}++;
    $gcount{$cat}++;
    $category{$c} = $cat;
    # print $g . " = " . $gcount{$g} . "\n";
  }
  else {
    # Handle special block: names of the form "<Block Name, First>" and
    # "<Block Name, Last>" delimit a range sharing one category.
    @pair = split(/, /, $n);
    $catnum = $map{$cat};
    $pair[0] =~ s/^<//;

    # printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum;
    if ($pair[1] eq "First>") {
      $sl{$pair[0]} = $c;
      $sc{$pair[0]} = $catnum;
      push @special, $pair[0];
    }
    elsif ($pair[1] eq "Last>") {
      $sh{$pair[0]} = $c;
      if ($sc{$pair[0]} ne $catnum) {
        print "WARNING !!!! error in handling special block\n\n";
      }
    }
    else {
      print "WARNING !!!! error in handling special block\n\n";
    }
  }
}

# take care of the last CJK Compatibility block (if the data had any)
$sh{$cjkcompkey} = $prevcjkcomp if defined $cjkcompkey;
# @cats = keys(%gcount);
# foreach $cat ( sort(@cats) ) {
#   $count = $gcount{$cat};
#   print "$cat ==> $count\n";
# }

# We treat characters < U+1D00 as "plane 0" and all the rest of planes 0 and 1
# as "plane 1". This gives a relatively even distribution of patterns between
# planes. If you change the value of $planeSplit, make sure that none of the
# ranges below straddles the new value!
$planeSplit = 0x1d00;

# Flat list of (first, last) code point pairs.  Each pair gets its own
# gGenCatIdxN index table; code points in the gaps between pairs are
# expected to have no per-character category (a warning is printed if
# they do).
@range = (
  0x0000, 0x07ff,
  0x0900, 0x1b7f,
  0x1d00, 0x33ff,
  0x4dc0, 0x4dff,
  0xa000, 0xa87f,
  0xfb00, 0xffff,
  0x10000, 0x104af,
  0x10800, 0x1083f,
  0x10900, 0x1091f,
  0x10a00, 0x10a5f,
  0x12000, 0x1247f,
  0x1d000, 0x1d7ff
);

$totaldata = 0;             # running size estimate of the generated tables

$tt = ($#range + 1) / 2;    # number of (first, last) pairs in @range
@newidx = (0);              # per plane: next free pattern index
@patarray = ();             # per plane: pattern strings in index order
$oldplane = -1;             # plane of the previous range, -1 before the first
@planes = ();               # planes in the order they were first used

# Pack the category codes of the eight code points 8*$blk .. 8*$blk+7 into
# one integer, four bits per code point, lowest code point in the lowest
# nibble.  Code points without a recorded category contribute 0.
sub packed_categories {
  my ($blk) = @_;
  my $packed = 0;
  for (my $slot = 0; $slot < 8; $slot++) {
    my $letter = $category{sprintf("%04X", ($blk << 3) + $slot)};
    if (defined($letter) && $letter ne "") {
      $packed += $map{$letter} << (4 * $slot);
    }
  }
  return $packed;
}

# Report how many patterns a plane ended up with and abort if there are
# more than 256, since the index tables are emitted as PRUint8.
sub check_pattern_count {
  my ($pl) = @_;
  printf STDERR "Plane %d has %d patterns\n", $pl, $newidx[$pl];
  if ($newidx[$pl] > 256) {
    printf STDERR "We have more than 256 patterns for plane %d\n", $pl;
    die "This program is now broken!!!\n\n\n";
  }
}

for ($t = 1; $t <= $tt; $t++) {
  $tl = $range[($t - 1) * 2];
  $th = $range[($t - 1) * 2 + 1];
  $ts = ($th - $tl) >> 3;
  $totaldata += $ts + 1;
  if ($planeSplit > $tl && $planeSplit < $th) {
    printf STDERR "plane split %04X falls within range %04X - %04X\n",
                  $planeSplit, $tl, $th;
    die "This program is now broken!!!\n\n\n";
  }
  $plane = ($tl < $planeSplit) ? 0 : 1;
  if ($oldplane != $plane) {
    # Crossing into a new plane: validate the one just finished and start
    # a fresh pattern numbering for the new one.
    check_pattern_count($oldplane) if $oldplane != -1;
    $newidx[$plane] = 0;
    push @planes, $plane;
  }
  $oldplane = $plane;

  # One index entry per group of eight code points in this range.
  printf OUT "static const PRUint8 gGenCatIdx%d[%d] = {\n", $t, $ts + 1;
  for ($i = ($tl >> 3); $i <= ($th >> 3); $i++) {
    $pattern = sprintf("0x%08X", packed_categories($i));

    # Reuse the index of an already-seen pattern, or allocate a new one.
    unless (exists($pat[$plane]{$pattern})) {
      $patarray[$plane][$newidx[$plane]] = $pattern;
      $pat[$plane]{$pattern} = $newidx[$plane]++;
    }
    $idx = $pat[$plane]{$pattern};

    printf OUT " %3d, // U+%06X - U+%06X : %s\n",
               $idx, ($i << 3), ((($i + 1) << 3) - 1), $pattern;
  }
  printf OUT "};\n\n";

  # Sanity-check the gap between this range and the next one: GetCat
  # returns 0 there, so any categorized character means @range is stale.
  if ($t != $tt) {
    $tl = $range[($t - 1) * 2 + 1] + 1;
    $th = $range[$t * 2] - 1;
    for ($i = ($tl >> 3); $i <= ($th >> 3); $i++) {
      if (packed_categories($i) != 0) {
        print "WARNING, Unicode Database now contain characters " .
              "which we have not considered. change this program !!!\n\n";
        printf "Problem- U+%06X - U+%06X range\n", ($i << 3), ((($i + 1) << 3) - 1);
      }
    }
  }
}

# Validate the last plane, which the loop above never switches away from.
check_pattern_count($plane);
# Emit one pattern table per plane.  Each entry packs the category codes
# of eight consecutive code points, four bits apiece.
for $plane (@planes) {
  # The array size is the number of patterns allocated for this plane.
  # (Interpolating "$newidx" here would expand the unset scalar $newidx,
  # not the array element, and leave the size empty.)
  printf OUT "static const PRUint32 gGenCatPatPl%d[%d] = {\n",
             $plane, $newidx[$plane];
  for ($i = 0; $i < $newidx[$plane]; $i++) {
    printf OUT " %s, // %d \n", $patarray[$plane][$i], $i;
  }
  printf OUT "}; \n\n";
  $totaldata += $newidx[$plane] * 4;   # four bytes per PRUint32 entry
}
# Emit the GetCat() lookup function.  Lines that carry no conversions are
# written with print, so that C's '%' operator in the generated code is
# never parsed as a printf conversion.
print OUT "static PRUint8 GetCat(PRUint32 u)\n{\n";
print OUT " PRUint32 pat;\n";
print OUT " //\n";
print OUT " // Handle block which use index table mapping \n";
print OUT " //\n";
for ($t = 1; $t <= $tt; $t++) {
  $tl = $range[($t - 1) * 2];
  $th = $range[($t - 1) * 2 + 1];

  # Same plane rule as used when the index tables were generated.
  $plane = ($tl < $planeSplit) ? 0 : 1;

  printf OUT " // Handle U+%06X to U+%06X\n", $tl, $th;
  printf OUT " if(0x%06X <= u && u <= 0x%06X) {\n", $tl, $th;
  printf OUT " pat = " .
             "gGenCatPatPl%d[gGenCatIdx%d [( u - 0x%06X ) / 8]];\n",
             $plane, $t, $tl;
  print OUT " return (pat >> ((u % 8) * 4)) & 0x0F;\n";
  print OUT " }\n\n";
}

print OUT " //\n";
print OUT " // Handle blocks which share the same category \n";
print OUT " //\n";

#@special = keys(%sh);
foreach $s (@special) {
  # A block with a start but no end would generate "0x <= u", which is
  # not valid C; stop here rather than write a broken header.
  die "special block '$s' has no end code point\n" unless defined $sh{$s};

  printf OUT " // Handle %s block \n", $s;
  printf OUT " if(0x%s <= u && u <= 0x%s) \n", $sl{$s}, $sh{$s};
  printf OUT " return %s; \n\n", $sc{$s};
}

print OUT " return 0; // UNDEFINE \n}\n";
# Record the table size estimate in the generated header and echo it to
# the user.  print is used because the string is already interpolated.
print OUT "// total data size = $totaldata\n";
#$total = 0;
#@pats = keys(%pat);
#foreach $pattern ( sort(@pats) ) {
#  $count = $pat{$pattern};
#  # print "$cat ==> $count\n";
#  $total++;
#}
print "total = $totaldata\n";

######################################################################
#
# Close files
#
######################################################################
# The input handle is UNICODATA (closing "UNIDATA" closed nothing).
close(UNICODATA);
# A failed close on the output means buffered data was not written.
close(OUT) or die "cannot close ../src/cattable.h: $!";