3 # ***** BEGIN LICENSE BLOCK *****
4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 # The contents of this file are subject to the Mozilla Public License Version
7 # 1.1 (the "License"); you may not use this file except in compliance with
8 # the License. You may obtain a copy of the License at
9 # http://www.mozilla.org/MPL/
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 # for the specific language governing rights and limitations under the
16 # The Original Code is mozilla.org code.
18 # The Initial Developer of the Original Code is
19 # Netscape Communications Corporation.
20 # Portions created by the Initial Developer are Copyright (C) 1999
21 # the Initial Developer. All Rights Reserved.
24 # Jungshik Shin <jshin@i18nl10n.com>
26 # Alternatively, the contents of this file may be used under the terms of
27 # either the GNU General Public License Version 2 or later (the "GPL"), or
28 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 # in which case the provisions of the GPL or the LGPL are applicable instead
30 # of those above. If you wish to allow use of your version of this file only
31 # under the terms of either the GPL or the LGPL, and not to allow others to
32 # use your version of this file under the terms of the MPL, indicate your
33 # decision by deleting the provisions above and replace them with the notice
34 # and other provisions required by the GPL or the LGPL. If you do not delete
35 # the provisions above, a recipient may use your version of this file under
36 # the terms of any one of the MPL, the GPL or the LGPL.
38 # ***** END LICENSE BLOCK *****
40 ######################################################################
# Initialize global variables
44 ######################################################################
61 ######################################################################
63 # Open the unicode database file
65 ######################################################################
# Open the Unicode character database for reading.
# The three-argument form keeps the mode separate from the file name, and $!
# is included so the real cause (missing file, permissions, ...) is reported.
# The bareword handle name is kept as-is; presumably the record loop later in
# the script reads from UNICODATA.
open(UNICODATA, '<', 'UnicodeData-Latest.txt')
  or die "cannot find UnicodeData-Latest.txt: $!";
69 ######################################################################
71 # Open the output file
73 ######################################################################
# Create (or truncate) the generated C header.
# The three-argument form keeps the mode separate from the file name, and $!
# is included so the real cause of a failure is reported.
# The bareword handle OUT is kept because the print/printf statements in this
# script write to it by that name.
open(OUT, '>', '../src/cattable.h')
  or die "cannot open output ../src/cattable.h file: $!";
77 ######################################################################
79 # Generate license and header
81 ######################################################################
83 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
84 /* ***** BEGIN LICENSE BLOCK *****
85 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
87 * The contents of this file are subject to the Mozilla Public License Version
88 * 1.1 (the "License"); you may not use this file except in compliance with
89 * the License. You may obtain a copy of the License at
90 * http://www.mozilla.org/MPL/
92 * Software distributed under the License is distributed on an "AS IS" basis,
93 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
94 * for the specific language governing rights and limitations under the
97 * The Original Code is mozilla.org code.
99 * The Initial Developer of the Original Code is
100 * Netscape Communications Corporation.
101 * Portions created by the Initial Developer are Copyright (C) 1999
102 * the Initial Developer. All Rights Reserved.
106 * Alternatively, the contents of this file may be used under the terms of
107 * either the GNU General Public License Version 2 or later (the "GPL"), or
108 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
109 * in which case the provisions of the GPL or the LGPL are applicable instead
110 * of those above. If you wish to allow use of your version of this file only
111 * under the terms of either the GPL or the LGPL, and not to allow others to
112 * use your version of this file under the terms of the MPL, indicate your
113 * decision by deleting the provisions above and replace them with the notice
114 * and other provisions required by the GPL or the LGPL. If you do not delete
115 * the provisions above, a recipient may use your version of this file under
116 * the terms of any one of the MPL, the GPL or the LGPL.
118 * ***** END LICENSE BLOCK
***** */
120 DO NOT EDIT THIS DOCUMENT
!!! THIS DOCUMENT IS GENERATED BY
121 mozilla
/intl/unicharutil
/tools/gencattable
.pl
127 print OUT
"#include \"nscore.h\" \n\n";
138 ######################################################################
140 # Process the file line by line
142 ######################################################################
145 ######################################################################
147 # Get value from fields
149 ######################################################################
150 @f = split(/;/ , $_);
151 $c = $f[0]; # The unicode value
152 $n = $f[1]; # The unicode name
153 $g = $f[2]; # The General Category
155 $cat = substr($g, 0, 1);
156 # All CJK Compatibility ideographs belong to Lo
157 if ($n =~ /^CJK COMPATIBILITY IDEOGRAPH/)
159 $catnum = $map{$cat};
161 print "WARNING !!!! " . "
162 error in handling CJK Compatibility Ideograph block\n\n";
164 if (hex($prevcjkcomp) + 1 != hex($c))
166 if (hex($prevcjkcomp) != 0)
168 $sh{$cjkcompkey} = $prevcjkcomp;
170 $cjkcompkey = sprintf("CJK Compatibility #%d", ++$cjkcompidx);
171 $sl{$cjkcompkey} = $c;
172 $sc{$cjkcompkey} = $catnum;
173 push @special, $cjkcompkey;
177 elsif(( substr($n, 0, 1) ne "<") || ($n eq "<control>"))
185 $category{$c} = $cat;
186 # print $g . " = " . $gcount{$g} . "\n";
189 # Handle special block
190 @pair=split(/, /, $n );
191 $catnum = $map{$cat};
194 # printf "[%s][%s] => %d\n", $pair[0], $pair[1], $catnum;
195 if( $pair[1] eq "First>") {
197 $sc{$pair[0]} = $catnum;
198 push @special, $pair[0];
199 } elsif ( $pair[1] eq "Last>") {
201 if($sc{$pair[0]} ne $catnum)
203 print "WARNING !!!! error in handling special block\n\n";
206 print "WARNING !!!! error in handling special block\n\n";
211 # take care of the last CJK Compatibility block
212 $sh{$cjkcompkey} = $prevcjkcomp;
214 # @cats = keys(%gcount);
215 # foreach $cat ( sort(@cats) ) {
216 # $count = $gcount{$cat};
217 # print "$cat ==> $count\n";
221 # We treat characters < U+1D00 as "plane 0" and all the rest of planes 0 and 1
222 # as "plane 1". This gives a relatively even distribution of patterns between
223 # planes. If you change the value of $planeSplit, make sure that none of the
224 # ranges below straddles the new value!
225 $planeSplit = 0x1d00;
251 for($t = 1; $t <= $tt; $t++)
253 $tl = $range[($t-1) * 2];
254 $th = $range[($t-1) * 2 + 1];
255 $ts = ( $th - $tl ) >> 3;
256 $totaldata += $ts + 1;
257 if ($planeSplit > $tl && $planeSplit < $th) {
258 printf STDERR
"plane split %04X falls within range %04X - %04X\n",
259 $planeSplit, $tl, $th;
260 die "This program is now broken!!!\n\n\n";
262 if ($tl < $planeSplit) {
267 if ($oldplane != $plane) {
268 if ($oldplane != -1) {
269 printf STDERR
"Plane %d has %d patterns\n", $oldplane, $newidx[$oldplane];
270 if ($newidx[$oldplane] > 256) {
271 printf STDERR
"We have more than 256 patterns for plane %d\n", $oldplane;
272 die "This program is now broken!!!\n\n\n";
276 push @planes, $plane;
280 printf OUT
"static const PRUint8 gGenCatIdx%d[%d] = {\n", $t, $ts + 1;
281 for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
284 for($j = 0; $j < 8 ; $j++)
286 $k = sprintf("%04X", (($i << 3) + $j));
288 $cat = $category{$k};
291 $data = $data + ($map{$cat} << (4*$j));
294 $pattern = sprintf("0x%08X", $data);
297 $idx = $pat[$plane]{$pattern};
298 unless( exists($pat[$plane]{$pattern})){
299 $idx = $newidx[$plane]++;
300 $patarray[$plane][$idx] = $pattern;
301 $pat[$plane]{$pattern} = $idx;
304 printf OUT
" %3d, // U+%06X - U+%06X : %s\n" ,
305 $idx, ($i << 3),((($i +1)<< 3)-1), $pattern ;
313 $tl = $range[($t-1) * 2 + 1] + 1;
314 $th = $range[$t * 2] - 1;
315 for($i = ($tl >> 3); $i <= ($th >> 3) ; $i ++ )
318 for($j = 0; $j < 8 ; $j++)
320 $k = sprintf("%04X", (($i << 3) + $j));
322 $cat = $category{$k};
325 $data = $data + ($map{$cat} << (4*$j));
328 $pattern = sprintf("0x%08X", $data);
331 print "WARNING, Unicode Database now contain characters " .
332 "which we have not considered. change this program !!!\n\n";
333 printf "Problem- U+%06X - U+%06X range\n", ($i << 3),((($i +1)<< 3)-1);
339 printf STDERR
"Plane %d has %d patterns\n", $plane, $newidx[$plane];
340 if ($newidx[$plane] > 256) {
341 printf STDERR
"We have more than 256 patterns for plane %d\n", $plane;
342 die "This program is now broken!!!\n\n\n";
345 for $plane (@planes) {
# Emit the opening line of this plane's pattern table.
# The array size is the number of distinct patterns collected for the plane,
# i.e. $newidx[$plane]. The previous code interpolated the unrelated (and
# never assigned) scalar $newidx into the format string, which produced an
# empty size "[]". Passing the count as a printf argument also keeps data out
# of the format string.
printf OUT "static const PRUint32 gGenCatPatPl%d[%d] = {\n",
  $plane, $newidx[$plane];
347 for($i = 0 ; $i < $newidx[$plane]; $i++)
349 printf OUT
" %s, // $i \n", $patarray[$plane][$i] ;
351 printf OUT
"}; \n\n";
352 $totaldata += $newidx[$plane] * 4;
# Start the generated C function GetCat(): its signature and opening brace,
# the local variable that receives a pattern word, and the comment that
# introduces the index-table-driven range checks.
# None of this text has format directives, so a plain print is sufficient.
print OUT "static PRUint8 GetCat(PRUint32 u)\n{\n"
        . " PRUint32 pat;\n"
        . " // Handle block which use index table mapping \n";
360 for($t = 1; $t <= $tt; $t++)
362 $tl = $range[($t-1) * 2];
363 $th = $range[($t-1) * 2 + 1];
364 if ($tl < $planeSplit) {
369 printf OUT
" // Handle U+%06X to U+%06X\n", $tl, $th;
370 printf OUT
" if(0x%06X <= u && u <= 0x%06X) {\n", $tl, $th;
371 printf OUT
" pat = " .
372 "gGenCatPatPl%d[gGenCatIdx%d [( u - 0x%06X ) / 8]];\n",
# Emit the C statement that extracts the 4-bit category for code point u from
# the 32-bit pattern word (eight nibbles per word, selected by u % 8).
# This must be print, not printf: the emitted C text contains a literal '%'
# (the modulo operator). Under printf, "% 8)" is parsed as an invalid
# conversion, which only happens to pass through unchanged and triggers an
# "Invalid conversion" warning when warnings are enabled.
print OUT " return (pat >> ((u % 8) * 4)) & 0x0F;\n";
379 printf OUT
" // Handle blocks which share the same category \n";
383 #@special = keys(%sh);
384 foreach $s ( @special ) {
385 printf OUT
" // Handle %s block \n", $s;
386 printf OUT
" if(0x%s <= u && u <= 0x%s) \n", $sl{$s}, $sh{$s};
387 printf OUT
" return $sc{$s}; \n\n";
# Finish the generated GetCat(): the fall-through result for code points that
# matched none of the emitted range checks, then the closing brace. After it,
# record the accumulated table size ($totaldata) as a trailing C++ comment.
print OUT " return 0; // UNDEFINE \n}\n"
        . "// total data size = $totaldata\n";
397 #foreach $pattern ( sort(@pats) ) {
398 # $count = $pat{$pattern};
399 # # print "$cat ==> $count\n";
402 print "total = $totaldata\n";
404 ######################################################################
408 ######################################################################