Bug 468575 - Scrape some gunk off the config/ grout, r=ted
[wine-gecko.git] / intl / unicharutil / tools / gencasetable.pl
blobd87a6ca8e21d039b0d8f1fdf8458f03455a94d7d
1 #!/usr/bin/perl
3 # ***** BEGIN LICENSE BLOCK *****
4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 # The contents of this file are subject to the Mozilla Public License Version
7 # 1.1 (the "License"); you may not use this file except in compliance with
8 # the License. You may obtain a copy of the License at
9 # http://www.mozilla.org/MPL/
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 # for the specific language governing rights and limitations under the
14 # License.
16 # The Original Code is mozilla.org code.
18 # The Initial Developer of the Original Code is
19 # Netscape Communications Corporation.
20 # Portions created by the Initial Developer are Copyright (C) 1999
21 # the Initial Developer. All Rights Reserved.
23 # Contributor(s):
25 # Alternatively, the contents of this file may be used under the terms of
26 # either the GNU General Public License Version 2 or later (the "GPL"), or
27 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 # in which case the provisions of the GPL or the LGPL are applicable instead
29 # of those above. If you wish to allow use of your version of this file only
30 # under the terms of either the GPL or the LGPL, and not to allow others to
31 # use your version of this file under the terms of the MPL, indicate your
32 # decision by deleting the provisions above and replace them with the notice
33 # and other provisions required by the GPL or the LGPL. If you do not delete
34 # the provisions above, a recipient may use your version of this file under
35 # the terms of any one of the MPL, the GPL or the LGPL.
37 # ***** END LICENSE BLOCK *****
39 # If you run this script because the Unicode standard has been updated,
40 # check xpcom/string/public/nsCharTraits.h to see whether
41 # nsCharTraits<PRUnichar>::ASCIIToLower needs to be updated. It only
42 # needs to be updated if the Unicode consortium adds (or removes)
43 # a Unicode character whose lowercase form is an ASCII character.
44 # Currently there are only two such characters: KELVIN SIGN and
45 # LATIN CAPITAL LETTER I WITH DOT ABOVE.
47 ######################################################################
49 # Initialize global variables
51 ######################################################################
######################################################################
#
# Global state filled in while scanning the Unicode database:
#   %utot - upper case value (hex string) => title case value (hex string),
#           recorded only for entries whose upper and title case differ
#   $ui   - number of entries stored so far in the upper case arrays
#   $li   - number of entries stored so far in the lower case arrays
#
######################################################################
%utot = ();
$ui = 0;
$li = 0;

######################################################################
#
# Open the Unicode database file (input) and the generated header
# (output). Both paths are relative to the current directory.
# $! is appended so a failure reports the reason the open failed.
#
######################################################################
open ( UNICODATA , "< UnicodeData-Latest.txt")
  || die "cannot open UnicodeData-Latest.txt: $!";

open ( OUT , "> ../src/casetable.h")
  || die "cannot open output ../src/casetable.h file: $!";
72 ######################################################################
74 # Generate license and header
76 ######################################################################
# Text written at the top of the generated header: the tri-license block
# followed by a "do not edit" notice. The heredoc is single-quoted so that
# nothing inside it is ever interpolated, and the notice is wrapped in a
# C comment so the generated file stays valid C.
$npl = <<'END_OF_NPL';
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1999
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    mozilla/intl/unicharutil/tools/gencasetable.pl
 */
END_OF_NPL
print OUT $npl;

# The generated tables use the PRUnichar/PRUint16/PRUint32 type names;
# the header is emitted here so they are declared before the tables.
print OUT "#include \"nscore.h\" \n\n";
123 ######################################################################
125 # Process the file line by line
127 ######################################################################
# Scan the Unicode database one record per line and collect, for every
# entry that has an upper or lower case mapping, the data the table
# printers need:
#   %utot            - upper case => title case, when the two differ
#   @ucv/@uv/@ud     - code point, upper case value, 16-bit difference
#   @ulastd          - distance from the previous entry in @ucv
#   @lcv/@lv/@ld     - the same three values for lower case
#   @llastd          - distance from the previous entry in @lcv
#   @blk             - bitmap of 256-code-point blocks containing a mapping
while (<UNICODATA>) {
  # Remove only the line terminator. chop would delete the last data
  # character of a final line that has no newline, and would leave a
  # stray "\r" in the last field of a CRLF file.
  s/\r?\n\z//;

  ######################################################################
  #
  # Get value from fields
  #
  ######################################################################
  # A limit of -1 keeps trailing empty fields, so an absent mapping is
  # an empty string rather than undef.
  @f = split(/;/, $_, -1);
  $c = $f[0];    # The unicode value
  $u = $f[12];   # The upper case
  $l = $f[13];   # The lower case
  $t = $f[14];   # The title case

  # print $c . " | " . $u . " | " . $l . " | " . $t . "\n";

  ######################################################################
  #
  # Process title case for this entry: remember the mapping only when
  # the title case differs from the upper case.
  #
  ######################################################################
  if (($t ne "") && ($u ne "") && ($u ne $t)) {
    # print $c . " | " . $u . " | " . $l . " | " . $t . "\n";
    $utot{$u} = $t;
  }

  $cv = hex($c);   # convert the Unicode value into integer

  ######################################################################
  #
  # Process upper case for this entry
  #
  ######################################################################
  if ($u ne "") {          # if upper case exist
    $uvalue = hex($u);     # convert the upper case value into integer

    ######################################################################
    # store related information into arrays
    # @ucv    - unicode value
    # @uv     - upper case value (debug only)
    # @ud     - difference between unicode and upper case
    # @ulastd - difference between unicode and last unicode in the entry
    ######################################################################
    $ucv[$ui] = $cv;
    $uv[$ui]  = $uvalue;
    # The difference is kept modulo 2^16 so a negative delta still
    # prints as a 16-bit value.
    $ud[$ui]  = 0x0000FFFF & ($uvalue - $cv);

    if ($ui != 0) {
      $ulastd[$ui] = $cv - $ucv[$ui-1];
    }
    $ui++;
  }

  ######################################################################
  #
  # Process lower case for this entry
  #
  ######################################################################
  if ($l ne "") {          # if lower case exist
    $lvalue = hex($l);     # convert the lower case value into integer

    ######################################################################
    # store related information into arrays
    # @lcv    - unicode value
    # @lv     - lower case value (debug only)
    # @ld     - difference between unicode and lower case
    # @llastd - difference between unicode and last unicode in the entry
    ######################################################################
    $lcv[$li] = $cv;
    $lv[$li]  = $lvalue;
    $ld[$li]  = 0x0000FFFF & ($lvalue - $cv);

    if ($li != 0) {
      $llastd[$li] = $cv - $lcv[$li-1];
    }
    $li++;
  }

  if (($l ne "") || ($u ne "")) {   # if lower or upper case exist
    # One bit per block of 256 code points: bits 13 and up of the code
    # point select the word, bits 8-12 select the bit within it.
    # NOTE(review): only words 0..7 are printed later, so a code point
    # above U+FFFF would set a word that is never emitted - presumably
    # the input is restricted to the BMP; confirm.
    $idx  = $cv >> 13;
    $bits = 1 << (($cv >> 8) & 0x1F);
    $blk[$idx] |= $bits;
  }
}
224 ######################################################################
226 # Print out all the tables
228 ######################################################################
230 ######################################################################
232 # Print out upper to title case mapping
234 ######################################################################
# Emit gUpperToTitle as a flat list of (upper case, title case) pairs and
# gUpperToTitleItems as the number of pairs. The keys are visited in
# ascending code point order so that regenerating the header from the
# same input always produces the same file; iterating the hash directly
# yields an arbitrary order.
$ttotal = 0;
print OUT "static const PRUnichar gUpperToTitle[] = { \n";
foreach $upper (sort { hex($a) <=> hex($b) } keys %utot) {
  print OUT " 0x" . $upper . ", 0x" . $utot{$upper} . ", \n";
  $ttotal++;
}
print OUT "};\n\n";
print OUT "static const PRUint32 gUpperToTitleItems = $ttotal;\n\n";
245 ######################################################################
247 # Print out gToUpper table
249 ######################################################################
# Emit gToUpper, a run-length encoded table of upper case mappings, and
# gToUpperItems, the number of records in it. Each record is three
# 16-bit values:
#   From              - first code point of the run
#   (To << 8) | Every - To is the offset of the run's last code point
#                       from From; Every is the spacing between the
#                       code points of the run
#   Diff              - 16-bit difference between upper case and code point
# A run is extended while Diff and the spacing stay the same.

# Without any entry the loop below never runs and the array initializer
# would be left unterminated, so fail loudly instead.
die "no upper case mappings found in UnicodeData-Latest.txt\n" if ($#ucv < 0);

print OUT "static const PRUint16 gToUpper[] = \n";
print OUT "{ /* From To Every Diff */ \n";
$utotal = 0;
$ufrom = 0;   # remember the start of the output item
for ($i = 0; $i <= $#ucv; $i++)
{
  if ($i == 0) {
    ######################################################################
    #
    # Print the first item in the array
    #
    ######################################################################
    $ufrom = $ucv[0];
    printf OUT " 0x%04x, " , $ucv[0];
  } else {
    ######################################################################
    #
    # Close the current run and start a new one when the upper case
    # difference changes, or when the run already holds more than one
    # entry and the spacing to the previous entry changes.
    #
    ######################################################################
    if (($ud[$i] != $ud[$i-1]) ||
        (($ufrom != $ucv[$i-1]) && ($ulastd[$i] != $ulastd[$i-1]))) {

      # A run made of a single entry has no spacing.
      $every = 0;
      if ($ufrom != $ucv[$i-1])
      {
        $every = $ulastd[$i-1];
      }

      printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x ,\n",
             ($ucv[$i-1] - $ufrom), $every, $ud[$i-1];

      # To and Every are each packed into one byte of the record.
      if ((($ucv[$i-1] - $ufrom) > 255) || ($every > 255)) {
        print "WARNING!!! cannot handle block > 255 chars (Upper)\n\n";
        printf "0x%04X, 0x%04x, 0x%04x, 0x%04x \n",
               $ufrom, $ucv[$i-1], $every, $ud[$i-1];
      }

      $ufrom = $ucv[$i];   # update the start of the item
      printf OUT " 0x%04x, " , $ufrom;
      $utotal++;
    }
  }
  if ($i == $#ucv) {
    ######################################################################
    #
    # Print the last item in the array
    #
    ######################################################################
    printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x \n};\n\n",
           ($ucv[$i] - $ufrom), $ulastd[$i], $ud[$i];
    $utotal++;
    print OUT "static const PRUint32 gToUpperItems = $utotal;\n\n";
  }
  # printf "%4x - %4x - %4x - %4x\n", $ucv[$i], $uv[$i], $ud[$i], $ulastd[$i];
}
311 ######################################################################
313 # Print out gToLower table
315 ######################################################################
# Emit gToLower, a run-length encoded table of lower case mappings, and
# gToLowerItems, the number of records in it. Each record is three
# 16-bit values:
#   From              - first code point of the run
#   (To << 8) | Every - To is the offset of the run's last code point
#                       from From; Every is the spacing between the
#                       code points of the run
#   Diff              - 16-bit difference between lower case and code point
# A run is extended while Diff and the spacing stay the same.

# Without any entry the loop below never runs and the array initializer
# would be left unterminated, so fail loudly instead.
die "no lower case mappings found in UnicodeData-Latest.txt\n" if ($#lcv < 0);

print OUT "static const PRUint16 gToLower[] = \n";
print OUT "{ /* From To Every Diff */ \n";
$ltotal = 0;
$lfrom = 0;   # remember the start of the output item
for ($i = 0; $i <= $#lcv; $i++)
{
  if ($i == 0) {
    ######################################################################
    #
    # Print the first item in the array
    #
    ######################################################################
    $lfrom = $lcv[0];
    printf OUT " 0x%04x, " , $lcv[0];
  } else {
    ######################################################################
    #
    # Close the current run and start a new one when the lower case
    # difference changes, or when the run already holds more than one
    # entry and the spacing to the previous entry changes.
    #
    ######################################################################
    if (($ld[$i] != $ld[$i-1]) ||
        (($lfrom != $lcv[$i-1]) && ($llastd[$i] != $llastd[$i-1]))) {

      # A run made of a single entry has no spacing.
      $every = 0;
      if ($lfrom != $lcv[$i-1])
      {
        $every = $llastd[$i-1];
      }

      printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x ,\n",
             ($lcv[$i-1] - $lfrom) , $every, $ld[$i-1];

      # To and Every are each packed into one byte of the record.
      if ((($lcv[$i-1] - $lfrom) > 255) || ($every > 255)) {
        print "WARNING!!! cannot handle block > 255 chars (Lower)\n\n";
        printf "0x%04X, 0x%04x, 0x%04x, 0x%04x \n",
               $lfrom, $lcv[$i-1], $every, $ld[$i-1];
      }

      $lfrom = $lcv[$i];   # update the start of the item
      printf OUT " 0x%04x, " , $lfrom;
      $ltotal++;
    }
  }
  if ($i == $#lcv) {
    ######################################################################
    #
    # Print the last item in the array
    #
    ######################################################################
    printf OUT "((0x%02x << 8) | 0x%02x), 0x%04x \n};\n\n",
           ($lcv[$i] - $lfrom), $llastd[$i], $ld[$i];
    $ltotal++;
    print OUT "static const PRUint32 gToLowerItems = $ltotal;\n\n";
  }
  # printf "%4x - %4x - %4x - %4x\n", $lcv[$i], $lv[$i], $ld[$i], $llastd[$i];
}
# Emit gCaseBlocks: eight 32-bit words taken from @blk, in which a set bit
# marks a block of 256 code points that holds at least one entry with an
# upper or lower case mapping. Words that were never touched print as 0.
print OUT "static const PRUint32 gCaseBlocks [8] = {\n";
print OUT join(",\n", map { sprintf("0x%08X", $blk[$_]) } (0 .. 7)) . "\n";
print OUT "};\n";
389 ######################################################################
391 # Close files
393 ######################################################################
# Close the input under the name it was opened with (UNICODATA), and check
# the close of the output: buffered data is flushed at this point, so a
# failure here means the generated header is incomplete.
close(UNICODATA);
close(OUT) || die "cannot finish writing ../src/casetable.h: $!";