# Bug 468575 - Scrape some gunk off the config/ grout, r=ted
# [wine-gecko.git] / intl / lwbrk / tools / anzx4501.pl
# blob fa4d0349046d9076b866a97ff1c795e5240a3bb4
#!/usr/bin/perl
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1999
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
######################################################################
#
# Initial global variables
# (none of the three is referenced again in the visible part of the
# script; they are kept in case something else relies on them)
#
######################################################################
%utot = ();
$ui   = 0;
$li   = 0;

######################################################################
#
# Open every input and output file up front.
#
# The handles are package-global barewords on purpose: the subroutines
# further down print to OUT and HEADER by name.  The three-argument
# form of open keeps the mode separate from the path, and $! is added
# to each message so a failure says why the file could not be opened.
#
######################################################################

# Input: the Unicode database
open(UNICODATA, '<', "../../unicharutil/tools/UnicodeData-Latest.txt")
    || die "cannot find UnicodeData-Latest.txt: $!";

# Input: the JIS X 4051 class file
open(CLASS, '<', "jisx4501class.txt")
    || die "cannot find jisx4501class.txt: $!";

# Input: the JIS X 4051 class simplified mapping
open(SIMP, '<', "jisx4501simp.txt")
    || die "cannot find jisx4501simp.txt: $!";

# Output: the HTML analysis report
open(OUT, '>', "anzx4501.html")
    || die "cannot open output anzx4501.html file: $!";

# Output: the generated C header
open(HEADER, '>', "../src/jisx4501class.h")
    || die "cannot open output ../src/jisx4501class.h file: $!";
######################################################################
#
# Write the opening boilerplate of the HTML analysis report
#
######################################################################
# The variable name (including its spelling) is kept as-is because it is
# a package global.
$hthmlheader = <<END_OF_HTML;
<HTML>
<HEAD>
<TITLE>
Analysis of JIS X 4051 to Unicode General Category Mapping
</TITLE>
</HEAD>
<BODY>
<H1>
Analysis of JIS X 4051 to Unicode General Category Mapping
</H1>
END_OF_HTML
print OUT $hthmlheader;
######################################################################
#
# Write the license and the "generated file" notice at the top of the
# C header
#
######################################################################
# Everything between the heredoc markers is copied verbatim into the
# header, so it has to be valid C: each comment that is opened must be
# closed, including the one around the DO NOT EDIT notice.
$npl = <<END_OF_NPL;
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1999
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    mozilla/intl/lwbrk/tools/anzx4501.pl
 */
END_OF_NPL
print HEADER $npl;
# Lookup tables used by the rest of the script:
#   %occ        numeric code point -> "<simplified class> | <JIS class>"
#   %gcat       first field of a database line -> its third field
#               (the general category, e.g. "Lo")
#   %dcat       first field of a database line -> first letter of that
#               category
#   %simp       JIS X 4051 class -> simplified class
#   %gcount     how many database lines carry each category
#   %dcount     how many database lines carry each category letter
#   %sccount    how many mapping lines carry each simplified class
#   %rangecount " <first two hex digits>:<simplified class>" -> count
%occ        = ();
%gcat       = ();
%dcat       = ();
%simp       = ();
%gcount     = ();
%dcount     = ();
%sccount    = ();
%rangecount = ();

######################################################################
#
# Read the Unicode database line by line
#
######################################################################
while (<UNICODATA>) {
    # chomp rather than chop: chop removes the last character whatever
    # it is, which damages a final line that has no trailing newline.
    chomp;
    next if $_ eq "";    # a blank line carries no fields

    my @f = split(/;/, $_);
    my $c = $f[0];               # the unicode value, as hex text
    my $g = $f[2];               # general category
    my $d = substr($g, 0, 1);    # first letter of the category

    $gcat{$c} = $g;
    $dcat{$c} = $d;
    $gcount{$g}++;
    $dcount{$d}++;
}
# The handle opened for the database is UNICODATA; the script used to
# close a differently spelled handle and so never closed this one.
close(UNICODATA);

######################################################################
#
# Read the simplified-class mapping: "<class>;<simplified class>"
#
######################################################################
while (<SIMP>) {
    chomp;
    next if $_ eq "";

    my @f = split(/;/, $_);

    $simp{$f[0]} = $f[1];
    $sccount{$f[1]}++;
}
close(SIMP);
# GetClass($u)
#
# Return the general category for the numeric code point $u.
#
# The category stored in %gcat (keyed by the hex text DecToHex produces)
# wins when there is one.  Otherwise a fixed answer is given for a few
# ranges; presumably these are ranges the database does not list one
# code point at a time -- verify against the data file.
#
# Returns the category string.  When nothing matches, a warning is
# printed and the empty string is returned (the sub used to fall off its
# end there and hand back printf's status value instead).
sub GetClass {
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: this used to overwrite the package global $g.
    my $g = $gcat{$hex};
    return $g if defined($g) && $g ne "";

    return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
    return "Lo"  if 0xac00 <= $u && $u <= 0xd7a3;
    # 0xd800-0xdb7f, 0xdb80-0xdbff and 0xdc00-0xdfff all answered "Cs".
    return "Cs"  if 0xd800 <= $u && $u <= 0xdfff;
    return "Co"  if 0xe000 <= $u && $u <= 0xf8ff;

    printf "WARNING !!!! Cannot find General Category for U+%s \n", $hex;
    return "";
}
# GetDClass($u)
#
# Return the one-letter category for the numeric code point $u.
#
# The letter stored in %dcat (keyed by the hex text DecToHex produces)
# wins when there is one.  Otherwise a fixed answer is given for a few
# ranges.  Note that the first range answers "Han", not a single letter;
# that is kept exactly as it was.
#
# Returns the category string.  When nothing matches, a warning is
# printed and the empty string is returned (the sub used to fall off its
# end there and hand back printf's status value instead).
sub GetDClass {
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: this used to overwrite the package global $g.
    my $g = $dcat{$hex};
    return $g if defined($g) && $g ne "";

    return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
    return "L"   if 0xac00 <= $u && $u <= 0xd7a3;
    # Four adjacent ranges (0xd800-0xdb7f, 0xdb80-0xdbff, 0xdc00-0xdfff
    # and 0xe000-0xf8ff) all answered "C".
    return "C"   if 0xd800 <= $u && $u <= 0xf8ff;

    printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n", $hex;
    return "";
}
# DecToHex($d)
#
# Format the number $d as upper-case hexadecimal, zero-padded to at
# least four digits (longer values are not truncated).
sub DecToHex {
    my ($d) = @_;
    return sprintf("%04X", $d);
}
# %gtotal and %dtotal count code points per "<simplified class><category>"
# and "<simplified class><category letter>" respectively.
%gtotal = ();
%dtotal = ();
while (<CLASS>) {
    # chomp rather than chop, so a final line without a newline keeps
    # its last character.
    chomp;
    next if $_ eq "";    # a blank line carries no fields

    ##################################################################
    #
    # Get value from fields:
    #   first code point (hex) ; last code point (hex, may be empty) ; class
    #
    ##################################################################
    my @f = split(/;/, $_);

    # Classes whose name starts with "a" are left out of the analysis.
    next if substr($f[2], 0, 1) eq "a";

    my $sc    = $simp{$f[2]};
    my $first = hex($f[0]);
    # An empty second field means the line covers a single code point.
    my $last  = ($f[1] eq "") ? $first : hex($f[1]);

    for (my $k = $first; $k <= $last; $k++) {
        if (exists($occ{$k})) {
            # The first line that claims a code point wins; later
            # claims are ignored without a message.
            # printf "WARNING !! Conflict defination!!! U+%s -> [%s] [%s | %s]\n",
            #        DecToHex($k), $occ{$k} , $f[2] , $sc;
            next;
        }

        $occ{$k} = $sc . " | " . $f[2];

        my $gclass = GetClass($k);
        my $dclass = GetDClass($k);
        $gtotal{$sc . $gclass}++;
        $dtotal{$sc . $dclass}++;

        # Per-row tally, keyed by the first two hex digits of the code point.
        my $u  = DecToHex($k);
        my $rk = " " . substr($u, 0, 2) . ":" . $sc;
        $rangecount{$rk}++;
    }
}

#print %gtotal;
#print %dtotal;
# printreport()
#
# Write two HTML tables to the OUT handle.
#
# Table 1 has one row per simplified class (keys of %sccount).  Its
# columns are one per category letter (keys of %dcount, counts taken
# from %dtotal), then the row total of those, then one per category
# (keys of %gcount, counts taken from %gtotal).
#
# Table 2 has one row per two-digit hex prefix 00..4E and one column per
# simplified class, with counts taken from %rangecount.  Rows whose
# counts add up to zero are not printed.
#
# A combination that was never counted is printed as an empty cell.
# Takes no arguments; the return value is not meaningful.
sub printreport {
    # The key sets do not change while the report is written, so sort
    # them once instead of once per row.
    my @letters    = sort(keys %dcount);
    my @categories = sort(keys %gcount);
    my @classes    = sort(keys %sccount);

    # ---- Table 1: simplified class against category ----
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
    foreach my $d (@letters) {
        print OUT "<TD BGCOLOR=red>$d</TD>\n";
    }
    print OUT "<TD BGCOLOR=white>Total</TD>\n";
    foreach my $g (@categories) {
        print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
    }
    print OUT "</TR>\n";

    foreach my $sc (@classes) {
        print OUT "<TR><TH>$sc<TH>\n";

        my $total = 0;
        foreach my $d (@letters) {
            my $count = $dtotal{$sc . $d};
            $count = "" unless defined($count);    # never counted: empty cell
            $total += ($count || 0);
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "<TD BGCOLOR=white>$total</TD>\n";

        foreach my $g (@categories) {
            my $count = $gtotal{$sc . $g};
            $count = "" unless defined($count);
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "</TR>\n";
    }
    print OUT "</TABLE>\n";

    # ---- Table 2: two-digit hex prefix against simplified class ----
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
    foreach my $sc (@classes) {
        print OUT "<TD BGCOLOR=red>$sc</TD>\n";
    }
    print OUT "</TR>\n";

    for (my $rr = 0; $rr < 0x4f; $rr++) {
        my $r    = sprintf("%02X", $rr);
        my $row  = "<TR><TH>" . $r . "<TH>\n";
        my $seen = 0;

        foreach my $sc (@classes) {
            my $count = $rangecount{ " " . $r . ":" . $sc };
            $count = "" unless defined($count);
            $row  .= sprintf("<TD>%s</TD>\n", $count);
            $seen += ($count || 0);
        }

        $row .= "</TR>\n";

        # Numeric test: $seen is a sum, not a string.
        print OUT $row if $seen != 0;
    }
    print OUT "</TABLE>\n";
}
printreport();
# printarray($r, $def)
#
# Write one C array, gLBClass<$r>, to the HEADER handle.
#
#   $r   - two hex digits naming a block of 256 code points; the block
#          starts at hex($r) * 256
#   $def - the character written for a code point that has no %occ entry
#
# The array has 32 initialisers, one per group of eight consecutive code
# points.  Each initialiser is "0x" followed by eight characters, the
# group's highest code point first.  The character for a code point is
# the SECOND character of its %occ entry; presumably simplified classes
# are spelled so that this is a hex digit -- verify against the mapping
# file.
#
# A "[<$r> || <$def>]" progress line is also printed to standard output.
# The return value is not meaningful.
sub printarray {
    my ($r, $def) = @_;
    printf "[%s || %s]\n", $r, $def;

    my $base = hex($r) * 256;
    printf HEADER "static const PRUint32 gLBClass%s[32] = {\n", $r;

    for (my $i = 0; $i < 256; $i += 8) {
        # Collect the eight characters of this group, highest code
        # point first, then write the whole row in one go.
        my $digits = "";
        for (my $j = 7; $j >= 0; $j--) {
            my $v = $base + $i + $j;
            $digits .= exists($occ{$v}) ? substr($occ{$v}, 1, 1) : $def;
        }
        printf HEADER "0x%s, // U+%04X - U+%04X\n",
            $digits, $base + $i, $base + $i + 7;
    }
    print HEADER "};\n\n";
}
# Emit one lookup table per 256-code-point block.  The first argument is
# the block's two leading hex digits, the second the character used for
# code points the class file does not mention.
printarray("00", "7");
printarray("20", "7");
printarray("21", "7");
printarray("30", "5");
printarray("0E", "8");

#print %rangecount;

######################################################################
#
# Close files
#
######################################################################
# The report opens <HTML> and <BODY> at the top; close them so the
# document is well formed.
print OUT "</BODY>\n</HTML>\n";

# Buffered output is only flushed for certain at close time, so a failed
# close on a file being written means the file is incomplete.
close(HEADER) || die "cannot finish writing ../src/jisx4501class.h: $!";
close(CLASS);
close(OUT) || die "cannot finish writing anzx4501.html: $!";