Update ooo320-m1
[ooovba.git] / i18npool / source / isolang / langid.pl
blobdc6a289b0f6228029d09fbfac537b572c85ad434
1 : # -*- perl -*- vim: ft=perl
2 eval 'exec perl -w -S $0 ${1+"$@"}'
3 if 0;
4 #*************************************************************************
6 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 #
8 # Copyright 2008 by Sun Microsystems, Inc.
10 # OpenOffice.org - a multi-platform office productivity suite
12 # $RCSfile: langid.pl,v $
14 # $Revision: 1.4 $
16 # This file is part of OpenOffice.org.
18 # OpenOffice.org is free software: you can redistribute it and/or modify
19 # it under the terms of the GNU Lesser General Public License version 3
20 # only, as published by the Free Software Foundation.
22 # OpenOffice.org is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 # GNU Lesser General Public License version 3 for more details
26 # (a copy is included in the LICENSE file that accompanied this code).
28 # You should have received a copy of the GNU Lesser General Public License
29 # version 3 along with OpenOffice.org. If not, see
30 # <http://www.openoffice.org/license.html>
31 # for a copy of the LGPLv3 License.
33 #*************************************************************************
35 # See Usage() below or invoke without arguments for short instructions.
36 # For long instructions use the source, Luke ;-)
38 use strict;
40 sub Usage()
42 print STDERR
43 "\n",
44 "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
45 "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
46 "listbox entries, postset.mk and file_ooo.scp registry name.\n\n",
48 "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
50 "A language string will be used as a generic string match in all searched files.\n",
51 "You may enclose the language string in word delimiters,\n",
52 "e.g. \\blanguage_german\\b for a specific match.\n",
53 "If the language string expression matches more than one define,\n",
54 "e.g. as in 'german', all matching defines will be processed.\n",
55 "If the language string does not match a define or an identifier in\n",
56 "langtab.src, a generic string match of the listbox entries will be tried.\n\n",
58 "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
59 "decimal, hexagesimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
60 "The exact language_define of an exact match will be used in remaining lookups.\n\n",
62 "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
63 "for example: 'en-US' or 'de-' or '-CH',\n",
64 "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
65 "and xx and YY themselfs may be regular expressions.\n",
66 "Also here a list of matches will be processed.\n\n",
68 "If option --single is given, only the first match will be processed.\n\n";
71 my $SOLARVERSION = $ENV{"SOLARVERSION"};
72 my $INPATH = $ENV{"INPATH"};
73 my $SRC_ROOT = $ENV{"SRC_ROOT"};
74 my $UPDMINOREXT = $ENV{"UPDMINOREXT"};
75 if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT))
77 print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
78 Usage();
79 exit 1;
81 if (!defined($UPDMINOREXT)) {
82 $UPDMINOREXT = '';
84 my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT";
86 my $LANGUAGE_MASK_PRIMARY = 0x03ff;
88 sub getPrimaryLanguage($)
90 my($lcid) = @_;
91 return $lcid & $LANGUAGE_MASK_PRIMARY;
94 sub getSubLanguage($)
96 my($lcid) = @_;
97 return $lcid >> 10;
100 sub makeLangID($$)
102 my( $sub, $pri) = @_;
103 return ($sub << 10) | $pri;
107 sub grepFile($$$$@)
109 my( $regex, $path, $module, $name, @addregex) = @_;
110 my @result;
111 my $found = 0;
112 my $arefound = '';
113 my $file;
114 # Try module under current working directory first to catch local
115 # modifications. A Not yet delivered lang.h is a special case.
116 if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") {
117 $file = "./$module/inc/i18npool/lang.h"; }
118 else {
119 $file = "./$module/$name"; }
120 if (!($found = open( IN, $file)))
122 # Then with the given path.
123 $file = "$path/$module/$name";
124 if (!($found = open( IN, $file)))
126 print "No $file\n";
127 $file = "$path/$module.lnk/$name";
128 if (!($found = open( IN, $file))) {
129 print "No $file.\n";
130 $file = "$path/$module.link/$name";
131 if (!($found = open( IN, $file))) {
132 print "No $file either.\n"; }
136 if ($found)
138 $found = 0;
139 while (my $line = <IN>)
141 if ($line =~ /$regex/)
143 if (!$found)
145 $found = 1;
146 print "$file:\n";
148 chomp( $line);
149 print "$line\n";
150 push( @result, $line);
152 else
154 for my $re (@addregex)
156 if ($re ne $arefound && $line =~ /$re/)
158 if ($arefound eq '')
160 $arefound = $re;
162 else
164 if (!$found)
166 $found = 1;
167 print "$file:\n";
169 chomp( $line);
170 print "$line\n";
171 push( @result, $line);
177 close( IN);
179 if (!$found) {
180 print "Not found in $file\n"; }
181 return @result;
185 sub main()
187 my( $lcid, @parts, $grepdef, $options, $single);
188 $grepdef = 0;
189 $single = 0;
190 for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
192 if ($ARGV[$options] eq '--single') { $single = 1; }
193 else { print "Unknown option: $ARGV[$options]\n"; }
195 if (@ARGV == 1 + $options)
197 # 0x hex, 0b bin, 0 oct
198 if ($ARGV[$options] =~ /^0/) {
199 $lcid = oct( $ARGV[0]); }
200 elsif ($ARGV[$options] =~ /^[0-9]/) {
201 $lcid = $ARGV[$options]; }
202 else
204 $grepdef = $ARGV[$options];
205 $lcid = 0;
207 $parts[0] = getPrimaryLanguage( $lcid);
208 $parts[1] = getSubLanguage( $lcid);
210 elsif (@ARGV == 2 + $options)
212 for (my $i = $options; $i < 2 + $options; ++$i)
214 if ($ARGV[$i] =~ /^0/) {
215 $parts[$i] = oct( $ARGV[$i]); }
216 else {
217 $parts[$i] = $ARGV[$i]; }
219 $lcid = makeLangID( $parts[1], $parts[0]);
221 else
223 Usage();
224 return 1;
226 my $modifier = "(?i)";
227 my (@resultlist, @greplist, $result);
228 # If no string was given on the command line, but value(s) were, lookup the
229 # LangID value to obtain the define identifier.
230 if ($grepdef)
232 # #define LANGUAGE_AFRIKAANS 0x0436
233 @resultlist = grepFile(
234 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
235 $SOLENVINC, "i18npool", "lang.h", ());
237 else
239 printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid,
240 $lcid, $parts[0], $parts[1]);
241 my $buf = sprintf( "0x%04X", $lcid);
242 @resultlist = grepFile(
243 '^\s*#\s*define\s+\w+\s+' . $buf,
244 $SOLENVINC, "i18npool", "lang.h", ());
246 for $result (@resultlist)
248 # #define LANGUAGE_AFRIKAANS 0x0436
249 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
251 push( @greplist, '\b' . $1 . '\b');
252 $modifier = ""; # complete identifier now case sensitive
253 if ($single) {
254 last; }
257 # If the string given is of the form xx-yy lookup a language,country pair
258 # to obtain the define identifier. xx and yy themselfs may be regexps.
259 # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy'
260 if ($grepdef =~ /^(.*)-$/) {
261 $grepdef = $1 . "-.*"; }
262 if ($grepdef =~ /^-(.*)$/) {
263 $grepdef = ".*-" . $1; }
264 if ($grepdef =~ /^(.*)-(.*)$/)
266 my $lang = $1;
267 my $coun = $2;
268 $lang = lc($lang);
269 $coun = uc($coun);
270 # { LANGUAGE_AFRIKAANS, "af", "ZA" },
271 @resultlist = grepFile(
272 '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,',
273 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
274 for $result (@resultlist)
276 if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
278 push( @greplist, '\b' . $1 . '\b');
279 $modifier = ""; # complete identifier now case sensitive
280 if ($single) {
281 last; }
284 $grepdef = 0;
286 if (!@greplist && $grepdef) {
287 push( @greplist, $grepdef); }
288 for $grepdef (@greplist)
290 print "\nUsing: " . $grepdef . "\n";
292 # Decimal LCID, was needed for Langpack.ulf but isn't used anymore,
293 # keep just in case we'd need it again.
294 # #define LANGUAGE_AFRIKAANS 0x0436
295 @resultlist = grepFile(
296 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
297 $SOLENVINC, "i18npool", "lang.h", ());
298 my @lcidlist;
299 for $result (@resultlist)
301 # #define LANGUAGE_AFRIKAANS 0x0436
302 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
304 push( @lcidlist, oct( $2));
308 # { LANGUAGE_AFRIKAANS, "af", "ZA" },
309 @resultlist = grepFile(
310 $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
311 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
313 my @langcoungreplist;
314 for $result (@resultlist)
316 if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
318 my $lang = $1;
319 my $coun = $2;
320 my $loca;
321 if ($coun)
323 $loca = $lang . "_" . $coun;
324 push( @langcoungreplist, $lang . '(-' . $coun . ')?');
326 else
328 $loca = $lang;
329 $coun = "";
330 push( @langcoungreplist, $lang);
332 my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml";
333 my $found;
334 if (!($found = open( LD, $file)))
336 $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml";
337 if (!($found = open( LD, $file)))
339 $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml";
340 $found = open( LD, $file);
343 if ($found)
345 print "Found $file:\n";
346 my $on = 0;
347 while (my $line = <LD>)
349 if ($line =~ /<(Language|Country)>/) {
350 $on = 1; }
351 if ($on) {
352 print $line; }
353 if ($line =~ /<\/(Language|Country)>/) {
354 $on = 0; }
356 close( LD);
358 else {
359 print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; }
363 # case LANGUAGE_ARABIC:
364 grepFile(
365 $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:',
366 "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ());
368 # With CWS 'langstatusbar' the language listbox resource file gets a new location.
369 my $module = "svx";
370 my $name = "source/dialog/langtab.src";
371 if (!(-e "$SRC_ROOT/$module/$name")) {
372 $module = "svtools";
373 $name = "source/misc/langtab.src";
375 # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ;
376 # lookup define
377 @resultlist = grepFile(
378 $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;',
379 "$SRC_ROOT", $module, $name, ());
380 # lookup string
381 if (!@resultlist) {
382 grepFile(
383 $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;',
384 "$SRC_ROOT", $module, $name, ()); }
386 for my $langcoun (@langcoungreplist)
388 # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu";
389 grepFile(
390 '^\s*Name\s*\(' . $langcoun . '\)\s*=',
391 "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());
392 # completelangiso=af ar as-IN ... zu
393 grepFile(
394 '^\s*completelangiso\s*[= ](.{2,3}(-..)?)*' . $langcoun . '',
395 "$SRC_ROOT", "solenv", "inc/postset.mk",
396 # needs a duplicated pair of backslashes to produce a literal \\
397 ('^\s*completelangiso\s*=', '^\s+' . $langcoun . '\s*\\\\*$'));
400 return 0;
403 main();