1 : # -*- perl -*- vim: ft=perl
2 eval 'exec perl -w -S $0 ${1+"$@"}'
4 #*************************************************************************
6 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 # Copyright 2008 by Sun Microsystems, Inc.
10 # OpenOffice.org - a multi-platform office productivity suite
12 # $RCSfile: langid.pl,v $
16 # This file is part of OpenOffice.org.
18 # OpenOffice.org is free software: you can redistribute it and/or modify
19 # it under the terms of the GNU Lesser General Public License version 3
20 # only, as published by the Free Software Foundation.
22 # OpenOffice.org is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 # GNU Lesser General Public License version 3 for more details
26 # (a copy is included in the LICENSE file that accompanied this code).
28 # You should have received a copy of the GNU Lesser General Public License
29 # version 3 along with OpenOffice.org. If not, see
30 # <http://www.openoffice.org/license.html>
31 # for a copy of the LGPLv3 License.
33 #*************************************************************************
35 # See Usage() below or invoke without arguments for short instructions.
36 # For long instructions use the source, Luke ;-)
44 "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
45 "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
46 "listbox entries, postset.mk and file_ooo.scp registry name.\n\n",
48 "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
50 "A language string will be used as a generic string match in all searched files.\n",
51 "You may enclose the language string in word delimiters,\n",
52 "e.g. \\blanguage_german\\b for a specific match.\n",
53 "If the language string expression matches more than one define,\n",
54 "e.g. as in 'german', all matching defines will be processed.\n",
55 "If the language string does not match a define or an identifier in\n",
56 "langtab.src, a generic string match of the listbox entries will be tried.\n\n",
58 "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
59 "decimal, hexagesimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
60 "The exact language_define of an exact match will be used in remaining lookups.\n\n",
62 "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
63 "for example: 'en-US' or 'de-' or '-CH',\n",
64 "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
65 "and xx and YY themselfs may be regular expressions.\n",
66 "Also here a list of matches will be processed.\n\n",
68 "If option --single is given, only the first match will be processed.\n\n";
71 my $SOLARVERSION = $ENV{"SOLARVERSION"};
72 my $INPATH = $ENV{"INPATH"};
73 my $SRC_ROOT = $ENV{"SRC_ROOT"};
74 my $UPDMINOREXT = $ENV{"UPDMINOREXT"};
75 if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT))
77 print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
81 if (!defined($UPDMINOREXT)) {
84 my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT";
86 my $LANGUAGE_MASK_PRIMARY = 0x03ff;
88 sub getPrimaryLanguage
($)
91 return $lcid & $LANGUAGE_MASK_PRIMARY;
102 my( $sub, $pri) = @_;
103 return ($sub << 10) | $pri;
109 my( $regex, $path, $module, $name, @addregex) = @_;
114 # Try module under current working directory first to catch local
115 # modifications. A Not yet delivered lang.h is a special case.
116 if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") {
117 $file = "./$module/inc/i18npool/lang.h"; }
119 $file = "./$module/$name"; }
120 if (!($found = open( IN
, $file)))
122 # Then with the given path.
123 $file = "$path/$module/$name";
124 if (!($found = open( IN
, $file)))
127 $file = "$path/$module.lnk/$name";
128 if (!($found = open( IN
, $file))) {
130 $file = "$path/$module.link/$name";
131 if (!($found = open( IN
, $file))) {
132 print "No $file either.\n"; }
139 while (my $line = <IN
>)
141 if ($line =~ /$regex/)
150 push( @result, $line);
154 for my $re (@addregex)
156 if ($re ne $arefound && $line =~ /$re/)
171 push( @result, $line);
180 print "Not found in $file\n"; }
187 my( $lcid, @parts, $grepdef, $options, $single);
190 for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
192 if ($ARGV[$options] eq '--single') { $single = 1; }
193 else { print "Unknown option: $ARGV[$options]\n"; }
195 if (@ARGV == 1 + $options)
197 # 0x hex, 0b bin, 0 oct
198 if ($ARGV[$options] =~ /^0/) {
199 $lcid = oct( $ARGV[0]); }
200 elsif ($ARGV[$options] =~ /^[0-9]/) {
201 $lcid = $ARGV[$options]; }
204 $grepdef = $ARGV[$options];
207 $parts[0] = getPrimaryLanguage
( $lcid);
208 $parts[1] = getSubLanguage
( $lcid);
210 elsif (@ARGV == 2 + $options)
212 for (my $i = $options; $i < 2 + $options; ++$i)
214 if ($ARGV[$i] =~ /^0/) {
215 $parts[$i] = oct( $ARGV[$i]); }
217 $parts[$i] = $ARGV[$i]; }
219 $lcid = makeLangID
( $parts[1], $parts[0]);
226 my $modifier = "(?i)";
227 my (@resultlist, @greplist, $result);
228 # If no string was given on the command line, but value(s) were, lookup the
229 # LangID value to obtain the define identifier.
232 # #define LANGUAGE_AFRIKAANS 0x0436
233 @resultlist = grepFile
(
234 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
235 $SOLENVINC, "i18npool", "lang.h", ());
239 printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid,
240 $lcid, $parts[0], $parts[1]);
241 my $buf = sprintf( "0x%04X", $lcid);
242 @resultlist = grepFile
(
243 '^\s*#\s*define\s+\w+\s+' . $buf,
244 $SOLENVINC, "i18npool", "lang.h", ());
246 for $result (@resultlist)
248 # #define LANGUAGE_AFRIKAANS 0x0436
249 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
251 push( @greplist, '\b' . $1 . '\b');
252 $modifier = ""; # complete identifier now case sensitive
257 # If the string given is of the form xx-yy lookup a language,country pair
258 # to obtain the define identifier. xx and yy themselfs may be regexps.
259 # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy'
260 if ($grepdef =~ /^(.*)-$/) {
261 $grepdef = $1 . "-.*"; }
262 if ($grepdef =~ /^-(.*)$/) {
263 $grepdef = ".*-" . $1; }
264 if ($grepdef =~ /^(.*)-(.*)$/)
270 # { LANGUAGE_AFRIKAANS, "af", "ZA" },
271 @resultlist = grepFile
(
272 '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,',
273 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
274 for $result (@resultlist)
276 if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
278 push( @greplist, '\b' . $1 . '\b');
279 $modifier = ""; # complete identifier now case sensitive
286 if (!@greplist && $grepdef) {
287 push( @greplist, $grepdef); }
288 for $grepdef (@greplist)
290 print "\nUsing: " . $grepdef . "\n";
292 # Decimal LCID, was needed for Langpack.ulf but isn't used anymore,
293 # keep just in case we'd need it again.
294 # #define LANGUAGE_AFRIKAANS 0x0436
295 @resultlist = grepFile
(
296 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
297 $SOLENVINC, "i18npool", "lang.h", ());
299 for $result (@resultlist)
301 # #define LANGUAGE_AFRIKAANS 0x0436
302 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
304 push( @lcidlist, oct( $2));
308 # { LANGUAGE_AFRIKAANS, "af", "ZA" },
309 @resultlist = grepFile
(
310 $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
311 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
313 my @langcoungreplist;
314 for $result (@resultlist)
316 if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
323 $loca = $lang . "_" . $coun;
324 push( @langcoungreplist, $lang . '(-' . $coun . ')?');
330 push( @langcoungreplist, $lang);
332 my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml";
334 if (!($found = open( LD
, $file)))
336 $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml";
337 if (!($found = open( LD
, $file)))
339 $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml";
340 $found = open( LD
, $file);
345 print "Found $file:\n";
347 while (my $line = <LD
>)
349 if ($line =~ /<(Language|Country)>/) {
353 if ($line =~ /<\/(Language
|Country
)>/) {
359 print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; }
363 # case LANGUAGE_ARABIC:
365 $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:',
366 "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ());
368 # With CWS 'langstatusbar' the language listbox resource file gets a new location.
370 my $name = "source/dialog/langtab.src";
371 if (!(-e
"$SRC_ROOT/$module/$name")) {
373 $name = "source/misc/langtab.src";
375 # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ;
377 @resultlist = grepFile
(
378 $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;',
379 "$SRC_ROOT", $module, $name, ());
383 $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;',
384 "$SRC_ROOT", $module, $name, ()); }
386 for my $langcoun (@langcoungreplist)
388 # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu";
390 '^\s*Name\s*\(' . $langcoun . '\)\s*=',
391 "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());
392 # completelangiso=af ar as-IN ... zu
394 '^\s*completelangiso\s*[= ](.{2,3}(-..)?)*' . $langcoun . '',
395 "$SRC_ROOT", "solenv", "inc/postset.mk",
396 # needs a duplicated pair of backslashes to produce a literal \\
397 ('^\s*completelangiso\s*=', '^\s+' . $langcoun . '\s*\\\\*$'));