Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / isolang / langid.pl
blob78221b5527aaa8a5e59b39d59d119da1f1ced9b2
1 : # -*- perl -*- vim: ft=perl
2 eval 'exec perl -w -S $0 ${1+"$@"}'
3 if 0;
5 # This file is part of the LibreOffice project.
7 # This Source Code Form is subject to the terms of the Mozilla Public
8 # License, v. 2.0. If a copy of the MPL was not distributed with this
9 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 # This file incorporates work covered by the following license notice:
13 # Licensed to the Apache Software Foundation (ASF) under one or more
14 # contributor license agreements. See the NOTICE file distributed
15 # with this work for additional information regarding copyright
16 # ownership. The ASF licenses this file to you under the Apache
17 # License, Version 2.0 (the "License"); you may not use this file
18 # except in compliance with the License. You may obtain a copy of
19 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 # See Usage() below or invoke without arguments for short instructions.
23 # For long instructions use the source, Luke ;-)
25 use strict;
# Usage()
#
# Write the short help text to STDERR: what the tool looks up and which
# forms of command line arguments it accepts.  Takes no arguments.
sub Usage()
{
    # Tool description and the list of files that get searched.
    my @text = (
        "\n",
        "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
        "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
        "listbox entries, langlist.mk, file_ooo.scp registry name, languages.pm and\n",
        "msi-encodinglist.txt\n\n",
    );

    # Synopsis; $0 expands to the name the script was invoked with.
    push( @text,
        "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n");

    # Argument form: free language string.
    push( @text,
        "A language string will be used as a generic string match in all searched files.\n",
        "You may enclose the language string in word delimiters,\n",
        "e.g. \\blanguage_german\\b for a specific match.\n",
        "If the language string expression matches more than one define,\n",
        "e.g. as in 'german', all matching defines will be processed.\n",
        "If the language string does not match a define or an identifier in\n",
        "langtab.src, a generic string match of the listbox entries will be tried.\n\n");

    # Argument form: numeric values.
    push( @text,
        "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
        "decimal, hexadecimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
        "The exact language_define of an exact match will be used in remaining lookups.\n\n");

    # Argument form: language-country pair.
    push( @text,
        "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
        "for example: 'en-US' or 'de-' or '-CH',\n",
        "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
        "and xx and YY themselves may be regular expressions.\n",
        "Also here a list of matches will be processed.\n\n");

    # The only option.
    push( @text,
        "If option --single is given, only the first match will be processed.\n\n");

    print STDERR @text;
}
# Build environment settings.  SOLARVERSION, INPATH and SRC_ROOT are
# mandatory; UPDMINOREXT is optional and defaults to the empty string.
my $SOLARVERSION = $ENV{"SOLARVERSION"};
my $INPATH       = $ENV{"INPATH"};
my $SRC_ROOT     = $ENV{"SRC_ROOT"};
my $UPDMINOREXT  = $ENV{"UPDMINOREXT"};

unless (defined($SOLARVERSION) && defined($INPATH) && defined($SRC_ROOT))
{
    print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
    Usage();
    exit 1;
}

$UPDMINOREXT = '' unless defined($UPDMINOREXT);

# Include directory below the solver; grepFile() compares against it to
# recognize a request for lang.h.
my $SOLENVINC = $SOLARVERSION . "/" . $INPATH . "/inc" . $UPDMINOREXT;

# Bit mask selecting the low ten bits of a LangID, used by
# getPrimaryLanguage().
my $LANGUAGE_MASK_PRIMARY = 0x03ff;
# getPrimaryLanguage( $lcid )
#
# Return the primary language part of a LangID, i.e. the bits selected by
# $LANGUAGE_MASK_PRIMARY.
sub getPrimaryLanguage($)
{
    my $langid = shift;
    my $primary = $langid & $LANGUAGE_MASK_PRIMARY;
    return $primary;
}
# getSubLanguage( $lcid )
#
# Return the sublanguage part of a LangID, i.e. everything above the low
# ten bits, shifted down.
sub getSubLanguage($)
{
    my $langid = shift;
    my $sublang = $langid >> 10;
    return $sublang;
}
# makeLangID( $sub, $pri )
#
# Combine a sublanguage and a primary language value into a LangID: the
# sublanguage is shifted left by ten bits and OR-ed with the primary
# language.  Note the argument order: sublanguage first.
sub makeLangID($$)
{
    my $sublang = shift;
    my $primary = shift;
    my $shifted = $sublang << 10;
    return $shifted | $primary;
}
# grepFile( $regex, $path, $module, $name, @addregex )
#
# Search a file line by line, print every matching line and return the
# matching lines (chomped) as a list.
#
#   $regex     Pattern a line has to match.
#   $path      Root directory used if the file is not found relative to
#              the current working directory.
#   $module    Module directory below $path.
#   $name      File name relative to the module directory.
#   @addregex  Optional additional patterns, applied only to lines that do
#              not match $regex.  By convention the first element opens a
#              section, the second element closes it, and all further
#              elements are matched against lines while the section is
#              open.
#
# The file is looked up as ./$module/$name first, then $path/$module/$name,
# $path/$module.lnk/$name and $path/$module.link/$name.  The file name is
# printed once before the first matching line; "Not found in ..." is
# printed if nothing matched or no file could be opened.
sub grepFile($$$$@)
{
    my( $regex, $path, $module, $name, @addregex) = @_;
    my @result;
    my $found = 0;
    my $areopen = 0;
    my $arecloser = '';
    my $file;
    # Lexical handle and 3-arg open: the file name can never be taken as
    # an open mode and no global bareword handle is shared between calls.
    my $in;
    # Try module under current working directory first to catch local
    # modifications. A not yet delivered lang.h is a special case.
    if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") {
        $file = "./$module/inc/i18npool/lang.h"; }
    else {
        $file = "./$module/$name"; }
    if (!($found = open( $in, '<', $file)))
    {
        # Then with the given path, followed by the .lnk and .link
        # variants of the module directory.  Each entry carries the tail
        # of the message printed if that candidate cannot be opened.
        my @fallbacks = (
            [ "$path/$module/$name",      ''         ],
            [ "$path/$module.lnk/$name",  '.'        ],
            [ "$path/$module.link/$name", ' either.' ],
        );
        for my $fallback (@fallbacks)
        {
            my $suffix;
            ( $file, $suffix) = @$fallback;
            if ($found = open( $in, '<', $file)) {
                last; }
            print "No $file$suffix\n";
        }
    }
    if ($found)
    {
        # From here on $found means "at least one line matched".
        $found = 0;
        while (my $line = <$in>)
        {
            if ($line =~ /$regex/)
            {
                if (!$found)
                {
                    $found = 1;
                    print "$file:\n";
                }
                chomp( $line);
                print "$line\n";
                push( @result, $line);
            }
            elsif (@addregex)
            {
                # By convention first element is opener, second element is closer.
                if (!$areopen)
                {
                    if ($line =~ /$addregex[0]/)
                    {
                        $areopen = 1;
                        $arecloser = $addregex[1];
                    }
                }
                if ($areopen)
                {
                    # A line is recorded once for every additional
                    # pattern it matches.
                    for (my $i = 2; $i < @addregex; ++$i)
                    {
                        if ($line =~ /$addregex[$i]/)
                        {
                            if (!$found)
                            {
                                $found = 1;
                                print "$file:\n";
                            }
                            chomp( $line);
                            print "$line\n";
                            push( @result, $line);
                        }
                    }
                    # The closer is tested on the same line as the opener,
                    # so a section may open and close on a single line.
                    if ($line =~ /$arecloser/)
                    {
                        $areopen = 0;
                    }
                }
            }
        }
        close( $in);
    }
    if (!$found) {
        print "Not found in $file\n";
        #print "Not found in $file for $regex @addregex\n";
    }
    return @result;
}
# main()
#
# Evaluate the command line and run all lookups.
#
# After the leading options (only --single is known) either one argument
# is expected, which is a numeric LangID or a language string / xx-YY
# pair, or two numeric arguments, primary language and sublanguage.
# Numeric arguments with a leading 0 are converted with oct(), which also
# handles the 0x and 0b prefixes.
#
# Returns 1 after printing the usage if the number of arguments is wrong,
# 0 otherwise.
sub main()
{
    my( $lcid, @parts, $grepdef, $options, $single);
    $grepdef = 0;
    $single = 0;
    # Count and evaluate leading options; $options ends up as the index of
    # the first non-option argument.
    for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
    {
        if ($ARGV[$options] eq '--single') { $single = 1; }
        else { print "Unknown option: $ARGV[$options]\n"; }
    }
    if (@ARGV == 1 + $options)
    {
        # 0x hex, 0b bin, 0 oct
        if ($ARGV[$options] =~ /^0/) {
            # Convert the argument itself, not $ARGV[0], which is an
            # option string if options were given.
            $lcid = oct( $ARGV[$options]); }
        elsif ($ARGV[$options] =~ /^[0-9]/) {
            $lcid = $ARGV[$options]; }
        else
        {
            $grepdef = $ARGV[$options];
            $lcid = 0;
        }
        $parts[0] = getPrimaryLanguage( $lcid);
        $parts[1] = getSubLanguage( $lcid);
    }
    elsif (@ARGV == 2 + $options)
    {
        # Primary language goes to $parts[0] and sublanguage to $parts[1],
        # independent of the number of leading options.
        for (my $i = 0; $i < 2; ++$i)
        {
            my $arg = $ARGV[$options + $i];
            if ($arg =~ /^0/) {
                $parts[$i] = oct( $arg); }
            else {
                $parts[$i] = $arg; }
        }
        $lcid = makeLangID( $parts[1], $parts[0]);
    }
    else
    {
        Usage();
        return 1;
    }
    my $modifier = "(?i)";
    my (@resultlist, @greplist);
    # If a string was given on the command line, match it against the
    # define identifiers; if value(s) were given instead, lookup the
    # LangID value to obtain the define identifier.
    if ($grepdef)
    {
        # #define LANGUAGE_AFRIKAANS 0x0436
        @resultlist = grepFile(
            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
            $SOLENVINC, "i18npool", "lang.h", ());
    }
    else
    {
        printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid,
            $lcid, $parts[0], $parts[1]);
        my $buf = sprintf( "0x%04X", $lcid);
        @resultlist = grepFile(
            '^\s*#\s*define\s+\w+\s+' . $buf,
            $SOLENVINC, "i18npool", "lang.h", ());
    }
    for my $result (@resultlist)
    {
        # #define LANGUAGE_AFRIKAANS 0x0436
        if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
        {
            push( @greplist, '\b' . $1 . '\b');
            $modifier = "";     # complete identifier now case sensitive
            if ($single) {
                last; }
        }
    }
    # If the string given is of the form xx-yy lookup a language,country pair
    # to obtain the define identifier. xx and yy themselves may be regexps.
    # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy'
    if ($grepdef =~ /^(.*)-$/) {
        $grepdef = $1 . "-.*"; }
    if ($grepdef =~ /^-(.*)$/) {
        $grepdef = ".*-" . $1; }
    if ($grepdef =~ /^(.*)-(.*)$/)
    {
        my $lang = $1;
        my $coun = $2;
        $lang = lc($lang);
        $coun = uc($coun);
        # { LANGUAGE_AFRIKAANS, "af", "ZA" },
        @resultlist = grepFile(
            '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,',
            "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
        for my $result (@resultlist)
        {
            if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
            {
                push( @greplist, '\b' . $1 . '\b');
                $modifier = "";     # complete identifier now case sensitive
                if ($single) {
                    last; }
            }
        }
        # The xx-yy string itself is not used as a search string below.
        $grepdef = 0;
    }
    # No define identifier found, fall back to the string as given.
    if (!@greplist && $grepdef) {
        push( @greplist, $grepdef); }
    for my $define (@greplist)
    {
        print "\nUsing: " . $define . "\n";

        # Decimal LCID, was needed for Langpack.ulf but isn't used anymore,
        # keep just in case we'd need it again.
        # #define LANGUAGE_AFRIKAANS 0x0436
        @resultlist = grepFile(
            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $define,
            $SOLENVINC, "i18npool", "lang.h", ());
        my @lcidlist;
        for my $result (@resultlist)
        {
            # #define LANGUAGE_AFRIKAANS 0x0436
            if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
            {
                push( @lcidlist, oct( $2));
            }
        }

        # { LANGUAGE_AFRIKAANS, "af", "ZA" },
        @resultlist = grepFile(
            $modifier . '^\s*\{\s*.*' . $define . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
            "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());

        my @langcoungreplist;
        for my $result (@resultlist)
        {
            if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
            {
                my $lang = $1;
                my $coun = $2;
                my $loca;
                if ($coun)
                {
                    $loca = $lang . "_" . $coun;
                    push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?');
                }
                else
                {
                    $loca = $lang;
                    $coun = "";
                    push( @langcoungreplist, '\b' . $lang . '\b');
                }
                # Locale data file, tried below the module directory and
                # its .lnk and .link variants.  Lexical handle and 3-arg
                # open, so the file name can never be taken as a mode.
                my( $ld, $file);
                my $found = 0;
                for my $moddir ("i18npool", "i18npool.lnk", "i18npool.link")
                {
                    $file = "$SRC_ROOT/$moddir/source/localedata/data/$loca.xml";
                    if ($found = open( $ld, '<', $file)) {
                        last; }
                }
                if ($found)
                {
                    print "Found $file:\n";
                    # Print the <Language> and <Country> elements only.
                    my $on = 0;
                    while (my $line = <$ld>)
                    {
                        if ($line =~ /<(Language|Country)>/) {
                            $on = 1; }
                        if ($on) {
                            print $line; }
                        if ($line =~ /<\/(Language|Country)>/) {
                            $on = 0; }
                    }
                    close( $ld);
                }
                else {
                    print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; }
            }
        }

        # case LANGUAGE_ARABIC:
        grepFile(
            $modifier . '^\s*case\s*.*' . $define . '.*\s*:',
            "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ());

        # With CWS 'langstatusbar' the language listbox resource file gets a new location.
        my $module = "svx";
        my $name = "source/dialog/langtab.src";
        if (!(-e "$SRC_ROOT/$module/$name")) {
            $module = "svtools";
            $name = "source/misc/langtab.src";
        }
        # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ;
        # lookup define
        @resultlist = grepFile(
            $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $define . '.*\s*;\s*>\s*;',
            "$SRC_ROOT", $module, $name, ());
        # lookup string
        if (!@resultlist) {
            grepFile(
                $modifier . '^\s*<\s*\".*' . $define . '.*\"\s*;\s*.*\s*;\s*>\s*;',
                "$SRC_ROOT", $module, $name, ()); }

        for my $langcoun (@langcoungreplist)
        {
            # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu";
            grepFile(
                '^\s*Name\s*\(' . $langcoun . '\)\s*=',
                "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());

            # completelangiso=af ar as-IN ... zu
            grepFile(
                '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '',
                "$SRC_ROOT", "solenv", "inc/langlist.mk",
                # needs a duplicated pair of backslashes to produce a literal \\
                ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$'));

            # @noMSLocaleLangs = ( "br", "bs", ... )
            grepFile(
                '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '',
                "$SRC_ROOT", "solenv", "bin/modules/installer/languages.pm",
                ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"'));

            # af 1252 1078 # Afrikaans
            grepFile(
                '^\s*' . $langcoun . '',
                "$SRC_ROOT", "l10ntools", "source/ulfconv/msi-encodinglist.txt", ());
        }
    }
    return 0;
}
# Run the tool and hand main()'s result to the caller as exit status, so a
# wrong command line (main() returns 1) is reported as failure.
exit( main());