Replace functions which called once with their bodies
[pidgin-git.git] / po / check_po.pl
blob16819c9c037577ef80dad7ef59d9cf0fef70c901
1 #!/usr/bin/env perl
3 # check_po.pl - check po file translations for likely errors
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au
6 # This script is hereby placed in the Public Domain.
8 # Various checks on po file translations:
9 # - printf-style format strings;
10 # - differences in trailing newlines;
11 # - empty (non-fuzzy) msgid;
12 # - likely whitespace errors on joining multi-line entries
13 # Ignores all fuzzy entries.
15 # Options:
16 # -x Don't do standard checks above (eg, just check one of below).
17 # -n Check newlines within strings; ie, that have equal numbers
18 # of newlines in msgstr and msgid. (Optional because this may
19 # happen legitimately.)
20 # -w Check leading whitespace. Sometimes whitespace is simply
21 # spacing (eg, for widget labels etc), or punctuation differences,
22 # so this may be ok.
23 # -W Check trailing whitespace. See -w above.
24 # -p Check trailing punctuation.
25 # -c Check capitalization of first non-whitespace character
26 # (only if [a-zA-Z]).
27 # -e Check on empty (c.q. new) msgstr
29 # Reads stdin (or filename args, via <>), writes any problems to stdout.
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
33 # Version: 0.41 (2002-06-06)
35 # TODO: This script needs to be able to handle Farsi's %Id flag for
36 # number format specifiers. More information on how it works, see
37 # http://www.gnu.org/software/hello/manual/gettext/c_002dformat.html
38 # It's possible someone has already made this change... look around
39 # for an updated version of this script.
41 use strict;
42 use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e);
43 use Getopt::Std;
45 getopts('cnpwWxe');
47 # Globals, for current po entry:
49 # Note that msgid and msgstr have newlines represented by the
50 # two characters '\' and 'n' (and similarly for other escapes).
52 my @amsgid; # lines exactly as in input
53 my @amsgstr;
54 my $entryline; # lineno where entry starts
55 my $msgid; # lines joined by ""
56 my $msgstr;
57 my $is_fuzzy;
58 my $is_cformat;
59 my $state; # From constant values below.
60 my $did_print; # Whether we have printed this entry, to
61 # print only once for multiple problems.
63 use constant S_LOOKING_START => 0; # looking for start of entry
64 use constant S_DOING_MSGID => 1; # doing msgid part
65 use constant S_DOING_MSGSTR => 2; # doing msgstr part
67 # Initialize or reinitalize globals to prepare for new entry:
68 sub new_entry {
69 @amsgid = ();
70 @amsgstr = ();
71 $msgid = undef;
72 $msgstr = undef;
73 $entryline = 0;
74 $is_fuzzy = 0;
75 $is_cformat = 0;
76 $did_print = 0;
77 $state = S_LOOKING_START;
80 # Nicely print either a "msgid" or "msgstr" (name is one of these)
81 # with given array of data.
82 sub print_one {
83 my $name = shift;
84 print " $name \"", join("\"\n \"", @_), "\"\n";
87 # Print a problem (args like print()), preceeded by entry unless
88 # we have already printed that: label, and msgid and msgstr.
90 sub print_problem {
91 unless ($did_print) {
92 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
93 print_one("msgid", @amsgid);
94 print_one("msgstr", @amsgstr);
95 $did_print = 1;
97 print "*** ", @_;
100 # Check final newline: probably, translations should end in a newline
101 # if and only if the original string does.
102 # (See also check_trailing_whitespace and check_num_newlines below.)
104 sub check_trailing_newlines {
105 if ($opt_x) { return; }
107 my ($ichar, $schar);
109 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : "";
110 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : "";
112 if ($ichar eq "\\n" && $schar ne "\\n") {
113 print_problem "Missing trailing newline\n";
115 if ($ichar ne "\\n" && $schar eq "\\n") {
116 print_problem "Extra trailing newline\n";
121 # Check leading whitespace. In general, any leading whitespace should
122 # be the same in msgstr and msgid -- but not always.
124 sub check_leading_whitespace {
125 unless ($opt_w) { return; }
127 my ($id, $str);
129 if ($msgid =~ m/^(\s+)/) {
130 $id = $1;
131 } else {
132 $id = "";
134 if ($msgstr =~ m/^(\s+)/) {
135 $str = $1;
136 } else {
137 $str = "";
139 if ($id ne $str) {
140 print_problem "Different leading whitespace\n";
144 # Check trailing whitespace. In general, any trailing whitespace should
145 # be the same in msgstr and msgid -- but not always.
147 sub check_trailing_whitespace {
148 unless ($opt_W) { return; }
150 my ($id, $str);
152 if ($msgid =~ m/((?:\s|\\n)+)$/) {
153 $id = $1;
154 } else {
155 $id = "";
157 if ($msgstr =~ m/((?:\s|\\n)+)$/) {
158 $str = $1;
159 } else {
160 $str = "";
162 if ($id ne $str) {
163 print_problem "Different trailing whitespace\n";
167 # Check equal numbers of newlines. In general ... etc.
169 sub check_num_newlines {
170 unless ($opt_n) { return; }
172 my $num_i = ($msgid =~ m(\\n)g);
173 my $num_s = ($msgstr =~ m(\\n)g);
175 if ($num_i != $num_s) {
176 print_problem "Mismatch in newline count\n";
181 # Check capitalization of first non-whitespace character (for [a-zA-Z]
182 # only). In general ... etc.
184 sub check_leading_capitalization {
185 unless ($opt_c) { return; }
187 my ($id, $str);
189 if ($msgid =~ m/^\s*([a-zA-Z])/) {
190 $id = $1;
192 if ($msgstr =~ m/^\s*([a-zA-Z])/) {
193 $str = $1;
195 if (defined($id) && defined($str)) {
196 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) ||
197 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
198 print_problem "Different leading capitalization\n";
203 # Check trailing 'punctuation' characters (ignoring trailing whitespace).
204 # In general .. etc.
206 sub check_trailing_punctuation {
207 unless ($opt_p) { return; }
209 my ($id, $str);
211 # Might want more characters:
212 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
213 $id = $1;
214 } else {
215 $id = "";
217 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
218 $str = $1;
219 } else {
220 $str = "";
222 ##print "$id $str\n";
223 if ($id ne $str) {
224 print_problem "Different trailing punctuation\n";
228 # Check that multiline strings have whitespace separation, since
229 # otherwise, eg:
230 # msgstr "this is a multiline"
231 # "string"
232 # expands to:
233 # "this is a multilinestring"
235 sub check_whitespace_joins {
236 if ($opt_x) { return; }
238 my $ok = 1;
239 my $i = 0;
241 foreach my $aref (\@amsgid, \@amsgstr) {
242 my $prev = undef;
243 LINE:
244 foreach my $line (@$aref) {
245 if (defined($prev)
246 && length($prev)
247 && $prev !~ /\s$/
248 && $prev !~ /\\n$/
249 && $line !~ /^\s/
250 && $line !~ /^\\n/)
252 $ok = 0;
253 last LINE;
255 $prev = $line;
257 if (!$ok) {
258 print_problem("Possible non-whitespace line-join problem in ",
259 ($i==0 ? "msgid" : "msgstr"), " \n");
261 $i++;
265 # Check printf-style format entries.
266 # Non-trivial, because translation strings may use format specifiers
267 # out of order, or skip some specifiers etc. Also gettext marks
268 # anything with '%' as cformat, though not all are.
270 sub check_cformat {
271 unless ($is_cformat) { return; }
272 if ($opt_x) { return; }
274 my (@iform, @sform);
275 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g);
276 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g);
278 ##print join("::", @iform), "\n";
279 ##print join("::", @sform), "\n";
281 my $js; # index in sform
282 my $j; # index into iform
283 SFORM:
284 for ($js=0; $js < @sform; $js++) {
285 my $sf = $sform[$js];
286 my $sf_orig = $sf;
287 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
288 $j = $1-1;
289 } else {
290 $j = $js;
292 if ($j > $#iform) {
293 print_problem("Format number mismatch for $sf_orig [msgstr:",
294 ($js+1), "]\n");
295 next SFORM;
297 my $if = $iform[$j];
298 if ($sf ne $if) {
299 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
300 " vs $if [msgid:", ($j+1), "]\n");
305 # Run all individual checks on current entry, reporting any problems.
306 sub check_entry {
307 if ($is_fuzzy) {
308 return;
310 $msgid = join("", @amsgid);
311 $msgstr = join("", @amsgstr);
313 unless ($opt_x) {
314 if (length($msgid)==0) {
315 print_problem "Zero length msgid\n";
318 if (length($msgstr)==0) {
319 unless ($opt_e) { return; }
320 print_problem "Untranslated msgid\n";
322 check_cformat;
323 check_whitespace_joins;
324 check_num_newlines;
325 check_leading_whitespace;
326 check_trailing_newlines;
327 check_trailing_whitespace;
328 check_leading_capitalization;
329 check_trailing_punctuation;
332 new_entry;
334 LINE:
335 while(<>) {
336 if ( m(^\s*$) ) {
337 if ($state==S_DOING_MSGSTR) {
338 check_entry;
339 new_entry;
341 next LINE;
343 if ( m(^\#, fuzzy) ) {
344 $is_fuzzy = 1;
346 if ( m(^\#, .*c-format) ) {
347 # .* is because can have fuzzy, c-format
348 $is_cformat = 1;
350 if ( m(^\#) ) {
351 next LINE;
353 if ( m(^msgid \"(.*)\"$) ) {
354 $entryline = $.;
355 @amsgid = ($1);
356 $state = S_DOING_MSGID;
357 next LINE;
359 if ( m(^msgid_plural \"(.*)\"$) ) {
360 $entryline = $.;
361 @amsgid = ($1);
362 $state = S_DOING_MSGID;
363 next LINE;
365 if ( m(^msgstr \"(.*)\"$) ) {
366 @amsgstr = ($1);
367 $state = S_DOING_MSGSTR;
368 next LINE;
370 if ( m(^msgstr\[[0-5]\] \"(.*)\"$) ) {
371 @amsgstr = ($1);
372 $state = S_DOING_MSGSTR;
373 next LINE;
375 if ( m(^\"(.*)\"$) ) {
376 if ($state==S_DOING_MSGID) {
377 push @amsgid, $1;
378 } elsif($state==S_DOING_MSGSTR) {
379 push @amsgstr, $1;
380 } else {
381 die "Looking at string $_ in bad state $state,";
383 next LINE;
385 die "Unexpected at $.: ", $_;