3 # check_po.pl - check po file translations for likely errors
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au
6 # This script is hereby placed in the Public Domain.
8 # Various checks on po file translations:
9 # - printf-style format strings;
10 # - differences in trailing newlines;
11 # - empty (non-fuzzy) msgid;
12 # - likely whitespace errors on joining multi-line entries
13 # Ignores all fuzzy entries.
16 # -x Don't do standard checks above (eg, just check one of below).
17 # -n Check newlines within strings; ie, that have equal numbers
18 # of newlines in msgstr and msgid. (Optional because this may
19 # happen legitimately.)
20 # -w Check leading whitespace. Sometimes whitespace is simply
21 # spacing (eg, for widget labels etc), or punctuation differences,
23 # -W Check trailing whitespace. See -w above.
24 # -p Check trailing punctuation.
25 # -c Check capitalization of first non-whitespace character
27 # -e Check on empty (c.q. new) msgstr
29 # Reads stdin (or filename args, via <>), writes any problems to stdout.
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
33 # Version: 0.41 (2002-06-06)
36 use vars
qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e);
41 # Globals, for current po entry:
43 # Note that msgid and msgstr have newlines represented by the
44 # two characters '\' and 'n' (and similarly for other escapes).
46 my @amsgid; # lines exactly as in input
48 my $entryline; # lineno where entry starts
49 my $msgid; # lines joined by ""
53 my $state; # From constant values below.
54 my $did_print; # Whether we have printed this entry, to
55 # print only once for multiple problems.
57 use constant S_LOOKING_START => 0; # looking for start of entry
58 use constant S_DOING_MSGID => 1; # doing msgid part
59 use constant S_DOING_MSGSTR => 2; # doing msgstr part
61 # Initialize or reinitalize globals to prepare for new entry:
71 $state = S_LOOKING_START;
74 # Nicely print either a "msgid" or "msgstr" (name is one of these)
75 # with given array of data.
78 print " $name \"", join("\"\n \"", @_), "\"\n";
81 # Print a problem (args like print()), preceeded by entry unless
82 # we have already printed that: label, and msgid and msgstr.
86 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
87 print_one("msgid", @amsgid);
88 print_one("msgstr", @amsgstr);
94 # Check final newline: probably, translations should end in a newline
95 # if and only if the original string does.
96 # (See also check_trailing_whitespace and check_num_newlines below.)
98 sub check_trailing_newlines {
99 if ($opt_x) { return; }
103 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : "";
104 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : "";
106 if ($ichar eq "\\n" && $schar ne "\\n") {
107 print_problem "Missing trailing newline\n";
109 if ($ichar ne "\\n" && $schar eq "\\n") {
110 print_problem "Extra trailing newline\n";
115 # Check leading whitespace. In general, any leading whitespace should
116 # be the same in msgstr and msgid -- but not always.
118 sub check_leading_whitespace {
119 unless ($opt_w) { return; }
123 if ($msgid =~ m/^(\s+)/) {
128 if ($msgstr =~ m/^(\s+)/) {
134 print_problem "Different leading whitespace\n";
138 # Check trailing whitespace. In general, any trailing whitespace should
139 # be the same in msgstr and msgid -- but not always.
141 sub check_trailing_whitespace {
142 unless ($opt_W) { return; }
146 if ($msgid =~ m/((?:\s|\\n)+)$/) {
151 if ($msgstr =~ m/((?:\s|\\n)+)$/) {
157 print_problem "Different trailing whitespace\n";
161 # Check equal numbers of newlines. In general ... etc.
163 sub check_num_newlines {
164 unless ($opt_n) { return; }
166 my $num_i = ($msgid =~ m(\\n)g);
167 my $num_s = ($msgstr =~ m(\\n)g);
169 if ($num_i != $num_s) {
170 print_problem "Mismatch in newline count\n";
175 # Check capitalization of first non-whitespace character (for [a-zA-Z]
176 # only). In general ... etc.
178 sub check_leading_capitalization {
179 unless ($opt_c) { return; }
183 if ($msgid =~ m/^\s*([a-zA-Z])/) {
186 if ($msgstr =~ m/^\s*([a-zA-Z])/) {
189 if (defined($id) && defined($str)) {
190 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) ||
191 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
192 print_problem "Different leading capitalization\n";
197 # Check trailing 'punctuation' characters (ignoring trailing whitespace).
200 sub check_trailing_punctuation {
201 unless ($opt_p) { return; }
205 # Might want more characters:
206 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
211 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
216 ##print "$id $str\n";
218 print_problem "Different trailing punctuation\n";
222 # Check that multiline strings have whitespace separation, since
224 # msgstr "this is a multiline"
227 # "this is a multilinestring"
229 sub check_whitespace_joins {
230 if ($opt_x) { return; }
235 foreach my $aref (\@amsgid, \@amsgstr) {
238 foreach my $line (@$aref) {
252 print_problem("Possible non-whitespace line-join problem in ",
253 ($i==0 ? "msgid" : "msgstr"), " \n");
259 # Check printf-style format entries.
260 # Non-trivial, because translation strings may use format specifiers
261 # out of order, or skip some specifiers etc. Also gettext marks
262 # anything with '%' as cformat, though not all are.
265 unless ($is_cformat) { return; }
266 if ($opt_x) { return; }
269 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g);
270 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g);
272 ##print join("::", @iform), "\n";
273 ##print join("::", @sform), "\n";
275 my $js; # index in sform
276 my $j; # index into iform
278 for ($js=0; $js < @sform; $js++) {
279 my $sf = $sform[$js];
281 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
287 print_problem("Format number mismatch for $sf_orig [msgstr:",
293 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
294 " vs $if [msgid:", ($j+1), "]\n");
299 # Run all individual checks on current entry, reporting any problems.
304 $msgid = join("", @amsgid);
305 $msgstr = join("", @amsgstr);
308 if (length($msgid)==0) {
309 print_problem "Zero length msgid\n";
312 if (length($msgstr)==0) {
313 unless ($opt_e) { return; }
314 print_problem "Untranslated msgid\n";
317 check_whitespace_joins;
319 check_leading_whitespace;
320 check_trailing_newlines;
321 check_trailing_whitespace;
322 check_leading_capitalization;
323 check_trailing_punctuation;
331 if ($state==S_DOING_MSGSTR) {
337 if ( m(^\#, fuzzy) ) {
340 if ( m(^\#, .*c-format) ) {
341 # .* is because can have fuzzy, c-format
347 if ( m(^msgid \"(.*)\"$) ) {
350 $state = S_DOING_MSGID;
353 if ( m(^msgid_plural \"(.*)\"$) ) {
356 $state = S_DOING_MSGID;
359 if ( m(^msgstr \"(.*)\"$) ) {
361 $state = S_DOING_MSGSTR;
364 if ( m(^msgstr\[[0-2]\] \"(.*)\"$) ) {
366 $state = S_DOING_MSGSTR;
369 if ( m(^\"(.*)\"$) ) {
370 if ($state==S_DOING_MSGID) {
372 } elsif($state==S_DOING_MSGSTR) {
375 die "Looking at string $_ in bad state $state,";
379 die "Unexpected at $.: ", $_;