3 # check_po.pl - check po file translations for likely errors
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au
6 # This script is hereby placed in the Public Domain.
8 # Various checks on po file translations:
9 # - printf-style format strings;
10 # - differences in trailing newlines;
11 # - empty (non-fuzzy) msgid;
12 # - likely whitespace errors on joining multi-line entries
13 # Ignores all fuzzy entries.
16 # -x Don't do standard checks above (eg, just check one of below).
17 # -n Check newlines within strings; ie, that have equal numbers
18 # of newlines in msgstr and msgid. (Optional because this may
19 # happen legitimately.)
20 # -w Check leading whitespace. Sometimes whitespace is simply
21 # spacing (eg, for widget labels etc), or punctuation differences,
23 # -W Check trailing whitespace. See -w above.
24 # -p Check trailing punctuation.
25 # -c Check capitalization of first non-whitespace character
27 # -e Check for empty (i.e. new, untranslated) msgstr
29 # Reads stdin (or filename args, via <>), writes any problems to stdout.
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
33 # Version: 0.41 (2002-06-06)
35 # TODO: This script needs to be able to handle Farsi's %Id flag for
36 # number format specifiers. For more information on how it works, see
37 # http://www.gnu.org/software/hello/manual/gettext/c_002dformat.html
38 # It's possible someone has already made this change... look around
39 # for an updated version of this script.
42 use vars
qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e);
47 # Globals, for current po entry:
49 # Note that msgid and msgstr have newlines represented by the
50 # two characters '\' and 'n' (and similarly for other escapes).
52 my @amsgid; # lines exactly as in input
54 my $entryline; # lineno where entry starts
55 my $msgid; # lines joined by ""
59 my $state; # From constant values below.
60 my $did_print; # Whether we have printed this entry, to
61 # print only once for multiple problems.
63 use constant S_LOOKING_START => 0; # looking for start of entry
64 use constant S_DOING_MSGID => 1; # doing msgid part
65 use constant S_DOING_MSGSTR => 2; # doing msgstr part
67 # Initialize or reinitialize globals to prepare for new entry:
77 $state = S_LOOKING_START;
80 # Nicely print either a "msgid" or "msgstr" (name is one of these)
81 # with given array of data.
84 print " $name \"", join("\"\n \"", @_), "\"\n";
87 # Print a problem (args like print()), preceded by entry unless
88 # we have already printed that: label, and msgid and msgstr.
92 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
93 print_one("msgid", @amsgid);
94 print_one("msgstr", @amsgstr);
100 # Check final newline: probably, translations should end in a newline
101 # if and only if the original string does.
102 # (See also check_trailing_whitespace and check_num_newlines below.)
104 sub check_trailing_newlines {
# Part of the standard checks, so it is skipped when -x is given.
105 if ($opt_x) { return; }
# A newline is stored as the two characters '\' and 'n', so look at the
# last two characters of each string ("" when the string is shorter).
109 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : "";
110 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : "";
# msgid ends in an escaped newline but msgstr does not.
112 if ($ichar eq "\\n" && $schar ne "\\n") {
113 print_problem "Missing trailing newline\n";
# msgstr ends in an escaped newline but msgid does not.
115 if ($ichar ne "\\n" && $schar eq "\\n") {
116 print_problem "Extra trailing newline\n";
121 # Check leading whitespace. In general, any leading whitespace should
122 # be the same in msgstr and msgid -- but not always.
124 sub check_leading_whitespace {
# Optional check: only runs when -w was given.
125 unless ($opt_w) { return; }
# Capture the run of whitespace characters at the start of msgid.  \s
# matches real whitespace only, not the two-character escape '\' 'n'.
129 if ($msgid =~ m/^(\s+)/) {
# Same capture for msgstr.
134 if ($msgstr =~ m/^(\s+)/) {
140 print_problem "Different leading whitespace\n";
144 # Check trailing whitespace. In general, any trailing whitespace should
145 # be the same in msgstr and msgid -- but not always.
147 sub check_trailing_whitespace {
# Optional check: only runs when -W was given.
148 unless ($opt_W) { return; }
# Capture the run at the end of msgid made of whitespace characters
# and/or escaped newlines (the two characters '\' and 'n').
152 if ($msgid =~ m/((?:\s|\\n)+)$/) {
# Same capture for msgstr.
157 if ($msgstr =~ m/((?:\s|\\n)+)$/) {
163 print_problem "Different trailing whitespace\n";
167 # Check equal numbers of newlines. In general ... etc.
sub check_num_newlines {
    # Optional check: only runs when -n was given, because a differing
    # number of newlines can be legitimate in a translation.
    unless ($opt_n) { return; }

    # Newlines are stored in $msgid/$msgstr as the two characters '\' and
    # 'n', so that escape sequence is what gets counted.
    #
    # m//g evaluated in scalar context only reports whether there is a
    # (next) match, so assigning it straight to a scalar gave true/false
    # rather than a count.  Assigning through an empty list forces list
    # context; the scalar then receives the number of matches (0 if none).
    my $num_i = () = $msgid  =~ m(\\n)g;
    my $num_s = () = $msgstr =~ m(\\n)g;

    # Report the entry when the two counts differ.
    if ($num_i != $num_s) {
        print_problem("Mismatch in newline count\n");
    }
}
181 # Check capitalization of first non-whitespace character (for [a-zA-Z]
182 # only). In general ... etc.
184 sub check_leading_capitalization {
# Optional check: only runs when -c was given.
185 unless ($opt_c) { return; }
# Matches only when the first non-whitespace character of msgid is an
# ASCII letter; that letter is the capture.
189 if ($msgid =~ m/^\s*([a-zA-Z])/) {
# Same match for msgstr.
192 if ($msgstr =~ m/^\s*([a-zA-Z])/) {
# Compare only when both $id and $str are defined; report when one is a
# lower-case letter and the other an upper-case letter.
195 if (defined($id) && defined($str)) {
196 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) ||
197 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
198 print_problem "Different leading capitalization\n";
203 # Check trailing 'punctuation' characters (ignoring trailing whitespace).
206 sub check_trailing_punctuation {
# Optional check: only runs when -p was given.
207 unless ($opt_p) { return; }
211 # Might want more characters:
# Match a run of punctuation characters that is followed only by
# whitespace and/or escaped newlines up to the end of msgid.  The '+' sits
# outside the capture group, so $1 holds just the last character of the run.
212 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
# Same match for msgstr.
217 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
222 ##print "$id $str\n";
224 print_problem "Different trailing punctuation\n";
228 # Check that multiline strings have whitespace separation, since
230 # msgstr "this is a multiline"
233 # "this is a multilinestring"
235 sub check_whitespace_joins {
# Part of the standard checks, so it is skipped when -x is given.
236 if ($opt_x) { return; }
# Walk the raw input lines of msgid first, then those of msgstr.
241 foreach my $aref (\@amsgid, \@amsgstr) {
244 foreach my $line (@$aref) {
# $i == 0 labels the problem as belonging to msgid, any other value as msgstr.
258 print_problem("Possible non-whitespace line-join problem in ",
259 ($i==0 ? "msgid" : "msgstr"), " \n");
265 # Check printf-style format entries.
266 # Non-trivial, because translation strings may use format specifiers
267 # out of order, or skip some specifiers etc. Also gettext marks
268 # anything with '%' as cformat, though not all are.
271 unless ($is_cformat) { return; }
272 if ($opt_x) { return; }
275 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g);
276 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g);
278 ##print join("::", @iform), "\n";
279 ##print join("::", @sform), "\n";
281 my $js; # index in sform
282 my $j; # index into iform
284 for ($js=0; $js < @sform; $js++) {
285 my $sf = $sform[$js];
287 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
293 print_problem("Format number mismatch for $sf_orig [msgstr:",
299 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
300 " vs $if [msgid:", ($j+1), "]\n");
305 # Run all individual checks on current entry, reporting any problems.
310 $msgid = join("", @amsgid);
311 $msgstr = join("", @amsgstr);
314 if (length($msgid)==0) {
315 print_problem "Zero length msgid\n";
318 if (length($msgstr)==0) {
319 unless ($opt_e) { return; }
320 print_problem "Untranslated msgid\n";
323 check_whitespace_joins;
325 check_leading_whitespace;
326 check_trailing_newlines;
327 check_trailing_whitespace;
328 check_leading_capitalization;
329 check_trailing_punctuation;
337 if ($state==S_DOING_MSGSTR) {
343 if ( m(^\#, fuzzy) ) {
346 if ( m(^\#, .*c-format) ) {
347 # .* is because can have fuzzy, c-format
353 if ( m(^msgid \"(.*)\"$) ) {
356 $state = S_DOING_MSGID;
359 if ( m(^msgid_plural \"(.*)\"$) ) {
362 $state = S_DOING_MSGID;
365 if ( m(^msgstr \"(.*)\"$) ) {
367 $state = S_DOING_MSGSTR;
370 if ( m(^msgstr\[[0-5]\] \"(.*)\"$) ) {
372 $state = S_DOING_MSGSTR;
375 if ( m(^\"(.*)\"$) ) {
376 if ($state==S_DOING_MSGID) {
378 } elsif($state==S_DOING_MSGSTR) {
381 die "Looking at string $_ in bad state $state,";
385 die "Unexpected at $.: ", $_;