6238
|
1 #!/usr/bin/perl -w
|
|
2 #
|
|
3 # check_po.pl - check po file translations for likely errors
|
|
4 #
|
|
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au
|
|
6 # This script is hereby placed in the Public Domain.
|
|
7 #
|
|
8 # Various checks on po file translations:
|
|
9 # - printf-style format strings;
|
|
10 # - differences in trailing newlines;
|
|
11 # - empty (non-fuzzy) msgid;
|
|
12 # - likely whitespace errors on joining multi-line entries
|
|
13 # Ignores all fuzzy entries.
|
|
14 #
|
|
15 # Options:
|
|
16 # -x Don't do standard checks above (eg, just check one of below).
|
|
17 # -n Check newlines within strings; ie, that have equal numbers
|
|
18 # of newlines in msgstr and msgid. (Optional because this may
|
|
19 # happen legitimately.)
|
|
20 # -w Check leading whitespace. Sometimes whitespace is simply
|
|
21 # spacing (eg, for widget labels etc), or punctuation differences,
|
|
22 # so this may be ok.
|
|
23 # -W Check trailing whitespace. See -w above.
|
|
24 # -p Check trailing punctuation.
|
|
25 # -c Check capitalization of first non-whitespace character
|
|
26 # (only if [a-zA-Z]).
|
|
27 # -e Check on empty (c.q. new) msgstr
|
|
28 #
|
|
29 # Reads stdin (or filename args, via <>), writes any problems to stdout.
|
|
30 #
|
|
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
|
|
32 #
|
|
33 # Version: 0.41 (2002-06-06)
|
|
34
|
|
35 use strict;
|
|
36 use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e);
|
|
37 use Getopt::Std;
|
|
38
|
|
39 getopts('cnpwWxe');
|
|
40
|
|
41 # Globals, for current po entry:
|
|
42 #
|
|
43 # Note that msgid and msgstr have newlines represented by the
|
|
44 # two characters '\' and 'n' (and similarly for other escapes).
|
|
45
|
|
46 my @amsgid; # lines exactly as in input
|
|
47 my @amsgstr;
|
|
48 my $entryline; # lineno where entry starts
|
|
49 my $msgid; # lines joined by ""
|
|
50 my $msgstr;
|
|
51 my $is_fuzzy;
|
|
52 my $is_cformat;
|
|
53 my $state; # From constant values below.
|
|
54 my $did_print; # Whether we have printed this entry, to
|
|
55 # print only once for multiple problems.
|
|
56
|
|
57 use constant S_LOOKING_START => 0; # looking for start of entry
|
|
58 use constant S_DOING_MSGID => 1; # doing msgid part
|
|
59 use constant S_DOING_MSGSTR => 2; # doing msgstr part
|
|
60
|
|
61 # Initialize or reinitalize globals to prepare for new entry:
|
|
62 sub new_entry {
|
|
63 @amsgid = ();
|
|
64 @amsgstr = ();
|
|
65 $msgid = undef;
|
|
66 $msgstr = undef;
|
|
67 $entryline = 0;
|
|
68 $is_fuzzy = 0;
|
|
69 $is_cformat = 0;
|
|
70 $did_print = 0;
|
|
71 $state = S_LOOKING_START;
|
|
72 }
|
|
73
|
|
74 # Nicely print either a "msgid" or "msgstr" (name is one of these)
|
|
75 # with given array of data.
|
|
76 sub print_one {
|
|
77 my $name = shift;
|
|
78 print " $name \"", join("\"\n \"", @_), "\"\n";
|
|
79 }
|
|
80
|
|
81 # Print a problem (args like print()), preceeded by entry unless
|
|
82 # we have already printed that: label, and msgid and msgstr.
|
|
83 #
|
|
84 sub print_problem {
|
|
85 unless ($did_print) {
|
|
86 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
|
|
87 print_one("msgid", @amsgid);
|
|
88 print_one("msgstr", @amsgstr);
|
|
89 $did_print = 1;
|
|
90 }
|
|
91 print "*** ", @_;
|
|
92 }
|
|
93
|
|
94 # Check final newline: probably, translations should end in a newline
|
|
95 # if and only if the original string does.
|
|
96 # (See also check_trailing_whitespace and check_num_newlines below.)
|
|
97 #
|
|
98 sub check_trailing_newlines {
|
|
99 if ($opt_x) { return; }
|
|
100
|
|
101 my ($ichar, $schar);
|
|
102
|
|
103 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : "";
|
|
104 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : "";
|
|
105
|
|
106 if ($ichar eq "\\n" && $schar ne "\\n") {
|
|
107 print_problem "Missing trailing newline\n";
|
|
108 }
|
|
109 if ($ichar ne "\\n" && $schar eq "\\n") {
|
|
110 print_problem "Extra trailing newline\n";
|
|
111 }
|
|
112
|
|
113 }
|
|
114
|
|
115 # Check leading whitespace. In general, any leading whitespace should
|
|
116 # be the same in msgstr and msgid -- but not always.
|
|
117 #
|
|
118 sub check_leading_whitespace {
|
|
119 unless ($opt_w) { return; }
|
|
120
|
|
121 my ($id, $str);
|
|
122
|
|
123 if ($msgid =~ m/^(\s+)/) {
|
|
124 $id = $1;
|
|
125 } else {
|
|
126 $id = "";
|
|
127 }
|
|
128 if ($msgstr =~ m/^(\s+)/) {
|
|
129 $str = $1;
|
|
130 } else {
|
|
131 $str = "";
|
|
132 }
|
|
133 if ($id ne $str) {
|
|
134 print_problem "Different leading whitespace\n";
|
|
135 }
|
|
136 }
|
|
137
|
|
138 # Check trailing whitespace. In general, any trailing whitespace should
|
|
139 # be the same in msgstr and msgid -- but not always.
|
|
140 #
|
|
141 sub check_trailing_whitespace {
|
|
142 unless ($opt_W) { return; }
|
|
143
|
|
144 my ($id, $str);
|
|
145
|
|
146 if ($msgid =~ m/((?:\s|\\n)+)$/) {
|
|
147 $id = $1;
|
|
148 } else {
|
|
149 $id = "";
|
|
150 }
|
|
151 if ($msgstr =~ m/((?:\s|\\n)+)$/) {
|
|
152 $str = $1;
|
|
153 } else {
|
|
154 $str = "";
|
|
155 }
|
|
156 if ($id ne $str) {
|
|
157 print_problem "Different trailing whitespace\n";
|
|
158 }
|
|
159 }
|
|
160
|
|
161 # Check equal numbers of newlines. In general ... etc.
|
|
162 #
|
|
163 sub check_num_newlines {
|
|
164 unless ($opt_n) { return; }
|
|
165
|
|
166 my $num_i = ($msgid =~ m(\\n)g);
|
|
167 my $num_s = ($msgstr =~ m(\\n)g);
|
|
168
|
|
169 if ($num_i != $num_s) {
|
|
170 print_problem "Mismatch in newline count\n";
|
|
171 }
|
|
172
|
|
173 }
|
|
174
|
|
175 # Check capitalization of first non-whitespace character (for [a-zA-Z]
|
|
176 # only). In general ... etc.
|
|
177 #
|
|
178 sub check_leading_capitalization {
|
|
179 unless ($opt_c) { return; }
|
|
180
|
|
181 my ($id, $str);
|
|
182
|
|
183 if ($msgid =~ m/^\s*([a-zA-Z])/) {
|
|
184 $id = $1;
|
|
185 }
|
|
186 if ($msgstr =~ m/^\s*([a-zA-Z])/) {
|
|
187 $str = $1;
|
|
188 }
|
|
189 if (defined($id) && defined($str)) {
|
|
190 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) ||
|
|
191 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
|
|
192 print_problem "Different leading capitalization\n";
|
|
193 }
|
|
194 }
|
|
195 }
|
|
196
|
|
197 # Check trailing 'punctuation' characters (ignoring trailing whitespace).
|
|
198 # In general .. etc.
|
|
199 #
|
|
200 sub check_trailing_punctuation {
|
|
201 unless ($opt_p) { return; }
|
|
202
|
|
203 my ($id, $str);
|
|
204
|
|
205 # Might want more characters:
|
|
206 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
|
|
207 $id = $1;
|
|
208 } else {
|
|
209 $id = "";
|
|
210 }
|
|
211 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) {
|
|
212 $str = $1;
|
|
213 } else {
|
|
214 $str = "";
|
|
215 }
|
|
216 ##print "$id $str\n";
|
|
217 if ($id ne $str) {
|
|
218 print_problem "Different trailing punctuation\n";
|
|
219 }
|
|
220 }
|
|
221
|
|
222 # Check that multiline strings have whitespace separation, since
|
|
223 # otherwise, eg:
|
|
224 # msgstr "this is a multiline"
|
|
225 # "string"
|
|
226 # expands to:
|
|
227 # "this is a multilinestring"
|
|
228 #
|
|
229 sub check_whitespace_joins {
|
|
230 if ($opt_x) { return; }
|
|
231
|
|
232 my $ok = 1;
|
|
233 my $i = 0;
|
|
234
|
|
235 foreach my $aref (\@amsgid, \@amsgstr) {
|
|
236 my $prev = undef;
|
|
237 LINE:
|
|
238 foreach my $line (@$aref) {
|
|
239 if (defined($prev)
|
|
240 && length($prev)
|
|
241 && $prev !~ /\s$/
|
|
242 && $prev !~ /\\n$/
|
|
243 && $line !~ /^\s/
|
|
244 && $line !~ /^\\n/)
|
|
245 {
|
|
246 $ok = 0;
|
|
247 last LINE;
|
|
248 }
|
|
249 $prev = $line;
|
|
250 }
|
|
251 if (!$ok) {
|
|
252 print_problem("Possible non-whitespace line-join problem in ",
|
|
253 ($i==0 ? "msgid" : "msgstr"), " \n");
|
|
254 }
|
|
255 $i++;
|
|
256 }
|
|
257 }
|
|
258
|
|
259 # Check printf-style format entries.
|
|
260 # Non-trivial, because translation strings may use format specifiers
|
|
261 # out of order, or skip some specifiers etc. Also gettext marks
|
|
262 # anything with '%' as cformat, though not all are.
|
|
263 #
|
|
264 sub check_cformat {
|
|
265 unless ($is_cformat) { return; }
|
|
266 if ($opt_x) { return; }
|
|
267
|
|
268 my (@iform, @sform);
|
|
269 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g);
|
|
270 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g);
|
|
271
|
|
272 ##print join("::", @iform), "\n";
|
|
273 ##print join("::", @sform), "\n";
|
|
274
|
|
275 my $js; # index in sform
|
|
276 my $j; # index into iform
|
|
277 SFORM:
|
|
278 for ($js=0; $js < @sform; $js++) {
|
|
279 my $sf = $sform[$js];
|
|
280 my $sf_orig = $sf;
|
|
281 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
|
|
282 $j = $1-1;
|
|
283 } else {
|
|
284 $j = $js;
|
|
285 }
|
|
286 if ($j > $#iform) {
|
|
287 print_problem("Format number mismatch for $sf_orig [msgstr:",
|
|
288 ($js+1), "]\n");
|
|
289 next SFORM;
|
|
290 }
|
|
291 my $if = $iform[$j];
|
|
292 if ($sf ne $if) {
|
|
293 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
|
|
294 " vs $if [msgid:", ($j+1), "]\n");
|
|
295 }
|
|
296 }
|
|
297 }
|
|
298
|
|
299 # Run all individual checks on current entry, reporting any problems.
|
|
300 sub check_entry {
|
|
301 if ($is_fuzzy) {
|
|
302 return;
|
|
303 }
|
|
304 $msgid = join("", @amsgid);
|
|
305 $msgstr = join("", @amsgstr);
|
|
306
|
|
307 unless ($opt_x) {
|
|
308 if (length($msgid)==0) {
|
|
309 print_problem "Zero length msgid\n";
|
|
310 }
|
|
311 }
|
|
312 if (length($msgstr)==0) {
|
|
313 unless ($opt_e) { return; }
|
|
314 print_problem "Untranslated msgid\n";
|
|
315 }
|
|
316 check_cformat;
|
|
317 check_whitespace_joins;
|
|
318 check_num_newlines;
|
|
319 check_leading_whitespace;
|
|
320 check_trailing_newlines;
|
|
321 check_trailing_whitespace;
|
|
322 check_leading_capitalization;
|
|
323 check_trailing_punctuation;
|
|
324 }
|
|
325
|
|
326 new_entry;
|
|
327
|
|
328 LINE:
|
|
329 while(<>) {
|
|
330 if ( m(^\s*$) ) {
|
|
331 if ($state==S_DOING_MSGSTR) {
|
|
332 check_entry;
|
|
333 new_entry;
|
|
334 }
|
|
335 next LINE;
|
|
336 }
|
|
337 if ( m(^\#, fuzzy) ) {
|
|
338 $is_fuzzy = 1;
|
|
339 }
|
|
340 if ( m(^\#, .*c-format) ) {
|
|
341 # .* is because can have fuzzy, c-format
|
|
342 $is_cformat = 1;
|
|
343 }
|
|
344 if ( m(^\#) ) {
|
|
345 next LINE;
|
|
346 }
|
|
347 if ( m(^msgid \"(.*)\"$) ) {
|
|
348 $entryline = $.;
|
|
349 @amsgid = ($1);
|
|
350 $state = S_DOING_MSGID;
|
|
351 next LINE;
|
|
352 }
|
|
353 if ( m(^msgid_plural \"(.*)\"$) ) {
|
|
354 $entryline = $.;
|
|
355 @amsgid = ($1);
|
|
356 $state = S_DOING_MSGID;
|
|
357 next LINE;
|
|
358 }
|
|
359 if ( m(^msgstr \"(.*)\"$) ) {
|
|
360 @amsgstr = ($1);
|
|
361 $state = S_DOING_MSGSTR;
|
|
362 next LINE;
|
|
363 }
|
|
364 if ( m(^msgstr\[[0-2]\] \"(.*)\"$) ) {
|
|
365 @amsgstr = ($1);
|
|
366 $state = S_DOING_MSGSTR;
|
|
367 next LINE;
|
|
368 }
|
|
369 if ( m(^\"(.*)\"$) ) {
|
|
370 if ($state==S_DOING_MSGID) {
|
|
371 push @amsgid, $1;
|
|
372 } elsif($state==S_DOING_MSGSTR) {
|
|
373 push @amsgstr, $1;
|
|
374 } else {
|
|
375 die "Looking at string $_ in bad state $state,";
|
|
376 }
|
|
377 next LINE;
|
|
378 }
|
|
379 die "Unexpected at $.: ", $_;
|
|
380 }
|